--- /dev/null
+use std::{fmt::Write, sync::OnceLock};
+use flagset::{flags, FlagSet};
+use crate::{
+ integer::ToInteger,
+ lex::{
+ command_name::CommandMatcher,
+ lexer::Lexer,
+ token::{Punct, Token},
+ },
+ message::Diagnostic,
+// Parser states. `Command::allowed_states` holds a `FlagSet<State>` built
+// from these values.
+flags! {
+ enum State: u8 {
+ /// No active dataset yet defined.
+ Initial,
+ /// Active dataset has been defined.
+ Data,
+ /// Inside `INPUT PROGRAM`.
+ InputProgram,
+ /// Inside `FILE TYPE`.
+ FileType,
+ /// State nested inside `LOOP` or `DO IF`, inside [State::Data].
+ NestedData,
+ /// State nested inside `LOOP` or `DO IF`, inside [State::InputProgram].
+ NestedInputProgram,
+ }
+/// One entry in the command table returned by `commands()`.
+struct Command {
+ // NOTE(review): presumably the parser states in which the command is
+ // valid; no code visible in this change consults it yet -- confirm.
+ allowed_states: FlagSet<State>,
+ // NOTE(review): `enhanced_only`, `testing_only` and `no_abbrev` are not
+ // read by any code visible in this change; confirm their semantics
+ // before relying on them.
+ enhanced_only: bool,
+ testing_only: bool,
+ no_abbrev: bool,
+ // Command name; `find_best_match` registers it with the `CommandMatcher`.
+ name: &'static str,
+ // Callback that executes the command; `_parse_in_state` calls it after
+ // skipping the tokens that make up the command name.
+ run: Box<dyn Fn(&Context) -> Result<(), Failure> + Send + Sync>,
+/// Returns the table of known commands.
+///
+/// The table is built by `new_commands` on first use and cached in a
+/// `OnceLock` static, so every call returns the same slice.
+fn commands() -> &'static [Command] {
+ fn new_commands() -> Vec<Command> {
+ // The only entry so far is an `ECHO` command whose callback just
+ // prints "hi".
+ vec![Command {
+ allowed_states: State::Initial | State::Data,
+ enhanced_only: false,
+ testing_only: false,
+ no_abbrev: false,
+ name: "ECHO",
+ run: Box::new(|_context| {
+ println!("hi");
+ Ok(())
+ }),
+ }]
+ }
+ static COMMANDS: OnceLock<Vec<Command>> = OnceLock::new();
+ COMMANDS.get_or_init(|| new_commands()).as_slice()
+/// Tries to extend the command name accumulated in `s` with the token that
+/// `lexer.next(n)` returns.
+///
+/// A dash is appended directly. An identifier, or a number with positive
+/// sign that `to_exact_usize` accepts, is appended after a separator.
+/// Returns true if something was appended and false otherwise.
+fn parse_command_word(lexer: &mut Lexer, s: &mut String, n: isize) -> bool {
+ // Words are separated by a space, except when `s` is still empty or its
+ // last character is a dash.
+ let separator = match s.chars().next_back() {
+ Some(c) if c != '-' => " ",
+ _ => "",
+ };
+ match lexer.next(n) {
+ Token::Punct(Punct::Dash) => {
+ s.push('-');
+ true
+ }
+ Token::Id(id) => {
+ write!(s, "{separator}{id}").unwrap();
+ true
+ }
+ Token::Number(number) if number.is_sign_positive() => {
+ if let Some(integer) = number.to_exact_usize() {
+ write!(s, "{separator}{integer}").unwrap();
+ true
+ } else {
+ false
+ }
+ }
+ _ => false,
+ }
+/// Matches `s` against the name of every command in `commands()` and returns
+/// what [CommandMatcher::get_match] reports: the matched command, if any,
+/// and a missing-word count.
+fn find_best_match(s: &str) -> (Option<&'static Command>, isize) {
+ let mut cm = CommandMatcher::new(s);
+ for command in commands() {
+ cm.add(command.name, command);
+ }
+ cm.get_match()
+/// Parses a command name from the tokens at the lexer's current position,
+/// without consuming them.
+///
+/// Words are accumulated one token at a time with `parse_command_word` and
+/// matched with `find_best_match` until the match no longer asks for more
+/// words. On success, returns the command together with the token count
+/// `(word + 1) + missing_words`, which the caller uses to skip the name. On
+/// failure, reports a diagnostic through `error` and returns `Err(())`.
+fn parse_command_name(
+ lexer: &mut Lexer,
+ error: &Box<dyn Fn(Diagnostic)>,
+) -> Result<(&'static Command, isize), ()> {
+ let mut s = String::new();
+ let mut word = 0;
+ let mut missing_words = 0;
+ let mut command = None;
+ while parse_command_word(lexer, &mut s, word) {
+ (command, missing_words) = find_best_match(&s);
+ if missing_words <= 0 {
+ break;
+ }
+ word += 1;
+ }
+ // The matcher still wants more words but none could be parsed: retry
+ // with a `.` pseudo-word appended, then strip it again so that `s` can
+ // be quoted verbatim in the error message below.
+ if command.is_none() && missing_words > 0 {
+ s.push_str(" .");
+ (command, missing_words) = find_best_match(&s);
+ s.truncate(s.len() - 2);
+ }
+ match command {
+ Some(command) => Ok((command, (word + 1) + missing_words)),
+ None => {
+ if s.is_empty() {
+ error(lexer.error("Syntax error expecting command name"))
+ } else {
+ // `format!` is required here: a plain string literal would
+ // emit the text `{s}` instead of the command name.
+ error(lexer.error(format!("Unknown command `{s}`.")))
+ };
+ Err(())
+ }
+ }
+/// Successful outcomes of parsing one command.
+pub enum Success {
+ /// The command (possibly an empty one) was parsed successfully.
+ Success,
+ /// The lexer was at `Token::End`, so there was nothing left to parse.
+ Eof,
+ // NOTE(review): nothing visible in this change produces `Finish`; the
+ // engine stops its loop on it just as it does on `Eof` -- confirm the
+ // intended meaning.
+ Finish,
+/// Ways in which parsing or running a command can fail.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Failure {
+ /// Ordinary failure, e.g. a syntax error.
+ Failure,
+ // NOTE(review): not produced by any code visible in this change.
+ NotImplemented,
+ /// Failure after which the engine stops processing the syntax file when
+ /// the lexer's error handling is `ErrorHandling::Continue`.
+ CascadingFailure,
+/// Checks that the lexer's current token ends the command.
+///
+/// Returns `Ok(Success::Success)` if the token is `Token::EndCommand` or
+/// `Token::End`. Otherwise reports a syntax error through `context` and
+/// returns `Err(Failure::Failure)`.
+pub fn end_of_command(context: &Context) -> Result<Success, Failure> {
+ match context.lexer.token() {
+ Token::EndCommand | Token::End => Ok(Success::Success),
+ _ => {
+ context.error(
+ context
+ .lexer
+ .error("Syntax error expecting end of command."),
+ );
+ Err(Failure::Failure)
+ }
+ }
+/// Parses and runs a single command starting at the lexer's current token.
+///
+/// Returns `Success::Eof` at end of input and `Success::Success` for an empty
+/// command. Otherwise it looks up the command name, skips its tokens, runs
+/// the command's callback, and then requires the command to end. `_state` is
+/// accepted but not used yet.
+fn _parse_in_state(
+ lexer: &mut Lexer,
+ error: &Box<dyn Fn(Diagnostic)>,
+ _state: State,
+) -> Result<Success, Failure> {
+ match lexer.token() {
+ Token::End => Ok(Success::Eof),
+ Token::EndCommand => Ok(Success::Success),
+ _ => {
+ let (command, n_tokens) =
+ parse_command_name(lexer, error).map_err(|_| Failure::Failure)?;
+ // `parse_command_name` only looked ahead; consume the name now.
+ for _ in 0..n_tokens {
+ lexer.get();
+ }
+ let context = Context {
+ error,
+ lexer,
+ command_name: Some(command.name),
+ };
+ match (command.run)(&context) {
+ Ok(()) => end_of_command(&context),
+ Err(error) => Err(error)
+ }
+ }
+ }
+/// Runs `_parse_in_state` and then leaves the lexer positioned for the next
+/// command.
+///
+/// On error the lexer is given a chance to do an interactive reset. The rest
+/// of the current command is always discarded. Unless the result is `Eof` or
+/// `Finish`, any `Token::EndCommand` tokens that follow are skipped too.
+/// Returns the result of `_parse_in_state` unchanged.
+fn parse_in_state(
+ lexer: &mut Lexer,
+ error: &Box<dyn Fn(Diagnostic)>,
+ state: State,
+) -> Result<Success, Failure> {
+ let result = _parse_in_state(lexer, error, state);
+ if result.is_err() {
+ lexer.interactive_reset();
+ }
+ lexer.discard_rest_of_command();
+ match result {
+ Ok(Success::Eof) | Ok(Success::Finish) => (),
+ _ => {
+ while let Token::EndCommand = lexer.token() {
+ lexer.get();
+ }
+ }
+ };
+ result
+/// Parses and runs one command from `lexer` in [State::Initial], reporting
+/// diagnostics through `error`.
+pub fn parse(lexer: &mut Lexer, error: &Box<dyn Fn(Diagnostic)>) -> Result<Success, Failure> {
+ parse_in_state(lexer, error, State::Initial)
+/// State handed to a command's `run` callback.
+pub struct Context<'a> {
+ // Callback that receives diagnostics; see `Context::error`.
+ error: &'a Box<dyn Fn(Diagnostic)>,
+ // Lexer positioned just after the command name.
+ lexer: &'a mut Lexer,
+ // Name of the command being run, as set by `_parse_in_state`.
+ command_name: Option<&'static str>,
+impl<'a> Context<'a> {
+ /// Passes `diagnostic` to the error callback stored in this context.
+ pub fn error(&self, diagnostic: Diagnostic) {
+ (self.error)(diagnostic);
+ }
--- /dev/null
+use crate::{
+ command::{parse, Failure, Success},
+ lex::lexer::{Lexer, Source},
+ message::Diagnostic,
+/// Owns a [Lexer] and runs the commands read from it.
+pub struct Engine {
+ lexer: Lexer,
+impl Engine {
+ /// Creates an engine whose lexer reports its errors by printing
+ /// `{location}: {error}` to standard output.
+ fn new() -> Self {
+ Self {
+ lexer: Lexer::new(Box::new(|location, error| println!("{location}: {error}"))),
+ }
+ }
+ /// Appends `source` to the lexer and runs commands from it until `parse`
+ /// reports `Eof` or `Finish`, or until an error stops processing.
+ fn run(&mut self, source: Source) {
+ self.lexer.append(source);
+ self.lexer.get();
+ loop {
+ // Diagnostics from command parsing are printed to standard output.
+ let error: Box<dyn Fn(Diagnostic)> = Box::new(|diagnostic| {
+ println!("{diagnostic}");
+ });
+ match parse(&mut self.lexer, &error) {
+ Ok(Success::Eof) | Ok(Success::Finish) => break,
+ Ok(Success::Success) => (),
+ // Whether a failure stops processing depends on the lexer's
+ // error-handling mode; any other combination just continues
+ // with the next command.
+ Err(error) => match self.lexer.error_handling() {
+ crate::lex::lexer::ErrorHandling::Continue
+ if error == Failure::CascadingFailure =>
+ {
+ println!("Stopping syntax file processing here to avoid a cascade of dependent command failures.");
+ self.lexer.discard_noninteractive();
+ break;
+ }
+ crate::lex::lexer::ErrorHandling::Stop => {
+ println!("Error encountered while ERROR=STOP is effective.");
+ self.lexer.discard_noninteractive();
+ break;
+ }
+ _ => (),
+ },
+ }
+ }
+ }
+// Compile the test module only for test builds, so that its imports are not
+// built (and flagged as unused) in normal builds.
+#[cfg(test)]
+mod tests {
+ use encoding_rs::UTF_8;
+ use crate::lex::{
+ lexer::{ErrorHandling, Source},
+ segment::Mode,
+ };
+ use super::Engine;
+ /// Smoke test: runs a one-line `ECHO` command through the engine.
+ #[test]
+ fn test_echo() {
+ let mut engine = Engine::new();
+ engine.run(Source::for_file_contents(
+ "ECHO 'hi there'.\n".to_string(),
+ Some("test.sps".to_string()),
+ UTF_8,
+ Mode::default(),
+ ErrorHandling::default(),
+ ));
+ }
--- /dev/null
+/// Conversion of a number to an integer type that succeeds only when
+/// [FromFloat::from_float] accepts the value for the target type.
+///
+/// Implementors provide `to_exact_integer`; the `to_exact_*` methods are
+/// shorthands that fix the target type.
+pub trait ToInteger {
+ /// Converts `self` to `T`, returning `None` if the conversion is rejected.
+ fn to_exact_integer<T>(&self) -> Option<T>
+ where
+ T: FromFloat;
+ fn to_exact_usize(&self) -> Option<usize> {
+ self.to_exact_integer()
+ }
+ fn to_exact_u8(&self) -> Option<u8> {
+ self.to_exact_integer()
+ }
+ fn to_exact_u16(&self) -> Option<u16> {
+ self.to_exact_integer()
+ }
+ fn to_exact_u32(&self) -> Option<u32> {
+ self.to_exact_integer()
+ }
+ fn to_exact_u64(&self) -> Option<u64> {
+ self.to_exact_integer()
+ }
+ fn to_exact_u128(&self) -> Option<u128> {
+ self.to_exact_integer()
+ }
+ // Returns `isize`, matching the method name; the previous `usize` return
+ // type made it impossible to obtain a negative value from this method.
+ fn to_exact_isize(&self) -> Option<isize> {
+ self.to_exact_integer()
+ }
+ fn to_exact_i8(&self) -> Option<i8> {
+ self.to_exact_integer()
+ }
+ fn to_exact_i16(&self) -> Option<i16> {
+ self.to_exact_integer()
+ }
+ fn to_exact_i32(&self) -> Option<i32> {
+ self.to_exact_integer()
+ }
+ fn to_exact_i64(&self) -> Option<i64> {
+ self.to_exact_integer()
+ }
+ fn to_exact_i128(&self) -> Option<i128> {
+ self.to_exact_integer()
+ }
+/// `f64` converts by delegating to the target type's [FromFloat::from_float].
+impl ToInteger for f64 {
+ fn to_exact_integer<T>(&self) -> Option<T>
+ where
+ T: FromFloat,
+ {
+ T::from_float(*self)
+ }
+/// Types that can be constructed from an `f64`, with `None` signalling that
+/// the value is not acceptable for the type.
+pub trait FromFloat {
+ /// Converts `x` to `Self`, or returns `None` if `x` is rejected.
+ fn from_float(x: f64) -> Option<Self>
+ where
+ Self: Sized;
+/// Implements [FromFloat] for the primitive integer type `$T`.
+///
+/// The generated `from_float` returns `Some` only if `x` has no fractional
+/// part and lies within `$T::MIN..=$T::MAX`.
+macro_rules! impl_from_float {
+ ($T:ident) => {
+ impl FromFloat for $T {
+ fn from_float(x: f64) -> Option<Self>
+ where
+ Self: Sized,
+ {
+ // The upper bound is tested as `x < MAX + 1` rather than
+ // `x <= MAX`: for 64- and 128-bit types `MAX as f64` rounds
+ // up to the next power of two, so `<=` would accept a value
+ // one past the range, which the saturating `as` cast below
+ // would then silently turn into `MAX`. For narrower types
+ // `MAX + 1` is exact and the two tests are equivalent for
+ // integral `x`.
+ if x.trunc() == x && x >= $T::MIN as f64 && x < ($T::MAX as f64) + 1.0 {
+ Some(x as Self)
+ } else {
+ None
+ }
+ }
+ }
+ };
/// 4. Otherwise, `string` and `command` match. Set *MISSING_WORDS to n - m. Set
/// *EXACT to false if any of the S[i] were found to be abbreviated in the
/// comparisons done in step 3, or to true if they were all exactly equal
-/// (modulo case). Return true. */
+/// (modulo case). Return true.
pub fn command_match(command: &str, string: &str) -> Option<Match> {
let mut command_words = command.split_whitespace();
let mut string_words = string.split_whitespace();
let Some(cw) = command_words.next() else {
return Some(Match {
- missing_words: -count_words(string),
+ missing_words: -(string_words.count() as isize),
let Some(sw) = string_words.next() else {
return Some(Match {
- missing_words: 1 + count_words(command),
+ missing_words: 1 + command_words.count() as isize,
if !id_match_n_nonstatic(cw, sw, 3) {
+/// Matches a string against a collection of command names.
+///
+/// Candidates are offered with [Self::add]; [Self::get_match] reports the
+/// outcome.
+pub struct CommandMatcher<'a, T> {
+ // The string being matched.
+ string: &'a str,
+ // Set when some candidate matched with a positive missing-word count.
+ extensible: bool,
+ // Auxiliary data of a candidate that matched exactly with no missing
+ // words.
+ exact_match: Option<T>,
+ // Number of inexact candidates recorded for the current
+ // `match_missing_words`.
+ n_matches: usize,
+ // Auxiliary data of the most recently recorded inexact candidate.
+ match_: Option<T>,
+ // Missing-word count of the recorded inexact candidate(s).
+ match_missing_words: isize,
+impl<'a, T> CommandMatcher<'a, T> {
+ /// Creates a matcher for `string` with no candidates considered yet.
+ pub fn new(string: &'a str) -> Self {
+ Self {
+ string,
+ extensible: false,
+ exact_match: None,
+ n_matches: 0,
+ match_: None,
+ match_missing_words: 0,
+ }
+ }
+ /// Consider `command` as a candidate for the command name being parsed. If
+ /// `command` is the correct command name, then [Self::get_match] will
+ /// return `aux` later.
+ pub fn add(&mut self, command: &str, aux: T) {
+ if let Some(Match {
+ missing_words,
+ exact,
+ }) = command_match(command, self.string)
+ {
+ if missing_words > 0 {
+ // A positive count is only remembered as a flag.
+ self.extensible = true;
+ } else if exact && missing_words == 0 {
+ self.exact_match = Some(aux);
+ } else {
+ // Inexact match: a candidate with a larger missing-word
+ // count replaces what was recorded, an equal one is counted
+ // as an additional match, and a smaller one is ignored
+ // unless nothing has been recorded yet.
+ if missing_words > self.match_missing_words {
+ self.n_matches = 0;
+ }
+ if missing_words >= self.match_missing_words || self.n_matches == 0 {
+ self.n_matches += 1;
+ self.match_ = Some(aux);
+ self.match_missing_words = missing_words;
+ }
+ }
+ }
+ }
+ /// Reports the outcome of matching, in order of precedence:
+ ///
+ /// - `(None, 1)` if some candidate matched with a positive missing-word
+ /// count;
+ /// - the exact match with a count of 0, if there was one;
+ /// - the single inexact match with its missing-word count;
+ /// - otherwise `None` with the recorded missing-word count.
+ pub fn get_match(self) -> (Option<T>, isize) {
+ if self.extensible {
+ (None, 1)
+ } else if let Some(exact_match) = self.exact_match {
+ (Some(exact_match), 0)
+ } else if self.n_matches == 1 {
+ (self.match_, self.match_missing_words)
+ } else {
+ (None, self.match_missing_words)
+ }
+ }
pub const COMMAND_NAMES: &'static [&'static str] = &[
use crate::{
macros::{macro_tokens_to_syntax, MacroSet, ParseStatus, Parser},
- message::{Diagnostic, Location, Point, Severity},
+ message::{Category, Diagnostic, Location, Point, Severity},
fn get_parse(&mut self, context: &Context) -> bool {
// XXX deal with accumulated messages
+ /// Returns the syntax for 1-based line-number `line_number`.
+ ///
+ /// A trailing `\r\n` or `\n` is removed. Returns an empty string if
+ /// `line_number` is outside `1..=self.lines.len()`.
+ fn get_line(&self, line_number: i32) -> &str {
+ if (1..=self.lines.len() as i32).contains(&line_number) {
+ let line_number = line_number as usize;
+ // `self.lines[i]` is used as the buffer offset where line `i + 1`
+ // starts; the line ends where the next one starts, or at the end
+ // of the buffer for the last line.
+ let start = self.lines[line_number - 1];
+ let end = self
+ .lines
+ .get(line_number)
+ .copied()
+ .unwrap_or(self.buffer.len());
+ let line = &self.buffer[start..end];
+ line.strip_suffix("\r\n")
+ .unwrap_or(line.strip_suffix('\n').unwrap_or(line))
+ } else {
+ ""
+ }
+ }
fn token_location(&self, range: RangeInclusive<&LexToken>) -> Location {
Location {
file_name: self.file_name.clone(),
- fn diagnostic<S>(&self, severity: Severity, ofs: RangeInclusive<usize>, text: S) -> Diagnostic
- where
- S: AsRef<str>,
- {
- let text = text.as_ref();
+ /// Returns true if the buffer is empty and the `eof` flag is set.
+ fn is_empty(&self) -> bool {
+ self.buffer.is_empty() && self.eof
+ }
+ fn diagnostic(
+ &self,
+ severity: Severity,
+ ofs: RangeInclusive<usize>,
+ text: String,
+ ) -> Diagnostic {
let mut s = String::with_capacity(text.len() + 16);
- if self.buffer.is_empty() && self.eof {
- write!(&mut s, "At end of input: ");
+ if self.is_empty() {
+ s.push_str("At end of input: ");
} else if let Some(call) = self.get_macro_call(ofs.clone()) {
- write!(&mut s, "In syntax expanded from `{}`: ", ellipsize(call));
+ write!(&mut s, "In syntax expanded from `{}`: ", ellipsize(call)).unwrap();
if !text.is_empty() {
- s.push_str(text);
+ s.push_str(&text);
} else {
s.push_str("Syntax error.");
+ let location = self.ofs_location(ofs);
+ let mut source = Vec::new();
+ if let Some(Range {
+ start: Point { line: l0, .. },
+ end: Point { line: l1, .. },
+ }) = location.span
+ {
+ let lines = if l1 - l0 > 3 {
+ vec![l0, l0 + 1, l1]
+ } else {
+ (l0..=l1).collect()
+ };
+ for line_number in lines {
+ source.push((line_number, self.get_line(line_number).to_string()));
+ }
+ }
Diagnostic {
+ category: Category::Syntax,
- location: self.ofs_location(ofs),
+ location,
+ source,
stack: Vec::new(),
command_name: None, // XXX
text: s,
+ /// If this source's error handling is [ErrorHandling::Terminal], replaces
+ /// it with a default `Source` that keeps only the original
+ /// `error_handling`, `encoding` and `read` fields. Otherwise does nothing.
+ fn interactive_reset(&mut self) {
+ if self.error_handling == ErrorHandling::Terminal {
+ // Move the old source out so the three fields to keep can be
+ // taken by value; everything else is dropped.
+ let Source {
+ error_handling,
+ encoding,
+ read,
+ ..
+ } = mem::take(self);
+ *self = Self {
+ error_handling,
+ encoding,
+ read,
+ ..Source::default()
+ };
+ }
+ }
fn ellipsize(s: &str) -> Cow<str> {
macros: &self.macros,
error: &self.error,
- if !self.source.get_parse(&context) {
- let Some(new_source) = self.stack.pop() else {
- self.source = Source::default();
- self.source.parse.push(LexToken {
- token: Token::End,
- pos: 0..0,
- macro_rep: None,
- });
- return &Token::End;
- };
- self.source = new_source;
+ if !self.source.get_parse(&context) && !self.pop_stack() {
+ return &Token::End;
+ /// Replaces the current source with the one on top of the stack and
+ /// returns true. If the stack is empty, installs a default source whose
+ /// only parsed token is `Token::End` and returns false.
+ fn pop_stack(&mut self) -> bool {
+ if let Some(new_source) = self.stack.pop() {
+ self.source = new_source;
+ true
+ } else {
+ self.source = Source::default();
+ self.source.parse.push(LexToken {
+ token: Token::End,
+ pos: 0..0,
+ macro_rep: None,
+ });
+ false
+ }
+ }
/// Inserts `source` so that the next token comes from it. This is only
/// permitted when the lexer is either empty or at `Token::EndCommand`.
pub fn include(&mut self, mut source: Source) {
self.source.next(offset, &context)
- pub fn error(&self, text: String) -> Diagnostic {
+ pub fn error<S>(&self, text: S) -> Diagnostic
+ where
+ S: ToString,
+ {
- pub fn diagnostic(
+ pub fn diagnostic<S>(
severity: Severity,
ofs: RangeInclusive<usize>,
- text: String,
- ) -> Diagnostic {
- self.source.diagnostic(severity, ofs, text)
+ text: S,
+ ) -> Diagnostic
+ where
+ S: ToString,
+ {
+ self.source.diagnostic(severity, ofs, text.to_string())
+ }
+ /// Returns the error-handling mode of the source currently being read.
+ pub fn error_handling(&self) -> ErrorHandling {
+ self.source.error_handling
+ }
+ /// Discards all lookahead tokens, then discards all input sources
+ /// until it encounters one with error mode [ErrorHandling::Terminal] or until it
+ /// runs out of input sources.
+ ///
+ /// Nothing is discarded from a source whose error mode is
+ /// [ErrorHandling::Ignore]; the loop stops as soon as such a source is
+ /// current.
+ pub fn discard_noninteractive(&mut self) {
+ while self.source.error_handling != ErrorHandling::Ignore {
+ self.source.pp.clear();
+ self.source.merge.clear();
+ self.source.parse.clear();
+ self.source.parse_ofs = 0;
+ // A terminal source is kept (with its tokens cleared); any other
+ // source is replaced by the next one on the stack, if there is one.
+ if self.source.error_handling == ErrorHandling::Terminal || !self.pop_stack() {
+ return;
+ }
+ }
+ }
+ /// If the source that the lexer is currently reading has error mode
+ /// [ErrorHandling::Terminal], discards all buffered input and tokens, so
+ /// that the next token to be read comes directly from whatever is next read
+ /// from the stream.
+ ///
+ /// It makes sense to call this function after encountering an error in a
+ /// command entered on the console, because usually the user would prefer
+ /// not to have cascading errors.
+ ///
+ /// This simply delegates to the current source's own `interactive_reset`.
+ pub fn interactive_reset(&mut self) {
+ self.source.interactive_reset()
+ }
+ /// Advances past any tokens up to [Token::EndCommand] or [Token::End].
+ ///
+ /// The terminating token itself is not consumed.
+ pub fn discard_rest_of_command(&mut self) {
+ while !matches!(self.token(), Token::EndCommand | Token::End) {
+ self.get();
+ }
pub mod macros;
pub mod settings;
pub mod command;
+pub mod integer;
+pub mod engine;
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Enum)]
pub enum Severity {
+impl Severity {
+ /// Returns the lowercase name of the severity, as used by its `Display`
+ /// implementation.
+ fn as_str(&self) -> &'static str {
+ match self {
+ Severity::Error => "error",
+ Severity::Warning => "warning",
+ Severity::Note => "note",
+ }
+ }
+/// Formats a severity as the string returned by `Severity::as_str`.
+impl Display for Severity {
+ fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+ write!(f, "{}", self.as_str())
+ }
+/// Category of a [Diagnostic]. The `Display` implementation for `Diagnostic`
+/// omits the location prefix for `General` and prints the command name only
+/// for `Syntax`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Category {
+ General,
+ Syntax,
+ Data,
pub struct Stack {
location: Location,
description: String,
pub struct Diagnostic {
- severity: Severity,
- location: Location,
- stack: Vec<Stack>,
- command_name: Option<&'static str>,
- text: String,
+ pub severity: Severity,
+ pub category: Category,
+ pub location: Location,
+ pub source: Vec<(i32, String)>,
+ pub stack: Vec<Stack>,
+ pub command_name: Option<&'static str>,
+ pub text: String,
+/// Formats a diagnostic for display.
+///
+/// The output consists of one line per stack entry, then the main message
+/// (`location: severity: command: text`, with the location and command name
+/// included only for the applicable categories), and finally, when the
+/// location has a span with column information, the affected source lines
+/// with an underline beneath each unless `omit_underlines` is set.
+impl Display for Diagnostic {
+ fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+ for Stack {
+ location,
+ description,
+ } in &self.stack
+ {
+ // Prefix the description with its location only when there is
+ // one to show, as is done for the main location below.
+ if !location.is_empty() {
+ write!(f, "{location}: ")?;
+ }
+ writeln!(f, "{description}")?;
+ }
+ if self.category != Category::General && !self.location.is_empty() {
+ write!(f, "{}: ", self.location)?;
+ }
+ write!(f, "{}: ", self.severity)?;
+ match self.command_name {
+ Some(command_name) if self.category == Category::Syntax => {
+ write!(f, "{command_name}: ")?
+ }
+ _ => (),
+ }
+ write!(f, "{}", self.text)?;
+ if let Some(Range {
+ start: Point {
+ line: l0,
+ column: Some(c0),
+ },
+ end: Point {
+ line: l1,
+ column: Some(c1),
+ },
+ }) = self.location.span
+ {
+ let mut prev_line_number = None;
+ for (line_number, line) in &self.source {
+ // Mark a gap where source lines were skipped.
+ if let Some(prev_line_number) = prev_line_number {
+ if *line_number != prev_line_number + 1 {
+ write!(f, "\n ... |")?;
+ }
+ }
+ prev_line_number = Some(line_number);
+ write!(f, "\n{line_number:5} | {line}")?;
+ if !self.location.omit_underlines {
+ // The underline starts at the span's start column on the
+ // first line and at column 1 on later lines; it ends at
+ // the span's end column on the last line and at the
+ // line's display width on earlier lines.
+ let c0 = if *line_number == l0 { c0 } else { 1 };
+ let c1 = if *line_number == l1 {
+ c1
+ } else {
+ line.width() as i32
+ };
+ write!(f, "\n |")?;
+ for _ in 0..c0 {
+ f.write_str(" ")?;
+ }
+ if *line_number == l0 {
+ f.write_str("^")?;
+ for _ in c0..c1 {
+ f.write_str("~")?;
+ }
+ } else {
+ for _ in c0..=c1 {
+ f.write_str("~")?;
+ }
+ }
+ }
+ }
+ }
+ Ok(())
+ }