From e6c74bd5d729ca62da42fc9cfbbf26e4cdde0c9e Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 18 Aug 2024 22:43:58 -0700 Subject: [PATCH] work --- rust/Cargo.lock | 7 ++ rust/Cargo.toml | 1 + rust/src/endian.rs | 4 +- rust/src/format.rs | 6 ++ rust/src/lex/lexer.rs | 119 ++++++++++++++++++++++++++++--- rust/src/lex/segment/mod.rs | 16 ++--- rust/src/lib.rs | 1 + rust/src/macros.rs | 28 ++++---- rust/src/message.rs | 13 ++++ rust/src/settings.rs | 135 +++++++++++++++++++++++++----------- 10 files changed, 256 insertions(+), 74 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index ab286d0aa1..2c9fed4fa1 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -234,6 +234,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" +[[package]] +name = "flagset" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec" + [[package]] name = "flate2" version = "1.0.26" @@ -550,6 +556,7 @@ dependencies = [ "encoding_rs", "enum-map", "finl_unicode", + "flagset", "flate2", "float_next_after", "hexplay", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index c4e92cfe72..41b2f02c6f 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -27,6 +27,7 @@ bitflags = "2.5.0" unicode-width = "0.1.13" chardetng = "0.1.17" enum-map = "2.7.3" +flagset = "0.4.6" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/src/endian.rs b/rust/src/endian.rs index defd7f4bfa..dd89a6cc1d 100644 --- a/rust/src/endian.rs +++ b/rust/src/endian.rs @@ -14,9 +14,9 @@ pub enum Endian { impl Endian { #[cfg(target_endian = "big")] - const NATIVE: Endian = Endian::Big; + pub const NATIVE: Endian = Endian::Big; #[cfg(target_endian = "little")] - const NATIVE: Endian = Endian::Little; + pub const NATIVE: Endian = 
Endian::Little; pub fn identify_u32(expected_value: u32, bytes: [u8; 4]) -> Option { let as_big: u32 = Endian::Big.parse(bytes); diff --git a/rust/src/format.rs b/rust/src/format.rs index d0eba7d9b9..bafdf2726c 100644 --- a/rust/src/format.rs +++ b/rust/src/format.rs @@ -378,6 +378,12 @@ impl Format { d: 0, }; + pub const F8_2: Format = Format { + type_: Type::F, + w: 8, + d: 2, + }; + pub fn format(self) -> Type { self.type_ } diff --git a/rust/src/lex/lexer.rs b/rust/src/lex/lexer.rs index 9c53b40830..fd2c5cdba7 100644 --- a/rust/src/lex/lexer.rs +++ b/rust/src/lex/lexer.rs @@ -1,6 +1,7 @@ use std::{ - borrow::Borrow, + borrow::{Borrow, Cow}, collections::{HashMap, VecDeque}, + fmt::Write, fs, io::Result as IoResult, mem, @@ -12,12 +13,13 @@ use std::{ use chardetng::EncodingDetector; use encoding_rs::{Encoding, UTF_8}; use thiserror::Error as ThisError; -use unicode_width::UnicodeWidthStr; +use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; use crate::{ macros::{macro_tokens_to_syntax, MacroSet, ParseStatus, Parser}, - message::{Location, Point}, + message::{Diagnostic, Location, Point, Severity}, prompt::PromptStyle, + settings::Settings, }; use super::{ @@ -310,9 +312,7 @@ impl Source { return false; } - const MEXPAND: bool = true; - - if !MEXPAND { + if !Settings::global().macros.expand { self.merge.append(&mut self.pp); return true; } @@ -357,8 +357,7 @@ impl Source { ); let retval = !expansion.is_empty(); - const MPRINT: bool = false; - if MPRINT { + if Settings::global().macros.print_expansions { // XXX } @@ -452,7 +451,8 @@ impl Source { self.buffer .get(self.lines[line - 1]..offset) .unwrap_or_default() - .width() as i32 + 1, + .width() as i32 + + 1, ), } } @@ -468,6 +468,18 @@ impl Source { } } + fn ofs_location(&self, range: RangeInclusive) -> Location { + if *range.start() <= *range.end() && *range.end() < self.parse.len() { + self.token_location(&self.parse[*range.start()]..=&self.parse[*range.end()]) + } else { + Location { + file_name: 
self.file_name.clone(), + span: None, + omit_underlines: false, + } + } + } + fn token(&self) -> &Token { &self.parse[self.parse_ofs].token } @@ -492,6 +504,78 @@ impl Source { } &self.parse[index].token } + + /// If the tokens in `ofs` contain a macro call, this returns the raw + /// syntax for the macro call (not for the expansion) and for any other + /// tokens included in that range. The syntax is encoded in UTF-8 and in + /// the original form supplied to the lexer so that, for example, it may + /// include comments, spaces, and new-lines if it spans multiple tokens. + /// + /// Returns `None` if the token range doesn't include a macro call. + fn get_macro_call(&self, ofs: RangeInclusive<usize>) -> Option<&str> { + if self + .parse + .get(ofs.clone()) + .unwrap_or_default() + .iter() + .all(|token| token.macro_rep.is_none()) + { + return None; + } + + let token0 = &self.parse[*ofs.start()]; + let token1 = &self.parse[*ofs.end()]; + Some(&self.buffer[token0.pos.start..token1.pos.end]) + } + + fn diagnostic<S>(&self, severity: Severity, ofs: RangeInclusive<usize>, text: S) -> Diagnostic + where + S: AsRef<str>, + { + let text = text.as_ref(); + let mut s = String::with_capacity(text.len() + 16); + if self.buffer.is_empty() && self.eof { + s.push_str("At end of input: "); + } else if let Some(call) = self.get_macro_call(ofs.clone()) { + let _ = write!(&mut s, "In syntax expanded from `{}`: ", ellipsize(call)); + } + + if !text.is_empty() { + s.push_str(text); + } else { + s.push_str("Syntax error."); + } + + if !s.ends_with('.') { + s.push('.'); + } + + Diagnostic { + severity, + location: self.ofs_location(ofs), + stack: Vec::new(), + command_name: None, // XXX + text: s, + } + } +} + +fn ellipsize(s: &str) -> Cow<'_, str> { + if s.width() > 64 { + let mut out = String::new(); + let mut width = 0; + for c in s.chars() { + width += c.width().unwrap_or(0); + if width > 61 { // leave room for "..." within 64 columns + break; + } + out.push(c); + } + out.push_str("..."); + Cow::from(out) + } else { + Cow::from(s) + } } /// A token in a 
[`Source`]. @@ -615,6 +699,23 @@ impl Lexer { }; self.source.next(offset, &context) } + + pub fn error(&self, text: String) -> Diagnostic { + self.diagnostic( + Severity::Error, + self.source.parse_ofs..=self.source.parse_ofs, + text, + ) + } + + pub fn diagnostic( + &self, + severity: Severity, + ofs: RangeInclusive<usize>, + text: String, + ) -> Diagnostic { + self.source.diagnostic(severity, ofs, text) + } } #[derive(ThisError, Clone, Debug, PartialEq, Eq)] diff --git a/rust/src/lex/segment/mod.rs b/rust/src/lex/segment/mod.rs index de682ac5dc..befe5b0c53 100644 --- a/rust/src/lex/segment/mod.rs +++ b/rust/src/lex/segment/mod.rs @@ -563,7 +563,7 @@ impl Segmenter { } '*' => { if self.state.1.contains(Substate::START_OF_COMMAND) { - self.state.0 = State::Comment1; + self.state = (State::Comment1, Substate::empty()); self.parse_comment_1(input, eof) } else { self.parse_digraph(&['*'], rest, eof) @@ -701,13 +701,13 @@ impl Segmenter { if self.state.1.contains(Substate::START_OF_COMMAND) { if id_match_n("COMMENT", identifier, 4) { - self.state.0 = State::Comment1; + self.state = (State::Comment1, Substate::empty()); return self.parse_comment_1(input, eof); } else if id_match("DOCUMENT", identifier) { - self.state.0 = State::Document1; + self.state = (State::Document1, Substate::empty()); return Ok((input, Segment::StartDocument)); } else if id_match_n("DEFINE", identifier, 6) { - self.state.0 = State::Define1; + self.state = (State::Define1, Substate::empty()); } else if id_match("FILE", identifier) { if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) { self.state = (State::FileLabel1, Substate::empty()); @@ -855,7 +855,7 @@ impl Segmenter { Substate::START_OF_LINE | Substate::START_OF_COMMAND, ); } else { - self.state.0 = State::Comment1; + self.state = (State::Comment1, Substate::empty()); } Ok((rest, Segment::Newline)) } @@ -867,7 +867,7 @@ impl Segmenter { let mut end_cmd = false; loop { let (Some(c), rest) = take(input, eof)? 
else { - self.state.0 = State::Document3; + self.state = (State::Document3, Substate::empty()); return Ok((input, Segment::Document)); }; match c { @@ -892,7 +892,7 @@ impl Segmenter { eof: bool, ) -> Result<(&'a str, Segment), Incomplete> { let rest = self.parse_newline(input, eof)?.unwrap(); - self.state.0 = State::Document1; + self.state = (State::Document1, Substate::empty()); Ok((rest, Segment::Newline)) } fn parse_document_3<'a>( @@ -942,7 +942,7 @@ impl Segmenter { eof: bool, ) -> Result<(&'a str, Segment), Incomplete> { let input = skip_spaces(input, eof)?; - self.state.0 = State::FileLabel3; + self.state = (State::FileLabel3, Substate::empty()); Ok((input, Segment::Spaces)) } fn parse_file_label_3<'a>( diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 4624edfe69..3841e83cbb 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -15,3 +15,4 @@ pub mod prompt; pub mod message; pub mod macros; pub mod settings; +pub mod command; diff --git a/rust/src/macros.rs b/rust/src/macros.rs index c7a122197d..85671b05a5 100644 --- a/rust/src/macros.rs +++ b/rust/src/macros.rs @@ -19,6 +19,7 @@ use crate::{ token::{Punct, Token}, }, message::Location, + settings::Settings, }; #[derive(Clone, Debug, ThisError)] @@ -840,7 +841,7 @@ impl DoInput { items .into_iter() .rev() - .take(MITERATE + 1) + .take(Settings::global().macros.max_iterations + 1) .map(|mt| mt.syntax) .collect(), ) @@ -897,7 +898,9 @@ impl<'a> Expander<'a> { fn expand(&mut self, input: &mut MacroTokens, output: &mut Vec) { if self.nesting_countdown == 0 { - (self.error)(MacroError::TooDeep { limit: MNEST }); + (self.error)(MacroError::TooDeep { + limit: Settings::global().macros.max_nest, + }); output.extend(take(&mut input.0).iter().cloned()); } else { while !input.is_empty() && !self.should_break() { @@ -1162,7 +1165,8 @@ impl<'a> Expander<'a> { input.advance(); return Some( macro_tokens_to_syntax(self.args.unwrap()[param_idx].as_ref().unwrap()) - .flatten().collect(), + .flatten() + .collect(), ); } if 
let Some(value) = self.vars.borrow().get(id) { @@ -1182,9 +1186,10 @@ impl<'a> Expander<'a> { if i > 0 { arg.push(' ') } - arg.extend(macro_tokens_to_syntax( - self.args.unwrap()[i].as_ref().unwrap(), - ).flatten()); + arg.extend( + macro_tokens_to_syntax(self.args.unwrap()[i].as_ref().unwrap()) + .flatten(), + ); } input.advance(); return Some(arg); @@ -1440,7 +1445,7 @@ impl<'a> Expander<'a> { let items = tokenize_string(list.as_str(), self.mode, &self.error); ( DoInput::from_list(items), - MacroError::MiterateList(MITERATE), + MacroError::MiterateList(Settings::global().macros.max_iterations), ) } else if input.match_("=") { let Some(first) = self.evaluate_number(&mut input) else { @@ -1467,7 +1472,7 @@ impl<'a> Expander<'a> { }; ( DoInput::from_by(first, last, by), - MacroError::MiterateNumeric(MITERATE), + MacroError::MiterateNumeric(Settings::global().macros.max_iterations), ) } else { (self.error)(MacroError::ExpectingEqualsOrIn); @@ -1495,7 +1500,7 @@ impl<'a> Expander<'a> { if subexpander.should_break() { break; } - if i >= MITERATE { + if i >= Settings::global().macros.max_iterations { (self.error)(miterate_error); break; } @@ -1635,7 +1640,7 @@ impl<'a> Call<'a> { macro_: Some(self.0.macro_), args: Some(&self.0.args), mode, - nesting_countdown: MNEST, + nesting_countdown: Settings::global().macros.max_nest, stack: vec![ Frame { name: None, @@ -1661,6 +1666,3 @@ impl<'a> Call<'a> { self.0.n_tokens } } - -const MNEST: usize = 50; -const MITERATE: usize = 1000; diff --git a/rust/src/message.rs b/rust/src/message.rs index 236592cad2..964649abfc 100644 --- a/rust/src/message.rs +++ b/rust/src/message.rs @@ -135,3 +135,16 @@ pub enum Severity { Warning, Note, } + +pub struct Stack { + pub location: Location, + pub description: String, +} + +pub struct Diagnostic { + pub severity: Severity, + pub location: Location, + pub stack: Vec<Stack>, + pub command_name: Option<&'static str>, + pub text: String, +} diff --git a/rust/src/settings.rs b/rust/src/settings.rs index 65b0826a02..de51951202 100644 
--- a/rust/src/settings.rs +++ b/rust/src/settings.rs @@ -1,49 +1,87 @@ +use std::sync::OnceLock; + use enum_map::EnumMap; -use crate::{endian::Endian, format::Format, message::Severity, format::Settings as FormatSettings}; +use crate::{ + endian::Endian, + format::{Format, Settings as FormatSettings}, + message::Severity, +}; pub struct Settings { - input_integer_format: Endian, - input_float_format: Endian, - output_integer_format: Endian, - output_float_format: Endian, + pub input_integer_format: Endian, + pub input_float_format: Endian, + pub output_integer_format: Endian, + pub output_float_format: Endian, /// `MDISPLAY`: how to display matrices in `MATRIX`...`END MATRIX`. - matrix_display: MatrixDisplay, - - view_length: usize, - - view_width: usize, - - safer: bool, - - include: bool, - - route_errors_to_terminal: bool, - - route_errors_to_listing: bool, - - scompress: bool, - - undefined: bool, + pub matrix_display: MatrixDisplay, + + pub view_length: usize, + pub view_width: usize, + pub safer: bool, + pub include: bool, + pub route_errors_to_terminal: bool, + pub route_errors_to_listing: bool, + pub scompress: bool, + pub undefined: bool, + pub blanks: Option, + pub max_messages: EnumMap, + pub printback: bool, + pub macros: MacroSettings, + pub max_loops: usize, + pub workspace: usize, + pub default_format: Format, + pub testing: bool, + pub fuzz_bits: usize, + pub scale_min: usize, + pub commands: Compatibility, + pub global: Compatibility, + pub syntax: Compatibility, + pub formats: FormatSettings, + pub small: f64, +} - blanks: f64, +impl Default for Settings { + fn default() -> Self { + Self { + input_integer_format: Endian::NATIVE, + input_float_format: Endian::NATIVE, + output_integer_format: Endian::NATIVE, + output_float_format: Endian::NATIVE, + matrix_display: MatrixDisplay::default(), + view_length: 24, + view_width: 79, + safer: false, + include: true, + route_errors_to_terminal: true, + route_errors_to_listing: true, + scompress: true, + 
undefined: true, + blanks: None, + max_messages: EnumMap::from_fn(|_| 100), + printback: true, + macros: MacroSettings::default(), + max_loops: 40, + workspace: 64 * 1024 * 1024, + default_format: Format::F8_2, + testing: false, + fuzz_bits: 6, + scale_min: 24, + commands: Compatibility::Enhanced, + global: Compatibility::Enhanced, + syntax: Compatibility::Enhanced, + formats: FormatSettings::default(), + small: 0.0001, + } + } +} - max_messages: EnumMap, - printback: bool, - macros: MacroSettings, - max_loops: usize, - workspace: usize, - default_format: Format, - testing: bool, - fuzz_bits: usize, - scale_min: usize, - commands: Compatibility, - global: Compatibility, - syntax: Compatibility, - formats: FormatSettings, - small: f64, - +impl Settings { + pub fn global() -> &'static Settings { + static GLOBAL: OnceLock<Settings> = OnceLock::new(); + GLOBAL.get_or_init(Settings::default) + } } pub enum Compatibility { @@ -53,21 +91,34 @@ pub enum Compatibility { pub struct MacroSettings { /// Expand macros? - expand: bool, + pub expand: bool, /// Print macro expansions? - print_expansions: bool, + pub print_expansions: bool, /// Maximum iterations of `!FOR`. - max_iterations: usize, + pub max_iterations: usize, /// Maximum nested macro expansion levels. - max_nest: usize, + pub max_nest: usize, +} + +impl Default for MacroSettings { + fn default() -> Self { + Self { + expand: true, + print_expansions: false, + max_iterations: 1000, + max_nest: 50, + } + } } /// How to display matrices in `MATRIX`...`END MATRIX`. +#[derive(Default)] pub enum MatrixDisplay { /// Output matrices as text. + #[default] Text, /// Output matrices as pivot tables. -- 2.30.2