work
authorBen Pfaff <blp@cs.stanford.edu>
Mon, 19 Aug 2024 05:43:58 +0000 (22:43 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Mon, 19 Aug 2024 05:43:58 +0000 (22:43 -0700)
rust/Cargo.lock
rust/Cargo.toml
rust/src/endian.rs
rust/src/format.rs
rust/src/lex/lexer.rs
rust/src/lex/segment/mod.rs
rust/src/lib.rs
rust/src/macros.rs
rust/src/message.rs
rust/src/settings.rs

index ab286d0aa1e7c79c69540c6e6af648ad518868a4..2c9fed4fa137f71578abfd588c56b8e8f883f13a 100644 (file)
@@ -234,6 +234,12 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6"
 
+[[package]]
+name = "flagset"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec"
+
 [[package]]
 name = "flate2"
 version = "1.0.26"
@@ -550,6 +556,7 @@ dependencies = [
  "encoding_rs",
  "enum-map",
  "finl_unicode",
+ "flagset",
  "flate2",
  "float_next_after",
  "hexplay",
index c4e92cfe72213eddf6775ce394f8f58426b1b840..41b2f02c6f27b85e8a1a44723a70a30413a7a698 100644 (file)
@@ -27,6 +27,7 @@ bitflags = "2.5.0"
 unicode-width = "0.1.13"
 chardetng = "0.1.17"
 enum-map = "2.7.3"
+flagset = "0.4.6"
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
index defd7f4bfa3919d257894976fdd3c5ea4c4aa1ee..dd89a6cc1da51d40b09ac7861e9f61174bee58a1 100644 (file)
@@ -14,9 +14,9 @@ pub enum Endian {
 
 impl Endian {
     #[cfg(target_endian = "big")]
-    const NATIVE: Endian = Endian::Big;
+    pub const NATIVE: Endian = Endian::Big;
     #[cfg(target_endian = "little")]
-    const NATIVE: Endian = Endian::Little;
+    pub const NATIVE: Endian = Endian::Little;
 
     pub fn identify_u32(expected_value: u32, bytes: [u8; 4]) -> Option<Self> {
         let as_big: u32 = Endian::Big.parse(bytes);
index d0eba7d9b9c361f50ebb2bf611623909d10c00ce..bafdf2726c908a1fca64db4616179efb5bc84e96 100644 (file)
@@ -378,6 +378,12 @@ impl Format {
         d: 0,
     };
 
+    /// A [`Format`] with type [`Type::F`], `w` of 8, and `d` of 2.
+    ///
+    /// NOTE(review): presumably `w` is the field width and `d` the number of
+    /// decimal places, i.e. this is `F8.2` -- confirm against `Format`.
+    pub const F8_2: Format = Format {
+        type_: Type::F,
+        w: 8,
+        d: 2,
+    };
+
     pub fn format(self) -> Type {
         self.type_
     }
index 9c53b408305f7a5e84c56850f24dc91678286155..fd2c5cdba7d1972e5327eb4d9e5288aa3904d7d6 100644 (file)
@@ -1,6 +1,7 @@
 use std::{
-    borrow::Borrow,
+    borrow::{Borrow, Cow},
     collections::{HashMap, VecDeque},
+    fmt::Write,
     fs,
     io::Result as IoResult,
     mem,
@@ -12,12 +13,13 @@ use std::{
 use chardetng::EncodingDetector;
 use encoding_rs::{Encoding, UTF_8};
 use thiserror::Error as ThisError;
-use unicode_width::UnicodeWidthStr;
+use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
 
 use crate::{
     macros::{macro_tokens_to_syntax, MacroSet, ParseStatus, Parser},
-    message::{Location, Point},
+    message::{Diagnostic, Location, Point, Severity},
     prompt::PromptStyle,
+    settings::Settings,
 };
 
 use super::{
@@ -310,9 +312,7 @@ impl Source {
             return false;
         }
 
-        const MEXPAND: bool = true;
-
-        if !MEXPAND {
+        if !Settings::global().macros.expand {
             self.merge.append(&mut self.pp);
             return true;
         }
@@ -357,8 +357,7 @@ impl Source {
         );
         let retval = !expansion.is_empty();
 
-        const MPRINT: bool = false;
-        if MPRINT {
+        if Settings::global().macros.print_expansions {
             // XXX
         }
 
@@ -452,7 +451,8 @@ impl Source {
                 self.buffer
                     .get(self.lines[line - 1]..offset)
                     .unwrap_or_default()
-                    .width() as i32 + 1,
+                    .width() as i32
+                    + 1,
             ),
         }
     }
@@ -468,6 +468,18 @@ impl Source {
         }
     }
 
+    fn ofs_location(&self, range: RangeInclusive<usize>) -> Location {
+        if *range.start() <= *range.end() && *range.end() < self.parse.len() {
+            self.token_location(&self.parse[*range.start()]..=&self.parse[*range.end()])
+        } else {
+            Location {
+                file_name: self.file_name.clone(),
+                span: None,
+                omit_underlines: false,
+            }
+        }
+    }
+
     fn token(&self) -> &Token {
         &self.parse[self.parse_ofs].token
     }
@@ -492,6 +504,78 @@ impl Source {
         }
         &self.parse[index].token
     }
+
+    /// If the tokens in `ofs` contains a macro call, this returns the raw
+    /// syntax for the macro call (not for the expansion) and for any other
+    /// tokens included in that range.  The syntax is encoded in UTF-8 and in
+    /// the original form supplied to the lexer so that, for example, it may
+    /// include comments, spaces, and new-lines if it spans multiple tokens.
+    ///
+    /// Returns `None` if the token range doesn't include a macro call.
+    fn get_macro_call(&self, ofs: RangeInclusive<usize>) -> Option<&str> {
+        if self
+            .parse
+            .get(ofs.clone())
+            .unwrap_or_default()
+            .iter()
+            .all(|token| token.macro_rep.is_none())
+        {
+            return None;
+        }
+
+        let token0 = &self.parse[*ofs.start()];
+        let token1 = &self.parse[*ofs.end()];
+        Some(&self.buffer[token0.pos.start..token1.pos.end])
+    }
+
+    fn diagnostic<S>(&self, severity: Severity, ofs: RangeInclusive<usize>, text: S) -> Diagnostic
+    where
+        S: AsRef<str>,
+    {
+        let text = text.as_ref();
+        let mut s = String::with_capacity(text.len() + 16);
+        if self.buffer.is_empty() && self.eof {
+            write!(&mut s, "At end of input: ");
+        } else if let Some(call) = self.get_macro_call(ofs.clone()) {
+            write!(&mut s, "In syntax expanded from `{}`: ", ellipsize(call));
+        }
+
+        if !text.is_empty() {
+            s.push_str(text);
+        } else {
+            s.push_str("Syntax error.");
+        }
+
+        if !s.ends_with('.') {
+            s.push('.');
+        }
+
+        Diagnostic {
+            severity,
+            location: self.ofs_location(ofs),
+            stack: Vec::new(),
+            command_name: None, // XXX
+            text: s,
+        }
+    }
+}
+
+fn ellipsize(s: &str) -> Cow<str> {
+    if s.width() > 64 {
+        let mut out = String::new();
+        let mut width = 0;
+        for c in s.chars() {
+            out.push(c);
+            width += c.width().unwrap_or(0);
+            if width > 64 {
+                break;
+            }
+        }
+        out.push_str("...");
+        Cow::from(out)
+    } else {
+        Cow::from(s)
+    }
 }
 
 /// A token in a [`Source`].
@@ -615,6 +699,23 @@ impl Lexer {
         };
         self.source.next(offset, &context)
     }
+
+    pub fn error(&self, text: String) -> Diagnostic {
+        self.diagnostic(
+            Severity::Error,
+            self.source.parse_ofs..=self.source.parse_ofs,
+            text,
+        )
+    }
+
+    /// Builds a [`Diagnostic`] with the given `severity` and message `text`
+    /// for the tokens at parse offsets `ofs`, by delegating to the
+    /// underlying source.
+    pub fn diagnostic(
+        &self,
+        severity: Severity,
+        ofs: RangeInclusive<usize>,
+        text: String,
+    ) -> Diagnostic {
+        self.source.diagnostic(severity, ofs, text)
+    }
 }
 
 #[derive(ThisError, Clone, Debug, PartialEq, Eq)]
index de682ac5dc4b6e341c0c19259db531142aae8433..befe5b0c5331fc9e819809fa877bc325b7fb8a25 100644 (file)
@@ -563,7 +563,7 @@ impl Segmenter {
             }
             '*' => {
                 if self.state.1.contains(Substate::START_OF_COMMAND) {
-                    self.state.0 = State::Comment1;
+                    self.state = (State::Comment1, Substate::empty());
                     self.parse_comment_1(input, eof)
                 } else {
                     self.parse_digraph(&['*'], rest, eof)
@@ -701,13 +701,13 @@ impl Segmenter {
 
         if self.state.1.contains(Substate::START_OF_COMMAND) {
             if id_match_n("COMMENT", identifier, 4) {
-                self.state.0 = State::Comment1;
+                self.state = (State::Comment1, Substate::empty());
                 return self.parse_comment_1(input, eof);
             } else if id_match("DOCUMENT", identifier) {
-                self.state.0 = State::Document1;
+                self.state = (State::Document1, Substate::empty());
                 return Ok((input, Segment::StartDocument));
             } else if id_match_n("DEFINE", identifier, 6) {
-                self.state.0 = State::Define1;
+                self.state = (State::Define1, Substate::empty());
             } else if id_match("FILE", identifier) {
                 if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) {
                     self.state = (State::FileLabel1, Substate::empty());
@@ -855,7 +855,7 @@ impl Segmenter {
                 Substate::START_OF_LINE | Substate::START_OF_COMMAND,
             );
         } else {
-            self.state.0 = State::Comment1;
+            self.state = (State::Comment1, Substate::empty());
         }
         Ok((rest, Segment::Newline))
     }
@@ -867,7 +867,7 @@ impl Segmenter {
         let mut end_cmd = false;
         loop {
             let (Some(c), rest) = take(input, eof)? else {
-                self.state.0 = State::Document3;
+                self.state = (State::Document3, Substate::empty());
                 return Ok((input, Segment::Document));
             };
             match c {
@@ -892,7 +892,7 @@ impl Segmenter {
         eof: bool,
     ) -> Result<(&'a str, Segment), Incomplete> {
         let rest = self.parse_newline(input, eof)?.unwrap();
-        self.state.0 = State::Document1;
+        self.state = (State::Document1, Substate::empty());
         Ok((rest, Segment::Newline))
     }
     fn parse_document_3<'a>(
@@ -942,7 +942,7 @@ impl Segmenter {
         eof: bool,
     ) -> Result<(&'a str, Segment), Incomplete> {
         let input = skip_spaces(input, eof)?;
-        self.state.0 = State::FileLabel3;
+        self.state = (State::FileLabel3, Substate::empty());
         Ok((input, Segment::Spaces))
     }
     fn parse_file_label_3<'a>(
index 4624edfe69b7780bd6634fd6342672fad3e4197f..3841e83cbb0f0db6df3b55642e5f69c4b8a0b2e6 100644 (file)
@@ -15,3 +15,4 @@ pub mod prompt;
 pub mod message;
 pub mod macros;
 pub mod settings;
+pub mod command;
index c7a122197d2fe2a552141c34de86471d93849ae8..85671b05a53f7463cb225940d5311e4a352528a3 100644 (file)
@@ -19,6 +19,7 @@ use crate::{
         token::{Punct, Token},
     },
     message::Location,
+    settings::Settings,
 };
 
 #[derive(Clone, Debug, ThisError)]
@@ -840,7 +841,7 @@ impl DoInput {
             items
                 .into_iter()
                 .rev()
-                .take(MITERATE + 1)
+                .take(Settings::global().macros.max_iterations + 1)
                 .map(|mt| mt.syntax)
                 .collect(),
         )
@@ -897,7 +898,9 @@ impl<'a> Expander<'a> {
 
     fn expand(&mut self, input: &mut MacroTokens, output: &mut Vec<MacroToken>) {
         if self.nesting_countdown == 0 {
-            (self.error)(MacroError::TooDeep { limit: MNEST });
+            (self.error)(MacroError::TooDeep {
+                limit: Settings::global().macros.max_nest,
+            });
             output.extend(take(&mut input.0).iter().cloned());
         } else {
             while !input.is_empty() && !self.should_break() {
@@ -1162,7 +1165,8 @@ impl<'a> Expander<'a> {
                         input.advance();
                         return Some(
                             macro_tokens_to_syntax(self.args.unwrap()[param_idx].as_ref().unwrap())
-                                .flatten().collect(),
+                                .flatten()
+                                .collect(),
                         );
                     }
                     if let Some(value) = self.vars.borrow().get(id) {
@@ -1182,9 +1186,10 @@ impl<'a> Expander<'a> {
                         if i > 0 {
                             arg.push(' ')
                         }
-                        arg.extend(macro_tokens_to_syntax(
-                            self.args.unwrap()[i].as_ref().unwrap(),
-                        ).flatten());
+                        arg.extend(
+                            macro_tokens_to_syntax(self.args.unwrap()[i].as_ref().unwrap())
+                                .flatten(),
+                        );
                     }
                     input.advance();
                     return Some(arg);
@@ -1440,7 +1445,7 @@ impl<'a> Expander<'a> {
             let items = tokenize_string(list.as_str(), self.mode, &self.error);
             (
                 DoInput::from_list(items),
-                MacroError::MiterateList(MITERATE),
+                MacroError::MiterateList(Settings::global().macros.max_iterations),
             )
         } else if input.match_("=") {
             let Some(first) = self.evaluate_number(&mut input) else {
@@ -1467,7 +1472,7 @@ impl<'a> Expander<'a> {
             };
             (
                 DoInput::from_by(first, last, by),
-                MacroError::MiterateNumeric(MITERATE),
+                MacroError::MiterateNumeric(Settings::global().macros.max_iterations),
             )
         } else {
             (self.error)(MacroError::ExpectingEqualsOrIn);
@@ -1495,7 +1500,7 @@ impl<'a> Expander<'a> {
             if subexpander.should_break() {
                 break;
             }
-            if i >= MITERATE {
+            if i >= Settings::global().macros.max_iterations {
                 (self.error)(miterate_error);
                 break;
             }
@@ -1635,7 +1640,7 @@ impl<'a> Call<'a> {
             macro_: Some(self.0.macro_),
             args: Some(&self.0.args),
             mode,
-            nesting_countdown: MNEST,
+            nesting_countdown: Settings::global().macros.max_nest,
             stack: vec![
                 Frame {
                     name: None,
@@ -1661,6 +1666,3 @@ impl<'a> Call<'a> {
         self.0.n_tokens
     }
 }
-
-const MNEST: usize = 50;
-const MITERATE: usize = 1000;
index 236592cad27ba84bbe8a6a386ec9a7472eb268f0..964649abfc37cabe95796c7a5a7ed638d959bcfa 100644 (file)
@@ -135,3 +135,16 @@ pub enum Severity {
     Warning,
     Note,
 }
+
+/// A [`Location`] paired with a textual description; used as the element
+/// type of a diagnostic's `stack`.
+pub struct Stack {
+    location: Location,
+    description: String,
+}
+
+pub struct Diagnostic {
+    severity: Severity,
+    location: Location,
+    stack: Vec<Stack>,
+    command_name: Option<&'static str>,
+    text: String,
+}
index 65b0826a023848b6da1109d3b30d80c9ca238794..de519512025b6af1181207ddbc6a391d1e490676 100644 (file)
@@ -1,49 +1,87 @@
+use std::sync::OnceLock;
+
 use enum_map::EnumMap;
 
-use crate::{endian::Endian, format::Format, message::Severity, format::Settings as FormatSettings};
+use crate::{
+    endian::Endian,
+    format::{Format, Settings as FormatSettings},
+    message::Severity,
+};
 
 pub struct Settings {
-    input_integer_format: Endian,
-    input_float_format: Endian,
-    output_integer_format: Endian,
-    output_float_format: Endian,
+    pub input_integer_format: Endian,
+    pub input_float_format: Endian,
+    pub output_integer_format: Endian,
+    pub output_float_format: Endian,
 
     /// `MDISPLAY`: how to display matrices in `MATRIX`...`END MATRIX`.
-    matrix_display: MatrixDisplay,
-
-    view_length: usize,
-
-    view_width: usize,
-
-    safer: bool,
-
-    include: bool,
-
-    route_errors_to_terminal: bool,
-
-    route_errors_to_listing: bool,
-
-    scompress: bool,
-
-    undefined: bool,
+    pub matrix_display: MatrixDisplay,
+
+    pub view_length: usize,
+    pub view_width: usize,
+    pub safer: bool,
+    pub include: bool,
+    pub route_errors_to_terminal: bool,
+    pub route_errors_to_listing: bool,
+    pub scompress: bool,
+    pub undefined: bool,
+    pub blanks: Option<f64>,
+    pub max_messages: EnumMap<Severity, usize>,
+    pub printback: bool,
+    pub macros: MacroSettings,
+    pub max_loops: usize,
+    pub workspace: usize,
+    pub default_format: Format,
+    pub testing: bool,
+    pub fuzz_bits: usize,
+    pub scale_min: usize,
+    pub commands: Compatibility,
+    pub global: Compatibility,
+    pub syntax: Compatibility,
+    pub formats: FormatSettings,
+    pub small: f64,
+}
 
-    blanks: f64,
+impl Default for Settings {
+    /// Returns the built-in defaults: native byte order for every integer
+    /// and float format, the default [`MatrixDisplay`], [`MacroSettings`]
+    /// and [`FormatSettings`], a limit of 100 messages for each
+    /// [`Severity`], and [`Compatibility::Enhanced`] for commands, global
+    /// behavior, and syntax, along with the literal values below.
+    fn default() -> Self {
+        Self {
+            input_integer_format: Endian::NATIVE,
+            input_float_format: Endian::NATIVE,
+            output_integer_format: Endian::NATIVE,
+            output_float_format: Endian::NATIVE,
+            matrix_display: MatrixDisplay::default(),
+            view_length: 24,
+            view_width: 79,
+            safer: false,
+            include: true,
+            route_errors_to_terminal: true,
+            route_errors_to_listing: true,
+            scompress: true,
+            undefined: true,
+            blanks: None,
+            max_messages: EnumMap::from_fn(|_| 100),
+            printback: true,
+            macros: MacroSettings::default(),
+            max_loops: 40,
+            workspace: 64 * 1024 * 1024,
+            default_format: Format::F8_2,
+            testing: false,
+            fuzz_bits: 6,
+            scale_min: 24,
+            commands: Compatibility::Enhanced,
+            global: Compatibility::Enhanced,
+            syntax: Compatibility::Enhanced,
+            formats: FormatSettings::default(),
+            small: 0.0001,
+        }
+    }
+}
 
-    max_messages: EnumMap<Severity, usize>,
-    printback: bool,
-    macros: MacroSettings,
-    max_loops: usize,
-    workspace: usize,
-    default_format: Format,
-    testing: bool,
-    fuzz_bits: usize,
-    scale_min: usize,
-    commands: Compatibility,
-    global: Compatibility,
-    syntax: Compatibility,
-    formats: FormatSettings,
-    small: f64,
-    
+impl Settings {
+    pub fn global() -> &'static Settings {
+        static GLOBAL: OnceLock<Settings> = OnceLock::new();
+        &GLOBAL.get_or_init(|| Settings::default())
+    }
 }
 
 pub enum Compatibility {
@@ -53,21 +91,34 @@ pub enum Compatibility {
 
 pub struct MacroSettings {
     /// Expand macros?
-    expand: bool,
+    pub expand: bool,
 
     /// Print macro expansions?
-    print_expansions: bool,
+    pub print_expansions: bool,
 
     /// Maximum iterations of `!FOR`.
-    max_iterations: usize,
+    pub max_iterations: usize,
 
     /// Maximum nested macro expansion levels.
-    max_nest: usize,
+    pub max_nest: usize,
+}
+
+impl Default for MacroSettings {
+    /// Returns the built-in defaults: macros are expanded, expansions are
+    /// not printed, at most 1000 iterations, and at most 50 nesting levels.
+    fn default() -> Self {
+        Self {
+            expand: true,
+            print_expansions: false,
+            max_iterations: 1000,
+            max_nest: 50,
+        }
+    }
 }
 
 /// How to display matrices in `MATRIX`...`END MATRIX`.
+#[derive(Default)]
 pub enum MatrixDisplay {
     /// Output matrices as text.
+    #[default]
     Text,
 
     /// Output matrices as pivot tables.