From c96949fddd433d80f5ce68a6949269464cb72491 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 14 Jul 2024 15:31:07 -0700 Subject: [PATCH] implement message location, start work on macros --- rust/Cargo.lock | 7 +++ rust/Cargo.toml | 1 + rust/src/identifier.rs | 6 -- rust/src/lib.rs | 2 + rust/src/macros.rs | 99 +++++++++++++++++++++++++++++++ rust/src/message.rs | 129 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 238 insertions(+), 6 deletions(-) create mode 100644 rust/src/macros.rs create mode 100644 rust/src/message.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index c8fc850cf8..c736c92eba 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -523,6 +523,7 @@ dependencies = [ "ordered-float", "thiserror", "unicase", + "unicode-width", "utf8-decode", "windows-sys 0.48.0", ] @@ -666,6 +667,12 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +[[package]] +name = "unicode-width" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" + [[package]] name = "utf8-decode" version = "1.0.1" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index f735638094..f980072290 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -24,6 +24,7 @@ libc = "0.2.147" indexmap = "2.1.0" utf8-decode = "1.0.1" bitflags = "2.5.0" +unicode-width = "0.1.13" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/src/identifier.rs b/rust/src/identifier.rs index 3d00520535..9977aa969d 100644 --- a/rust/src/identifier.rs +++ b/rust/src/identifier.rs @@ -59,10 +59,6 @@ impl IdentifierChar for char { } } } -#[test] -fn gc() { - println!("{:?}", '<'.get_major_category()); -} #[derive(Clone, Debug, ThisError)] pub enum Error { @@ -179,8 +175,6 @@ impl Identifier { } } - - /// Returns true if 
`token` is a case-insensitive match for `keyword`. /// /// Keywords match `keyword` and `token` are identical, or `token` is at least 3 diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 404ac18b67..46fe08622a 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -11,3 +11,5 @@ pub mod raw; pub mod sack; pub mod lex; pub mod prompt; +pub mod message; +pub mod macros; diff --git a/rust/src/macros.rs b/rust/src/macros.rs new file mode 100644 index 0000000000..bf7e830722 --- /dev/null +++ b/rust/src/macros.rs @@ -0,0 +1,99 @@ +use std::collections::HashMap; + +use unicase::UniCase; + +use crate::{ + lex::token::{MacroToken, Token}, + message::Location, +}; + +/// A PSPP macro as defined with `!DEFINE`. +pub struct Macro { + /// The macro's name. This is an ordinary identifier except that it is + /// allowed (but not required) to begin with `!`. + pub name: String, + + /// Source code location of macro definition, for error reporting. + pub location: Location, + + /// Parameters. + parameters: Vec, + + /// Body. + body: Vec, +} + +struct Parameter { + /// `!name` or `!1`. + name: String, + + /// Default value. + /// + /// The tokens don't include white space, etc. between them. + default: Vec, + + /// Macro-expand the argument? + expand_arg: bool, + + /// How the argument is specified. + arg: Arg, +} + +impl Parameter { + /// Returns true if this is a positional parameter. Positional parameters + /// are expanded by index (position) rather than by name. + fn is_positional(&self) -> bool { + self.name.as_bytes()[1].is_ascii_digit() + } +} + +enum Arg { + /// Argument consists of `.0` tokens. + NTokens(usize), + + /// Argument runs until token `.0`. + CharEnd(Token), + + /// Argument starts with token `.0` and ends with token `.1`. + Enclose(Token, Token), + + /// Argument runs until the end of the command. + CmdEnd, +} + +/// A token and the syntax that was tokenized to produce it. The syntax allows +/// the token to be turned back into syntax accurately. 
+struct BodyToken { + /// The token. + token: Token, + + /// The syntax that produces `token`. + syntax: String, +} + +type MacroSet = HashMap, Macro>; + +pub enum MacroCallState { + Arg, + Enclose, + Keyword, + Equals, + Finished, +} + +pub struct MacroCallBuilder<'a> { + macros: &'a MacroSet, +} + +impl<'a> MacroCallBuilder<'a> { + fn new(macro_set: &'a MacroSet, token: &Token) -> Option { + let macro_name = match token { + Token::Id(s) => s, + Token::MacroToken(MacroToken::MacroId(s)) => s, + _ => return None, + }.clone(); + let Some(macro_) = macro_set.get(&UniCase::new(macro_name)) else { + return None; + }; + } +} diff --git a/rust/src/message.rs b/rust/src/message.rs new file mode 100644 index 0000000000..757ea78625 --- /dev/null +++ b/rust/src/message.rs @@ -0,0 +1,129 @@ +use std::{ + cmp::{max, min}, + fmt::Result as FmtResult, + fmt::{Display, Formatter}, + ops::RangeInclusive, + sync::Arc, +}; + +use unicode_width::UnicodeWidthStr; + +/// A line number and optional column number within a source file. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Point { + /// 1-based line number. + line: i32, + + /// 1-based column number. + /// + /// Column numbers are measured according to the width of characters as shown in + /// a typical fixed-width font, in which CJK characters have width 2 and + /// combining characters have width 0. + column: Option, +} + +impl Point { + /// Takes `point`, adds to it the syntax in `syntax`, incrementing the line + /// number for each new-line in `syntax` and the column number for each + /// column, and returns the result. 
+ pub fn advance(&self, syntax: &str) -> Self { + let mut result = *self; + for line in syntax.split_inclusive('\n') { + if line.ends_with('\n') { + result.line += 1; + result.column = Some(1); + } else { + result.column = result.column.map(|column| column + line.width() as i32); + } + } + result + } + + pub fn without_column(&self) -> Self { + Self { + line: self.line, + column: None, + } + } +} + +/// Location relevant to a diagnostic message. +#[derive(Clone, Debug)] +pub struct Location { + /// File name, if any. + pub file_name: Option>, + + /// Starting and ending point, if any. + pub span: Option>, + + /// Normally, if `span` contains column information, then displaying the + /// message will underline the location. Setting this to true disables + /// displaying underlines. + pub omit_underlines: bool, +} + +impl Display for Location { + fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { + if let Some(file_name) = &self.file_name { + write!(f, "{}", file_name)?; + } + + if let Some(span) = &self.span { + if self.file_name.is_some() { + write!(f, ":")?; + } + let l1 = span.start().line; + let l2 = span.end().line; + if let (Some(c1), Some(c2)) = (span.start().column, span.end().column) { + if l2 > l1 { + write!(f, "{l1}.{c1}-{l2}.{c2}")?; + } else { + write!(f, "{l1}.{c1}-{c2}")?; + } + } else { + if l2 > l1 { + write!(f, "{l1}-{l2}")?; + } else { + write!(f, "{l1}")?; + } + } + } + Ok(()) + } +} + +impl Location { + pub fn without_columns(&self) -> Self { + Self { + file_name: self.file_name.clone(), + span: self + .span + .as_ref() + .map(|span| span.start().without_column()..=span.end().without_column()), + omit_underlines: self.omit_underlines, + } + } + pub fn merge(a: Option, b: &Option) -> Option { + let Some(a) = a else { return b.clone() }; + let Some(b) = b else { return Some(a) }; + if a.file_name != b.file_name { + // Failure. 
+ return Some(a); + } + let span = match (&a.span, &b.span) { + (None, None) => None, + (Some(r), None) | (None, Some(r)) => Some(r.clone()), + (Some(ar), Some(br)) => { + Some(min(ar.start(), br.start()).clone()..=max(ar.end(), br.end()).clone()) + } + }; + Some(Self { + file_name: a.file_name, + span, + omit_underlines: a.omit_underlines || b.omit_underlines, + }) + } + pub fn is_empty(&self) -> bool { + self.file_name.is_none() && self.span.is_none() + } +} -- 2.30.2