From: Ben Pfaff
Date: Sun, 8 Sep 2024 17:39:13 +0000 (-0700)
Subject: pspp-derive works a little
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0360bcc860df1f7ed716cf28fa36bc259c03a2f9;p=pspp

pspp-derive works a little
---

diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index 3d7a9ebc7d..a2ff7b0098 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -872,6 +872,7 @@ dependencies = [
  "num-derive",
  "num-traits",
  "ordered-float",
+ "pspp-derive",
  "thiserror",
  "unicase",
  "unicode-width",
@@ -879,6 +880,15 @@ dependencies = [
  "windows-sys 0.48.0",
 ]
 
+[[package]]
+name = "pspp-derive"
+version = "0.1.0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "pspp-lsp"
 version = "0.1.0"
@@ -892,9 +902,9 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.36"
+version = "1.0.37"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
+checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
 dependencies = [
  "proc-macro2",
 ]
@@ -1059,9 +1069,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
 [[package]]
 name = "syn"
-version = "2.0.75"
+version = "2.0.77"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9"
+checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 3aa9d37c26..224f9d0163 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -1,6 +1,7 @@
 [workspace]
 members = [
     "pspp",
+    "pspp-derive",
     "pspp-lsp",
 ]
 resolver = "2"
diff --git a/rust/pspp-derive/Cargo.toml b/rust/pspp-derive/Cargo.toml
new file mode 100644
index 0000000000..60b0cda0ae
--- /dev/null
+++ b/rust/pspp-derive/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "pspp-derive"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+proc-macro2 = "1.0.86"
+quote = "1.0.37"
+syn = "2.0.77"
+
+[lib]
+proc-macro = true
diff --git a/rust/pspp-derive/src/lib.rs b/rust/pspp-derive/src/lib.rs
new file mode 100644
index 0000000000..17ba7e6189
--- /dev/null
+++ b/rust/pspp-derive/src/lib.rs
@@ -0,0 +1,81 @@
+use proc_macro::TokenStream;
+use proc_macro2::{Literal, TokenStream as TokenStream2};
+use quote::quote;
+use syn::{spanned::Spanned, Attribute, DeriveInput, Error, Token};
+
+#[proc_macro_derive(FromTokens, attributes(pspp))]
+pub fn from_tokens_derive(input: TokenStream) -> TokenStream {
+    // Construct a representation of Rust code as a syntax tree
+    // that we can manipulate
+    let ast: DeriveInput = syn::parse(input).unwrap();
+
+    match parse_derive_input(ast) {
+        Ok(output) => output.into(),
+        Err(error) => error.to_compile_error().into()
+    }
+}
+
+fn parse_derive_input(ast: DeriveInput) -> Result<TokenStream2, Error> {
+    let syn::Data::Enum(e) = &ast.data else {
+        return Err(Error::new(ast.span(), "Only enums may currently be derived"));
+    };
+
+    let mut body = TokenStream2::new();
+    for (index, variant) in e.variants.iter().enumerate() {
+        let field_attrs = parse_attributes(&variant.attrs)?;
+        if index > 0 {
+            body.extend(quote! { else }.into_iter());
+        }
+        let ident = &variant.ident;
+        if let Some(syntax) = field_attrs.syntax {
+            body.extend(quote! { if cursor.match_syntax(#syntax) { Self::#ident }});
+        } else {
+            let ident_string = ident.to_string();
+            if ident_string.eq_ignore_ascii_case("all") {
+                body.extend(quote! { if cursor.match_(&Token::Punct(Punct::All)) { Self::#ident }});
+            } else {
+                body.extend(quote! { if cursor.match_keyword(#ident_string) { Self::#ident }});
+            };
+        }
+    }
+    body.extend(quote! { else { return Err(tokens.error("Syntax error.")); } });
+
+    let name = &ast.ident;
+    let output = quote! {
+        impl FromTokens for #name {
+            fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic> {
+                let cursor = Cursor::new(&tokens);
+                let value = #body;
+                Ok(value)
+            }
+        }
+    };
+    println!("{output}");
+    Ok(output)
+}
+
+#[derive(Default)]
+struct FieldAttrs {
+    syntax: Option<Literal>,
+}
+
+fn parse_attributes(attributes: &[Attribute]) -> Result<FieldAttrs, Error> {
+    println!("{:?}", &attributes);
+    let mut field_attrs = FieldAttrs::default();
+    for attr in attributes {
+        if !attr.path().is_ident("pspp") {
+            continue;
+        }
+        attr.parse_nested_meta(|meta| {
+            if meta.path.is_ident("syntax") {
+                println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input);
+                meta.input.parse::<Token![=]>()?;
+                let syntax = meta.input.parse::<Literal>()?;
+                println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input);
+                field_attrs.syntax = Some(syntax);
+            }
+            Ok(())
+        })?;
+    }
+    Ok(field_attrs)
+}
diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml
index 41b2f02c6f..0a32a66cd3 100644
--- a/rust/pspp/Cargo.toml
+++ b/rust/pspp/Cargo.toml
@@ -28,6 +28,7 @@ unicode-width = "0.1.13"
 chardetng = "0.1.17"
 enum-map = "2.7.3"
 flagset = "0.4.6"
+pspp-derive = { version = "0.1.0", path = "../pspp-derive" }
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
diff --git a/rust/pspp/build.rs b/rust/pspp/build.rs
index f8cb9efa13..7805bc2895 100644
--- a/rust/pspp/build.rs
+++ b/rust/pspp/build.rs
@@ -18,7 +18,7 @@ enum Source {
 type CodepageNumber = usize;
 
 fn process_converter<'a>(
-    fields: &Vec<&'a str>,
+    fields: &[&'a str],
     codepages: &mut BTreeMap<CodepageNumber, Vec<Vec<&'a str>>>,
 ) {
     if fields.is_empty() || fields[0] == "{" {
diff --git a/rust/pspp/src/command.rs b/rust/pspp/src/command.rs
index 6a6772d16a..8d9cae5fb3 100644
--- a/rust/pspp/src/command.rs
+++ b/rust/pspp/src/command.rs
@@ -2,12 +2,13 @@
 use std::{fmt::Write, ops::RangeFrom, sync::OnceLock};
 
 use flagset::{flags, FlagSet};
+use pspp_derive::FromTokens;
 
 use crate::{
     integer::ToInteger,
     lex::{
         command_name::CommandMatcher,
-        lexer::TokenSlice,
+        lexer::{Cursor, TokenSlice},
         token::{Punct, Token},
     },
     message::Diagnostic,
@@ -95,6 +96,116 @@ trait ParsedCommand {
 }
 */
 
+/*
+struct Descriptives<'a> {
+    subcommands: Vec<DescriptivesSubcommand<'a>>
+}
+enum DescriptivesSubcommand<'a> {
+    Variables(TokenSlice<'a>),
+    Missing(TokenSlice<'a>),
+    Save,
+    Statistics(Vec<Statistic>),
+    Sort(Sort),
+}
+
+
+
+
+struct Subcommand {
+    name: &str,
+}*/
+
+trait FromTokens {
+    fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic>
+    where
+        Self: Sized;
+}
+
+struct Sort {
+    key: SortKey,
+    direction: Option<Direction>,
+}
+
+#[derive(FromTokens)]
+enum SortKey {
+    Mean,
+    SMean,
+    Stddev,
+    Variance,
+    Range,
+    Min,
+    Max,
+    Sum,
+    Skewness,
+    Kurtosis,
+    Name,
+}
+
+#[derive(FromTokens)]
+enum Direction {
+    #[pspp(syntax = "(A)")]
+    Ascending,
+    #[pspp(syntax = "(D)")]
+    Descending,
+}
+
+#[derive(FromTokens)]
+enum Statistic {
+    Default,
+    Mean,
+    SeMean,
+    Stddev,
+    Variance,
+    Range,
+    Sum,
+    Min,
+    Max,
+    Skewness,
+    Kurtosis,
+    All,
+}
+/*
+impl FromTokens for Statistic {
+    fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic> {
+        let cursor = Cursor::new(&tokens);
+        let statistic = if cursor.match_keyword("default") {
+            Self::Default
+        } else if cursor.match_keyword("stddev") {
+            Self::Stddev
+        } else if cursor.match_keyword("variance") {
+            Self::Variance
+        } else if cursor.match_keyword("mean") {
+            Self::Mean
+        } else if cursor.match_keyword("semean") {
+            Self::SeMean
+        } else if cursor.match_keyword("sum") {
+            Self::Sum
+        } else if cursor.match_keyword("min") {
+            Self::Min
+        } else if cursor.match_keyword("max") {
+            Self::Max
+        } else if cursor.match_keyword("skewness") {
+            Self::Skewness
+        } else if cursor.match_keyword("kurtosis") {
+            Self::Kurtosis
+        } else if cursor.match_(&Token::Punct(Punct::All)) {
+            Self::All
+        } else {
+            return Err(tokens.error("Syntax error expecting statistic."))
+        };
+        // XXX warn for trailing tokens
+        Ok(statistic)
+    }
+}*/
+
+struct Foo;
+
+fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec<TokenSlice<'a>> {
+    src.split(|token| token.token == Token::Punct(Punct::Slash))
+        .filter(|slice| !slice.is_empty())
+        .collect()
+}
+
 fn commands() -> &'static [Command] {
     fn new_commands() -> Vec<Command> {
         vec![
@@ -108,7 +219,7 @@ fn commands() -> &'static [Command] {
                     let cursor = context.lexer.cursor();
                     match cursor.force_string() {
                         Ok(s) => println!("\"{s}\""),
-                        Err(e) => println!("{e}")
+                        Err(e) => println!("{e}"),
                     }
                 }),
             },
diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs
index 02d08e1f24..51721cbe0c 100644
--- a/rust/pspp/src/lex/lexer.rs
+++ b/rust/pspp/src/lex/lexer.rs
@@ -207,6 +207,7 @@ pub struct LexToken<'a> {
     macro_rep: Option,
 }
 
+#[allow(dead_code)]
 struct LexError {
     error: ScanError,
     pos: Range<usize>,
@@ -501,8 +502,23 @@ impl<'a> TokenSlice<'a> {
             text: s,
         }
     }
+
+    pub fn split<F>(&'a self, predicate: F) -> impl Iterator<Item = TokenSlice<'a>> + 'a
+    where
+        F: Fn(&LexToken) -> bool + 'a,
+    {
+        (&self.tokens[..self.len()])
+            .split(predicate)
+            .map(move |slice| {
+                // SAFETY: `slice` is inside `self.tokens`.
+                let start_ofs =
+                    unsafe { slice.as_ptr().offset_from(self.tokens.as_ptr()) } as usize;
+                self.subslice(start_ofs..start_ofs + slice.len())
+            })
+    }
 }
 
+#[derive(Clone)]
 pub struct Cursor<'a> {
     slice: &'a TokenSlice<'a>,
 
@@ -513,7 +529,14 @@ pub struct Cursor<'a> {
 
 impl<'a> Cursor<'a> {
     pub fn new(slice: &'a TokenSlice<'a>) -> Self {
-        Self { slice, pos: Cell::new(0) }
+        Self {
+            slice,
+            pos: Cell::new(0),
+        }
+    }
+
+    pub fn remainder(&self) -> TokenSlice<'a> {
+        self.slice.subslice(self.pos.get()..self.slice.len())
     }
 
     pub fn force_string(&self) -> Result<&str, Diagnostic> {
@@ -522,9 +545,77 @@ impl<'a> Cursor<'a> {
             self.pos.set(pos + 1);
             Ok(s.as_str())
         } else {
-            let slice = self.slice.subslice(pos..self.slice.len());
-            Err(slice.error("Syntax error expecting string."))
+            Err(self.error("Syntax error expecting string."))
+        }
+    }
+
+    pub fn error<S>(&self, text: S) -> Diagnostic
+    where
+        S: ToString,
+    {
+        self.remainder().error(text)
+    }
+
+    pub fn advance_to(&self, token: &Token) -> bool {
+        self.advance_until(|t| t == token)
+    }
+
+    pub fn advance_until<F>(&self, f: F) -> bool
+    where
+        F: Fn(&Token) -> bool,
+    {
+        while let Some(token) = self.token() {
+            if f(token) {
+                return true;
+            }
+            self.next();
         }
+        false
+    }
+
+    pub fn at(&self, token: &Token) -> bool {
+        if let Some(token2) = self.token() {
+            token == token2
+        } else {
+            false
+        }
+    }
+
+    pub fn match_(&self, token: &Token) -> bool {
+        let at = self.at(token);
+        if at {
+            self.next();
+        }
+        at
+    }
+
+    pub fn match_keyword(&self, keyword: &str) -> bool {
+        self.token()
+            .map_or(false, |token| token.matches_keyword(keyword))
+    }
+
+    pub fn at_end(&self) -> bool {
+        self.pos.get() >= self.slice.len()
+    }
+
+    pub fn token(&self) -> Option<&Token> {
+        self.slice.get_token(self.pos.get())
+    }
+
+    pub fn next(&self) {
+        if self.pos.get() < self.slice.len() {
+            self.pos.set(self.pos.get() + 1)
+        }
+    }
+
+    pub fn prev(&self) {
+        if self.pos.get() > 0 {
+            self.pos.set(self.pos.get() - 1)
+        }
+    }
+
+    pub fn match_syntax(&self, _syntax: &str) -> bool {
+        todo!()
+    }
 }
diff --git a/rust/pspp/src/lex/token.rs b/rust/pspp/src/lex/token.rs
index 06feca3ec6..2761f717ee 100644
--- a/rust/pspp/src/lex/token.rs
+++ b/rust/pspp/src/lex/token.rs
@@ -30,6 +30,10 @@ impl Token {
             _ => None,
         }
     }
+
+    pub fn matches_keyword(&self, keyword: &str) -> bool {
+        self.id().map_or(false, |id| id.matches_keyword(keyword))
+    }
 }
 
 fn is_printable(c: char) -> bool {
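
For orientation, here is roughly what #[derive(FromTokens)] expands to for the Direction enum above, traced by hand from the quote! template in rust/pspp-derive/src/lib.rs. This is a sketch, not captured compiler output; the Result<Self, Diagnostic> return type follows the FromTokens trait as reconstructed in rust/pspp/src/command.rs.

    // Hand-traced expansion; the real macro prints its generated code with
    // println!("{output}") when it runs.
    impl FromTokens for Direction {
        fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic> {
            let cursor = Cursor::new(&tokens);
            let value = if cursor.match_syntax("(A)") {
                Self::Ascending
            } else if cursor.match_syntax("(D)") {
                Self::Descending
            } else {
                return Err(tokens.error("Syntax error."));
            };
            Ok(value)
        }
    }

Variants without a #[pspp(syntax = ...)] attribute go through cursor.match_keyword() on the variant name instead, and a variant named All matches cursor.match_(&Token::Punct(Punct::All)). Note that Cursor::match_syntax() is still a todo!() stub, so the syntax-attribute path panics if it is actually reached.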
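
A plausible way these pieces fit together (hypothetical driver code, not part of this commit): split a command's tokens into subcommand slices on "/" with collect_subcommands(), then hand each slice to a derived from_tokens() impl and report any Diagnostic, mirroring the println!("{e}") pattern already used in command.rs. Here `tokens` is an assumed TokenSlice for one complete command.

    // Hypothetical caller inside command.rs, where FromTokens, Statistic,
    // and collect_subcommands() are all in scope.
    for subcommand in collect_subcommands(&tokens) {
        // Each `subcommand` covers the tokens between two slashes.
        match Statistic::from_tokens(&subcommand) {
            Ok(_statistic) => { /* use the parsed value */ }
            Err(diagnostic) => println!("{diagnostic}"),
        }
    }

As the "XXX warn for trailing tokens" comment in the hand-written Statistic impl notes, tokens left over after a successful match are not yet diagnosed.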