pspp-derive works a little
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 8 Sep 2024 17:39:13 +0000 (10:39 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 8 Sep 2024 17:39:13 +0000 (10:39 -0700)
rust/Cargo.lock
rust/Cargo.toml
rust/pspp-derive/Cargo.toml [new file with mode: 0644]
rust/pspp-derive/src/lib.rs [new file with mode: 0644]
rust/pspp/Cargo.toml
rust/pspp/build.rs
rust/pspp/src/command.rs
rust/pspp/src/lex/lexer.rs
rust/pspp/src/lex/token.rs

index 3d7a9ebc7d9d9249c83279d06e77a72ba557d018..a2ff7b0098f4ccc4a821bfe6d47d52b96ee32430 100644 (file)
@@ -872,6 +872,7 @@ dependencies = [
  "num-derive",
  "num-traits",
  "ordered-float",
+ "pspp-derive",
  "thiserror",
  "unicase",
  "unicode-width",
@@ -879,6 +880,15 @@ dependencies = [
  "windows-sys 0.48.0",
 ]
 
+[[package]]
+name = "pspp-derive"
+version = "0.1.0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "pspp-lsp"
 version = "0.1.0"
@@ -892,9 +902,9 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.36"
+version = "1.0.37"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
+checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
 dependencies = [
  "proc-macro2",
 ]
@@ -1059,9 +1069,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
 [[package]]
 name = "syn"
-version = "2.0.75"
+version = "2.0.77"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9"
+checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
 dependencies = [
  "proc-macro2",
  "quote",
index 3aa9d37c2633979e129ae872b14df2ea2308878f..224f9d01631c28769189a9b237603bbccff723a8 100644 (file)
@@ -1,6 +1,7 @@
 [workspace]
 members = [
   "pspp",
+  "pspp-derive",
   "pspp-lsp",
 ]
 resolver = "2"
diff --git a/rust/pspp-derive/Cargo.toml b/rust/pspp-derive/Cargo.toml
new file mode 100644 (file)
index 0000000..60b0cda
--- /dev/null
@@ -0,0 +1,12 @@
+[package]
+name = "pspp-derive"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+proc-macro2 = "1.0.86"
+quote = "1.0.37"
+syn = "2.0.77"
+
+[lib]
+proc-macro = true
diff --git a/rust/pspp-derive/src/lib.rs b/rust/pspp-derive/src/lib.rs
new file mode 100644 (file)
index 0000000..17ba7e6
--- /dev/null
@@ -0,0 +1,81 @@
+use proc_macro::TokenStream;
+use proc_macro2::{Literal, TokenStream as TokenStream2};
+use quote::quote;
+use syn::{spanned::Spanned, Attribute, DeriveInput, Error, Token};
+
+#[proc_macro_derive(FromTokens, attributes(pspp))]
+pub fn from_tokens_derive(input: TokenStream) -> TokenStream {
+    // Construct a representation of Rust code as a syntax tree
+    // that we can manipulate
+    let ast: DeriveInput = syn::parse(input).unwrap();
+
+    match parse_derive_input(ast) {
+        Ok(output) => output.into(),
+        Err(error) => error.to_compile_error().into(),
+    }
+}
+
+fn parse_derive_input(ast: DeriveInput) -> Result<TokenStream2, Error> {
+    let syn::Data::Enum(e) = &ast.data else {
+        return Err(Error::new(ast.span(), "Only enums may currently be derived"));
+    };
+
+    let mut body = TokenStream2::new();
+    for (index, variant) in e.variants.iter().enumerate() {
+        let field_attrs = parse_attributes(&variant.attrs)?;
+        if index > 0 {
+            body.extend(quote! { else }.into_iter());
+        }
+        let ident = &variant.ident;
+        if let Some(syntax) = field_attrs.syntax {
+            body.extend(quote! { if cursor.match_syntax(#syntax) { Self::#ident }});
+        } else {
+            let ident_string = ident.to_string();
+            if ident_string.eq_ignore_ascii_case("all") {
+                body.extend(quote! { if cursor.match_(&Token::Punct(Punct::All)) { Self::#ident }});
+            } else {
+                body.extend(quote! { if cursor.match_keyword(#ident_string) { Self::#ident }});
+            };
+        }
+    }
+    body.extend(quote! { else { return Err(tokens.error("Syntax error.")); } });
+
+    let name = &ast.ident;
+    let output = quote! {
+        impl FromTokens for #name {
+            fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic> {
+                let cursor = Cursor::new(&tokens);
+                let value = #body;
+                Ok(value)
+            }
+        }
+    };
+    // println!("{output}"); // debug: dump generated impl; remove before release
+    Ok(output)
+}
+
+#[derive(Default)]
+struct FieldAttrs {
+    syntax: Option<Literal>,
+}
+
+fn parse_attributes(attributes: &[Attribute]) -> Result<FieldAttrs, Error> {
+    // println!("{:?}", &attributes); // debug: dump raw attributes; remove before release
+    let mut field_attrs = FieldAttrs::default();
+    for attr in attributes {
+        if !attr.path().is_ident("pspp") {
+            continue;
+        }
+        attr.parse_nested_meta(|meta| {
+            if meta.path.is_ident("syntax") {
+                // println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input); // debug
+                meta.input.parse::<Token![=]>()?;
+                let syntax = meta.input.parse::<Literal>()?;
+                // println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input); // debug
+                field_attrs.syntax = Some(syntax);
+            }
+            Ok(())
+        })?;
+    }
+    Ok(field_attrs)
+}
index 41b2f02c6f27b85e8a1a44723a70a30413a7a698..0a32a66cd38d9acf51e10518cf11bd545bd7915a 100644 (file)
@@ -28,6 +28,7 @@ unicode-width = "0.1.13"
 chardetng = "0.1.17"
 enum-map = "2.7.3"
 flagset = "0.4.6"
+pspp-derive = { version = "0.1.0", path = "../pspp-derive" }
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
index f8cb9efa13a7439764c0773ff4a77caa6c30dbfa..7805bc2895c57553ba89055145fc3c2f0af3c53d 100644 (file)
@@ -18,7 +18,7 @@ enum Source {
 type CodepageNumber = usize;
 
 fn process_converter<'a>(
-    fields: &Vec<&'a str>,
+    fields: &[&'a str],
     codepages: &mut BTreeMap<CodepageNumber, BTreeMap<Source, Vec<&'a str>>>,
 ) {
     if fields.is_empty() || fields[0] == "{" {
index 6a6772d16aaf1d0e90fb471c5729eab9febc1077..8d9cae5fb36b98e093759d9a8d3fc7dc4fe30ae5 100644 (file)
@@ -2,12 +2,13 @@
 use std::{fmt::Write, ops::RangeFrom, sync::OnceLock};
 
 use flagset::{flags, FlagSet};
+use pspp_derive::FromTokens;
 
 use crate::{
     integer::ToInteger,
     lex::{
         command_name::CommandMatcher,
-        lexer::TokenSlice,
+        lexer::{Cursor, TokenSlice},
         token::{Punct, Token},
     },
     message::Diagnostic,
@@ -95,6 +96,116 @@ trait ParsedCommand {
 }
  */
 
+/*
+struct Descriptives<'a> {
+    subcommands: Vec<DescriptivesSubcommand<'a>>
+}
+enum DescriptivesSubcommand<'a> {
+    Variables(TokenSlice<'a>),
+    Missing(TokenSlice<'a>),
+    Save,
+    Statistics(Vec<Statistic>),
+    Sort(Sort),
+}
+
+
+
+
+struct Subcommand {
+    name: &str,
+}*/
+
+trait FromTokens {
+    fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic>
+    where
+        Self: Sized;
+}
+
+struct Sort {
+    key: SortKey,
+    direction: Option<Direction>,
+}
+
+#[derive(FromTokens)]
+enum SortKey {
+    Mean,
+    SMean,
+    Stddev,
+    Variance,
+    Range,
+    Min,
+    Max,
+    Sum,
+    Skewness,
+    Kurtosis,
+    Name,
+}
+
+#[derive(FromTokens)]
+enum Direction {
+    #[pspp(syntax = "(A)")]
+    Ascending,
+    #[pspp(syntax = "(D)")]
+    Descending,
+}
+
+#[derive(FromTokens)]
+enum Statistic {
+    Default,
+    Mean,
+    SeMean,
+    Stddev,
+    Variance,
+    Range,
+    Sum,
+    Min,
+    Max,
+    Skewness,
+    Kurtosis,
+    All,
+}
+/*
+impl FromTokens for Statistic {
+    fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic> {
+        let cursor = Cursor::new(&tokens);
+        let statistic = if cursor.match_keyword("default") {
+            Self::Default
+        } else if cursor.match_keyword("stddev") {
+            Self::Stddev
+        } else if cursor.match_keyword("variance") {
+            Self::Variance
+        } else if cursor.match_keyword("mean") {
+            Self::Mean
+        } else if cursor.match_keyword("semean") {
+            Self::SeMean
+        } else if cursor.match_keyword("sum") {
+            Self::Sum
+        } else if cursor.match_keyword("min") {
+            Self::Min
+        } else if cursor.match_keyword("max") {
+            Self::Max
+        } else if cursor.match_keyword("skewness") {
+            Self::Skewness
+        } else if cursor.match_keyword("kurtosis") {
+            Self::Kurtosis
+        } else if cursor.match_(&Token::Punct(Punct::All)) {
+            Self::All
+        } else {
+            return Err(tokens.error("Syntax error expecting statistic."))
+        };
+        // XXX warn for trailing tokens
+        Ok(statistic)
+    }
+}*/
+
+struct Foo;
+
+fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec<TokenSlice<'a>> {
+    src.split(|token| token.token == Token::Punct(Punct::Slash))
+        .filter(|slice| !slice.is_empty())
+        .collect()
+}
+
 fn commands() -> &'static [Command] {
     fn new_commands() -> Vec<Command> {
         vec![
@@ -108,7 +219,7 @@ fn commands() -> &'static [Command] {
                     let cursor = context.lexer.cursor();
                     match cursor.force_string() {
                         Ok(s) => println!("\"{s}\""),
-                        Err(e) => println!("{e}")
+                        Err(e) => println!("{e}"),
                     }
                 }),
             },
index 02d08e1f244d9c676c025f2e0d5f20ca53787e95..51721cbe0ca8d7d11b3490606c9239989fa7c05c 100644 (file)
@@ -207,6 +207,7 @@ pub struct LexToken<'a> {
     macro_rep: Option<MacroRepresentation>,
 }
 
+#[allow(dead_code)]
 struct LexError {
     error: ScanError,
     pos: Range<usize>,
@@ -501,8 +502,23 @@ impl<'a> TokenSlice<'a> {
             text: s,
         }
     }
+
+    pub fn split<F>(&'a self, predicate: F) -> impl Iterator<Item = Self> + 'a
+    where
+        F: Fn(&LexToken) -> bool + 'a,
+    {
+        (&self.tokens[..self.len()])
+            .split(predicate)
+            .map(move |slice| {
+                // SAFETY: `slice` is inside `self.tokens`.
+                let start_ofs =
+                    unsafe { slice.as_ptr().offset_from(self.tokens.as_ptr()) } as usize;
+                self.subslice(start_ofs..start_ofs + slice.len())
+            })
+    }
 }
 
+#[derive(Clone)]
 pub struct Cursor<'a> {
     slice: &'a TokenSlice<'a>,
 
@@ -513,7 +529,14 @@ pub struct Cursor<'a> {
 
 impl<'a> Cursor<'a> {
     pub fn new(slice: &'a TokenSlice<'a>) -> Self {
-        Self { slice, pos: Cell::new(0) }
+        Self {
+            slice,
+            pos: Cell::new(0),
+        }
+    }
+
+    pub fn remainder(&self) -> TokenSlice<'a> {
+        self.slice.subslice(self.pos.get()..self.slice.len())
     }
 
     pub fn force_string(&self) -> Result<&str, Diagnostic> {
@@ -522,9 +545,77 @@ impl<'a> Cursor<'a> {
             self.pos.set(pos + 1);
             Ok(s.as_str())
         } else {
-            let slice = self.slice.subslice(pos..self.slice.len());
-            Err(slice.error("Syntax error expecting string."))
+            Err(self.error("Syntax error expecting string."))
+        }
+    }
+
+    pub fn error<S>(&self, text: S) -> Diagnostic
+    where
+        S: ToString,
+    {
+        self.remainder().error(text)
+    }
+
+    pub fn advance_to(&self, token: &Token) -> bool {
+        self.advance_until(|t| t == token)
+    }
+
+    pub fn advance_until<F>(&self, f: F) -> bool
+    where
+        F: Fn(&Token) -> bool,
+    {
+        while let Some(token) = self.token() {
+            if f(token) {
+                return true;
+            }
+            self.next();
         }
+        false
+    }
+
+    pub fn at(&self, token: &Token) -> bool {
+        if let Some(token2) = self.token() {
+            token == token2
+        } else {
+            false
+        }
+    }
+
+    pub fn match_(&self, token: &Token) -> bool {
+        let at = self.at(token);
+        if at {
+            self.next();
+        }
+        at
+    }
+
+    pub fn match_keyword(&self, keyword: &str) -> bool {
+        self.token()
+            .map_or(false, |token| token.matches_keyword(keyword))
+    }
+
+    pub fn at_end(&self) -> bool {
+        self.pos.get() >= self.slice.len()
+    }
+
+    pub fn token(&self) -> Option<&Token> {
+        self.slice.get_token(self.pos.get())
+    }
+
+    pub fn next(&self) {
+        if self.pos.get() < self.slice.len() {
+            self.pos.set(self.pos.get() + 1)
+        }
+    }
+
+    pub fn prev(&self) {
+        if self.pos.get() > 0 {
+            self.pos.set(self.pos.get() - 1)
+        }
+    }
+
+    pub fn match_syntax(&self, _syntax: &str) -> bool {
+        todo!()
     }
 }
 
index 06feca3ec6a9871e070a2276088b83a2ced7bb5f..2761f717eec29e9cf02c2708094a12698e218b8d 100644 (file)
@@ -30,6 +30,10 @@ impl Token {
             _ => None,
         }
     }
+
+    pub fn matches_keyword(&self, keyword: &str) -> bool {
+        self.id().map_or(false, |id| id.matches_keyword(keyword))
+    }
 }
 
 fn is_printable(c: char) -> bool {