"num-derive",
"num-traits",
"ordered-float",
+ "pspp-derive",
"thiserror",
"unicase",
"unicode-width",
"windows-sys 0.48.0",
]
+[[package]]
+name = "pspp-derive"
+version = "0.1.0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "pspp-lsp"
version = "0.1.0"
[[package]]
name = "quote"
-version = "1.0.36"
+version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
+checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
dependencies = [
"proc-macro2",
]
[[package]]
name = "syn"
-version = "2.0.75"
+version = "2.0.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9"
+checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
dependencies = [
"proc-macro2",
"quote",
[workspace]
members = [
"pspp",
+ "pspp-derive",
"pspp-lsp",
]
resolver = "2"
--- /dev/null
+[package]
+name = "pspp-derive"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+proc-macro2 = "1.0.86"
+quote = "1.0.37"
+syn = "2.0.77"
+
+[lib]
+proc-macro = true
--- /dev/null
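+//! Procedural macros for the `pspp` crate.
+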
+use proc_macro::TokenStream;
+use proc_macro2::{Literal, TokenStream as TokenStream2};
+use quote::quote;
+use syn::{spanned::Spanned, Attribute, DeriveInput, Error, Token};
+
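+/// Derives `FromTokens` for a fieldless enum. Each variant matches its own
+/// name as a keyword, except that a variant named `All` matches the `ALL`
+/// punctuator and `#[pspp(syntax = "...")]` substitutes explicit syntax.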
+#[proc_macro_derive(FromTokens, attributes(pspp))]
+pub fn from_tokens_derive(input: TokenStream) -> TokenStream {
+ // Construct a representation of Rust code as a syntax tree
+ // that we can manipulate
+ let ast: DeriveInput = syn::parse(input).unwrap();
+
+    match parse_derive_input(ast) {
+        Ok(output) => output.into(),
+        Err(error) => error.to_compile_error().into(),
+    }
+}
+
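+/// Generates the `FromTokens` impl: an `if`/`else if` chain that tries each
+/// variant in declaration order and otherwise reports a syntax error.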
+fn parse_derive_input(ast: DeriveInput) -> Result<TokenStream2, Error> {
+ let syn::Data::Enum(e) = &ast.data else {
+ return Err(Error::new(ast.span(), "Only enums may currently be derived"));
+ };
+
+ let mut body = TokenStream2::new();
+ for (index, variant) in e.variants.iter().enumerate() {
+ let field_attrs = parse_attributes(&variant.attrs)?;
+ if index > 0 {
+ body.extend(quote! { else }.into_iter());
+ }
+ let ident = &variant.ident;
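+        // An explicit `#[pspp(syntax = "...")]` attribute overrides the
+        // default match on the variant name.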
+ if let Some(syntax) = field_attrs.syntax {
+ body.extend(quote! { if cursor.match_syntax(#syntax) { Self::#ident }});
+ } else {
+ let ident_string = ident.to_string();
+ if ident_string.eq_ignore_ascii_case("all") {
+ body.extend(quote! { if cursor.match_(&Token::Punct(Punct::All)) { Self::#ident }});
+ } else {
+ body.extend(quote! { if cursor.match_keyword(#ident_string) { Self::#ident }});
+            }
+ }
+ }
+ body.extend(quote! { else { return Err(tokens.error("Syntax error.")); } });
+
+ let name = &ast.ident;
+ let output = quote! {
+ impl FromTokens for #name {
+ fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic> {
+ let cursor = Cursor::new(&tokens);
+ let value = #body;
+ Ok(value)
+ }
+ }
+ };
+ println!("{output}");
+ Ok(output)
+}
+
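+/// Attributes collected from `#[pspp(...)]` on a single enum variant.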
+#[derive(Default)]
+struct FieldAttrs {
+ syntax: Option<Literal>,
+}
+
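+/// Collects `#[pspp(syntax = "...")]` from a variant's attributes, skipping
+/// attributes that do not belong to this derive.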
+fn parse_attributes(attributes: &[Attribute]) -> Result<FieldAttrs, Error> {
+ println!("{:?}", &attributes);
+ let mut field_attrs = FieldAttrs::default();
+ for attr in attributes {
+ if !attr.path().is_ident("pspp") {
+ continue;
+ }
+ attr.parse_nested_meta(|meta| {
+ if meta.path.is_ident("syntax") {
+ println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input);
+ meta.input.parse::<Token![=]>()?;
+ let syntax = meta.input.parse::<Literal>()?;
+ println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input);
+ field_attrs.syntax = Some(syntax);
+ }
+ Ok(())
+ })?;
+ }
+ Ok(field_attrs)
+}
chardetng = "0.1.17"
enum-map = "2.7.3"
flagset = "0.4.6"
+pspp-derive = { version = "0.1.0", path = "../pspp-derive" }
[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] }
type CodepageNumber = usize;
fn process_converter<'a>(
- fields: &Vec<&'a str>,
+ fields: &[&'a str],
codepages: &mut BTreeMap<CodepageNumber, BTreeMap<Source, Vec<&'a str>>>,
) {
if fields.is_empty() || fields[0] == "{" {
use std::{fmt::Write, ops::RangeFrom, sync::OnceLock};
use flagset::{flags, FlagSet};
+use pspp_derive::FromTokens;
use crate::{
integer::ToInteger,
lex::{
command_name::CommandMatcher,
- lexer::TokenSlice,
+ lexer::{Cursor, TokenSlice},
token::{Punct, Token},
},
message::Diagnostic,
}
*/
+/*
+struct Descriptives<'a> {
+ subcommands: Vec<DescriptivesSubcommand<'a>>
+}
+enum DescriptivesSubcommand<'a> {
+ Variables(TokenSlice<'a>),
+ Missing(TokenSlice<'a>),
+ Save,
+ Statistics(Vec<Statistic>),
+ Sort(Sort),
+}
+
+struct Subcommand {
+ name: &str,
+}*/
+
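+/// Parses an instance of a type from a slice of tokens.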
+trait FromTokens {
+ fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic>
+ where
+ Self: Sized;
+}
+
+struct Sort {
+ key: SortKey,
+ direction: Option<Direction>,
+}
+
+#[derive(FromTokens)]
+enum SortKey {
+ Mean,
+ SMean,
+ Stddev,
+ Variance,
+ Range,
+ Min,
+ Max,
+ Sum,
+ Skewness,
+ Kurtosis,
+ Name,
+}
+
+#[derive(FromTokens)]
+enum Direction {
+ #[pspp(syntax = "(A)")]
+ Ascending,
+ #[pspp(syntax = "(D)")]
+ Descending,
+}
+
+#[derive(FromTokens)]
+enum Statistic {
+ Default,
+ Mean,
+ SeMean,
+ Stddev,
+ Variance,
+ Range,
+ Sum,
+ Min,
+ Max,
+ Skewness,
+ Kurtosis,
+ All,
+}
+/*
+impl FromTokens for Statistic {
+ fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic> {
+ let cursor = Cursor::new(&tokens);
+ let statistic = if cursor.match_keyword("default") {
+ Self::Default
+ } else if cursor.match_keyword("stddev") {
+ Self::Stddev
+ } else if cursor.match_keyword("variance") {
+ Self::Variance
+ } else if cursor.match_keyword("mean") {
+ Self::Mean
+ } else if cursor.match_keyword("semean") {
+ Self::SeMean
+ } else if cursor.match_keyword("sum") {
+ Self::Sum
+ } else if cursor.match_keyword("min") {
+ Self::Min
+ } else if cursor.match_keyword("max") {
+ Self::Max
+ } else if cursor.match_keyword("skewness") {
+ Self::Skewness
+ } else if cursor.match_keyword("kurtosis") {
+ Self::Kurtosis
+ } else if cursor.match_(&Token::Punct(Punct::All)) {
+ Self::All
+ } else {
+ return Err(tokens.error("Syntax error expecting statistic."))
+ };
+ // XXX warn for trailing tokens
+ Ok(statistic)
+ }
+}*/
+
+struct Foo;
+
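+/// Splits `src` into one token slice per subcommand, using `/` as the
+/// separator and dropping any empty slices.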
+fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec<TokenSlice<'a>> {
+ src.split(|token| token.token == Token::Punct(Punct::Slash))
+ .filter(|slice| !slice.is_empty())
+ .collect()
+}
+
fn commands() -> &'static [Command] {
fn new_commands() -> Vec<Command> {
vec![
let cursor = context.lexer.cursor();
match cursor.force_string() {
Ok(s) => println!("\"{s}\""),
- Err(e) => println!("{e}")
+ Err(e) => println!("{e}"),
}
}),
},
macro_rep: Option<MacroRepresentation>,
}
+#[allow(dead_code)]
struct LexError {
error: ScanError,
pos: Range<usize>,
text: s,
}
}
+
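+    /// Returns an iterator over the subslices of this slice that lie between
+    /// tokens matching `predicate`, like `slice::split`.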
+ pub fn split<F>(&'a self, predicate: F) -> impl Iterator<Item = Self> + 'a
+ where
+ F: Fn(&LexToken) -> bool + 'a,
+ {
+ (&self.tokens[..self.len()])
+ .split(predicate)
+ .map(move |slice| {
+ // SAFETY: `slice` is inside `self.tokens`.
+ let start_ofs =
+ unsafe { slice.as_ptr().offset_from(self.tokens.as_ptr()) } as usize;
+ self.subslice(start_ofs..start_ofs + slice.len())
+ })
+ }
}
+#[derive(Clone)]
pub struct Cursor<'a> {
slice: &'a TokenSlice<'a>,
impl<'a> Cursor<'a> {
pub fn new(slice: &'a TokenSlice<'a>) -> Self {
- Self { slice, pos: Cell::new(0) }
+ Self {
+ slice,
+ pos: Cell::new(0),
+ }
+ }
+
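+    /// Returns the tokens from the current position to the end of the slice.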
+ pub fn remainder(&self) -> TokenSlice<'a> {
+ self.slice.subslice(self.pos.get()..self.slice.len())
}
pub fn force_string(&self) -> Result<&str, Diagnostic> {
self.pos.set(pos + 1);
Ok(s.as_str())
} else {
- let slice = self.slice.subslice(pos..self.slice.len());
- Err(slice.error("Syntax error expecting string."))
+ Err(self.error("Syntax error expecting string."))
+ }
+ }
+
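+    /// Reports a diagnostic against the tokens that remain at the current
+    /// position.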
+ pub fn error<S>(&self, text: S) -> Diagnostic
+ where
+ S: ToString,
+ {
+ self.remainder().error(text)
+ }
+
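+    /// Advances until `token` is the current token, returning true if it was
+    /// found before the end of the slice.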
+ pub fn advance_to(&self, token: &Token) -> bool {
+ self.advance_until(|t| t == token)
+ }
+
+ pub fn advance_until<F>(&self, f: F) -> bool
+ where
+ F: Fn(&Token) -> bool,
+ {
+ while let Some(token) = self.token() {
+ if f(token) {
+ return true;
+ }
+ self.next();
}
+ false
+ }
+
+ pub fn at(&self, token: &Token) -> bool {
+ if let Some(token2) = self.token() {
+ token == token2
+ } else {
+ false
+ }
+ }
+
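+    /// If the current token is `token`, consumes it and returns true.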
+ pub fn match_(&self, token: &Token) -> bool {
+ let at = self.at(token);
+ if at {
+ self.next();
+ }
+ at
+ }
+
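+    /// If the current token is an identifier that matches `keyword`, consumes
+    /// it and returns true.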
+    pub fn match_keyword(&self, keyword: &str) -> bool {
+        let matched = self.token().map_or(false, |t| t.matches_keyword(keyword));
+        if matched {
+            self.next();
+        }
+        matched
+    }
+
+ pub fn at_end(&self) -> bool {
+ self.pos.get() >= self.slice.len()
+ }
+
+ pub fn token(&self) -> Option<&Token> {
+ self.slice.get_token(self.pos.get())
+ }
+
+ pub fn next(&self) {
+ if self.pos.get() < self.slice.len() {
+ self.pos.set(self.pos.get() + 1)
+ }
+ }
+
+ pub fn prev(&self) {
+ if self.pos.get() > 0 {
+ self.pos.set(self.pos.get() - 1)
+ }
+ }
+
+ pub fn match_syntax(&self, _syntax: &str) -> bool {
+ todo!()
}
}
_ => None,
}
}
+
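+    /// Returns true if this token is an identifier that matches `keyword`,
+    /// ignoring case.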
+ pub fn matches_keyword(&self, keyword: &str) -> bool {
+ self.id().map_or(false, |id| id.matches_keyword(keyword))
+ }
}
fn is_printable(c: char) -> bool {