From: Ben Pfaff Date: Fri, 6 Dec 2024 01:15:56 +0000 (-0800) Subject: progress with parser X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9c278552b137784d9bfdbeafa2f13221a213b3ee;p=pspp progress with parser --- diff --git a/rust/pspp-derive/src/lib.rs b/rust/pspp-derive/src/lib.rs index 59c9d1e7e0..26e1c64fd4 100644 --- a/rust/pspp-derive/src/lib.rs +++ b/rust/pspp-derive/src/lib.rs @@ -29,52 +29,65 @@ fn parse_derive_input(ast: DeriveInput) -> Result { fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { let struct_attrs = StructAttrs::parse(&ast.attrs)?; let mut body = TokenStream2::new(); - let mut variants = Vec::new(); - let mut default = None; - for (index, variant) in e.variants.iter().enumerate() { - let field_attrs = FieldAttrs::parse(&variant.attrs)?; - if field_attrs.default { - if default.is_none() { - default = Some(index); - } else { - return Err(Error::new(variant.span(), "Duplicate default variant")); + let name = &ast.ident; + if struct_attrs.selector { + let mut variants = Vec::new(); + let mut default = None; + for (index, variant) in e.variants.iter().enumerate() { + let field_attrs = FieldAttrs::parse(&variant.attrs)?; + if field_attrs.default { + if default.is_none() { + default = Some(index); + } else { + return Err(Error::new(variant.span(), "Duplicate default variant")); + } } + variants.push((variant, field_attrs)); } - variants.push((variant, field_attrs)); - } - for (index, (variant, field_attrs)) in variants.iter().enumerate() { - if index > 0 { - body.extend(quote! { else }.into_iter()); + for (index, (variant, field_attrs)) in variants.iter().enumerate() { + if index > 0 { + body.extend(quote! { else }.into_iter()); + } + let ident = &variant.ident; + let ident_string = ident.to_string(); + let match_expr = if let Some(syntax) = &field_attrs.syntax { + quote! { input.skip_syntax(#syntax) } + } else if ident_string.eq_ignore_ascii_case("all") { + quote! { input.skip(&Token::Punct(Punct::All))} + } else { + quote! { input.skip_keyword(#ident_string)} + }; + let construction = + construct_fields(&variant.fields, quote! { Self::#ident}, true, None); + body.extend(quote! { if let Some(input) = #match_expr { #construction } }); } - let ident = &variant.ident; - let ident_string = ident.to_string(); - let match_expr = if let Some(syntax) = &field_attrs.syntax { - quote! { input.skip_syntax(#syntax) } - } else if ident_string.eq_ignore_ascii_case("all") { - quote! { input.skip(&Token::Punct(Punct::All))} - } else { - quote! { input.skip_keyword(#ident_string)} - }; - let construction = construct_fields(&variant.fields, quote! { Self::#ident}, true); - let check_equals = if struct_attrs.required_equals && !variant.fields.is_empty() { - quote! { let (Parsed { value: (), rest: input, diagnostics: _}) = parse_token(input, &Token::Punct(Punct::Equals)).mismatch_to_error()?; } + if let Some(default) = default { + let (variant, _field_attrs) = &variants[default]; + let ident = &variant.ident; + let construction = + construct_fields(&variant.fields, quote! { Self::#ident}, true, None); + body.extend(quote! { else { #construction } }); } else { - quote! {} - }; - body.extend(quote! { if let Some(input) = #match_expr { #check_equals #construction } }); - } - if let Some(default) = default { - let (variant, _field_attrs) = &variants[default]; - let ident = &variant.ident; - let construction = construct_fields(&variant.fields, quote! { Self::#ident}, true); - body.extend(quote! { else { #construction } }); + body.extend( + quote! { else { Err(ParseError::Mismatch(input.error("Syntax error.").into())) } }, + ); + } } else { - body.extend( - quote! { else { Err(ParseError::Mismatch(input.error("Syntax error.").into())) } }, - ); + for (index, variant) in e.variants.iter().enumerate() { + let ident = &variant.ident; + let construction = + construct_fields(&variant.fields, quote! { #name::#ident }, false, None); + let fnname = format_ident!("construct{index}"); + body.extend(quote! { + fn #fnname<'a>(input: TokenSlice<'a>) -> ParseResult<'a, #name> { #construction } + if let Ok(p) = #fnname(input) { + return Ok(p); + } + }); + } + body.extend(quote! { Err(ParseError::Mismatch(input.error("Syntax error.").into())) }); } - let name = &ast.ident; let lifetime = struct_attrs.lifetime(); let output = quote! { impl<'a> FromTokens<'a> for #name #lifetime { @@ -83,7 +96,7 @@ fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { } } }; - //println!("{output}"); + println!("{output}"); Ok(output) } @@ -91,8 +104,13 @@ fn construct_fields( fields: &Fields, name: impl ToTokens, mismatch_to_error: bool, + syntax: Option<&Literal>, ) -> impl ToTokens { let mut construction = TokenStream2::new(); + if !fields.is_empty() { + construction + .extend(quote! { let mut diagnostics = crate::command::Diagnostics::default(); }); + } let convert = if mismatch_to_error { quote! { .mismatch_to_error() } } else { @@ -101,7 +119,7 @@ fn construct_fields( for (index, _field) in fields.iter().enumerate() { let varname = format_ident!("field{index}"); construction - .extend(quote! { let Parsed { value: #varname, rest: input, diagnostics: _ } = FromTokens::from_tokens(input) #convert ?; }); + .extend(quote! { let (#varname, input) = FromTokens::from_tokens(input) #convert ?.take_diagnostics(&mut diagnostics); }); } match fields { Fields::Named(named) => { @@ -111,7 +129,7 @@ fn construct_fields( let field_name = &field.ident; body.extend(quote! { #field_name: #varname, }); } - quote! { #construction Ok(Parsed::ok(#name { #body }, input)) } + quote! { #construction Ok(Parsed::new(#name { #body }, input, diagnostics)) } } Fields::Unnamed(unnamed) => { let mut body = TokenStream2::new(); @@ -119,16 +137,27 @@ fn construct_fields( let varname = format_ident!("field{index}"); body.extend(quote! { #varname, }); } - quote! { #construction Ok(Parsed::ok(#name ( #body ), input)) } + quote! { #construction Ok(Parsed::new(#name ( #body ), input, diagnostics)) } + } + Fields::Unit => { + if let Some(syntax) = syntax { + quote! { crate::command::parse_syntax(input, #syntax).map(|p| p.map(|()| #name)) } + } else { + quote! { Ok(Parsed::ok(#name, input)) } + } } - Fields::Unit => quote! { Ok(Parsed::ok(#name, input)) }, } } fn derive_struct(ast: &DeriveInput, s: &DataStruct) -> Result { let struct_attrs = StructAttrs::parse(&ast.attrs)?; let name = &ast.ident; - let construction = construct_fields(&s.fields, quote! {#name}, false); + let construction = construct_fields( + &s.fields, + quote! {#name}, + false, + struct_attrs.syntax.as_ref(), + ); let lifetime = struct_attrs.lifetime(); let output = quote! { impl<'a> FromTokens<'a> for #name #lifetime { @@ -156,10 +185,8 @@ impl FieldAttrs { } attr.parse_nested_meta(|meta| { if meta.path.is_ident("syntax") { - //println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input); meta.input.parse::()?; let syntax = meta.input.parse::()?; - //println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input); field_attrs.syntax = Some(syntax); } else if meta.path.is_ident("default") { field_attrs.default = true; @@ -173,10 +200,20 @@ impl FieldAttrs { } } -#[derive(Default)] struct StructAttrs { add_lifetime: bool, - required_equals: bool, + syntax: Option, + selector: bool, +} + +impl Default for StructAttrs { + fn default() -> Self { + Self { + add_lifetime: false, + syntax: None, + selector: true, + } + } } impl StructAttrs { @@ -191,10 +228,14 @@ impl StructAttrs { continue; } attr.parse_nested_meta(|meta| { - if meta.path.is_ident("add_lifetime") { + if meta.path.is_ident("syntax") { + meta.input.parse::()?; + let syntax = meta.input.parse::()?; + field_attrs.syntax = Some(syntax); + } else if meta.path.is_ident("add_lifetime") { field_attrs.add_lifetime = true; - } else if meta.path.is_ident("required_equals") { - field_attrs.required_equals = true; + } else if meta.path.is_ident("no_selector") { + field_attrs.selector = false; } else { return Err(Error::new(meta.path.span(), "Unknown attribute")); } diff --git a/rust/pspp/src/command/crosstabs.rs b/rust/pspp/src/command/crosstabs.rs index 3634c171bb..14ce0b8d7c 100644 --- a/rust/pspp/src/command/crosstabs.rs +++ b/rust/pspp/src/command/crosstabs.rs @@ -1,9 +1,9 @@ use flagset::FlagSet; -use super::{By, Comma, Command, Integer, Number, Punctuated, Subcommands, VarList}; +use super::{By, Comma, Command, Equals, Integer, Number, Punctuated, Subcommands, VarList}; use crate::command::{ - parse_token, FromTokens, InParens, MismatchToError, ParseError, ParseResult, Parsed, Punct, - Token, TokenSlice, VarRange, + FromTokens, InParens, MismatchToError, ParseError, ParseResult, Parsed, Punct, Token, + TokenSlice, VarRange, }; pub(super) fn crosstabs_command() -> Command { @@ -39,20 +39,28 @@ pub(super) fn crosstabs_command() -> Command { struct Crosstabs<'a>(Subcommands>); #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime, required_equals)] +#[pspp(syntax = "COUNT")] +struct CountKw; + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime)] enum CrosstabsSubcommand<'a> { #[pspp(default)] - Tables(Punctuated, By>), - Missing(Missing), - Write(Write), - HideSmallCounts(HideSmallCounts), - ShowDim(Integer), - Statistics(Punctuated), - Cells(Punctuated), - Variables(Punctuated>), - Format(Punctuated), - Count(Punctuated), - Method(Method), + Tables(Option, Punctuated, By>), + Missing(Equals, Missing), + Write(Option<(Equals, Write)>), + HideSmallCounts(CountKw, Equals, Integer), + ShowDim(Equals, Integer), + Statistics(Equals, Punctuated), + Cells(Equals, Punctuated), + Variables( + Equals, + Punctuated<(VarRange<'a>, InParens<(Integer, Comma, Integer)>)>, + ), + Format(Equals, Punctuated), + Count(Equals, Punctuated), + Method(Equals, Method), + BarChart, } #[derive(Debug, pspp_derive::FromTokens)] @@ -182,13 +190,14 @@ mod tests { fn basics() { test( "CROSSTABS r by c /STATISTICS=CHISQ -/CELLS=COUNT EXPECTED RESID SRESID ASRESID. +/CELLS=COUNT EXPECTED RESID SRESID ASRESID +/HIDESMALLCOUNTS COUNT=6. ", ); } #[test] fn integer_mode() { - test("CROSSTABS VARIABLES=X (1,7) Y (1,7) /TABLES=X BY Y."); + test("CROSSTABS VARIABLES=X (1,7) Y (1,7) /TABLES=X BY Y/WRITE=CELLS."); } } diff --git a/rust/pspp/src/command/data_list.rs b/rust/pspp/src/command/data_list.rs new file mode 100644 index 0000000000..b951ed6142 --- /dev/null +++ b/rust/pspp/src/command/data_list.rs @@ -0,0 +1,133 @@ +use flagset::FlagSet; + +use super::{Comma, Command, Equals, Integer, Punctuated, Slash}; +use crate::{ + command::{FromTokens, InParens, MismatchToError, ParseError, ParseResult, Parsed, TokenSlice}, + identifier::Identifier, +}; + +pub(super) fn data_list_command() -> Command { + Command { + allowed_states: FlagSet::full(), + enhanced_only: false, + testing_only: false, + no_abbrev: false, + name: "DATA LIST", + run: Box::new(|context| { + let input = context.lexer; + match ::from_tokens(input) { + Ok(Parsed { + value, + rest: _, + diagnostics, + }) => { + println!("\n{value:#?}"); + //println!("rest: {rest:?}"); + println!("warnings: {diagnostics:?}"); + //println!("{:?}", DescriptivesSubcommand::from_tokens(subcommand.0)); + } + Err(error) => { + println!("{error:?}"); + } + } + }), + } +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime)] +struct DataList<'a>(Vec>); + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime)] +enum Setting<'a> { + File(Equals, File<'a>), + Encoding(&'a String), + Fixed, + Free(Option>>>), + List(Option>>>), + Records(Equals, Integer), + Skip(Equals, Integer), + Table, + NoTable, +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime)] +enum Delimiter<'a> { + #[pspp(default)] // XXX this allows `STRING "string"` + String(&'a String), + Tab, +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(no_selector, add_lifetime)] +enum File<'a> { + Name(&'a String), + Handle(&'a Identifier), +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime)] +struct Record<'a> { + slash: Slash, + record: Option, + variables: Vec>, +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime)] +struct Variable<'a> { + names: Vec<&'a Identifier>, + location: Location<'a>, +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(no_selector, add_lifetime)] +enum Location<'a> { + Columns( + Integer, + Option, + Option)>>, + ), + Fortran(InParens, Format<'a>)>>), + Asterisk, +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(add_lifetime)] +struct Format<'a>(&'a Identifier); + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use encoding_rs::UTF_8; + + use crate::{ + engine::Engine, + lex::lexer::{Source, SourceFile}, + }; + + fn test(syntax: &str) { + let mut engine = Engine::new(); + engine.run(Source::new_default(&Arc::new( + SourceFile::for_file_contents(syntax.to_string(), Some("test.sps".to_string()), UTF_8), + ))); + } + + #[test] + fn basics() { + test( + "CROSSTABS r by c /STATISTICS=CHISQ +/CELLS=COUNT EXPECTED RESID SRESID ASRESID +/HIDESMALLCOUNTS COUNT=6. +", + ); + } + + #[test] + fn integer_mode() { + test("CROSSTABS VARIABLES=X (1,7) Y (1,7) /TABLES=X BY Y/WRITE=CELLS."); + } +} diff --git a/rust/pspp/src/command/descriptives.rs b/rust/pspp/src/command/descriptives.rs index 7e57f3de07..20ab7587ff 100644 --- a/rust/pspp/src/command/descriptives.rs +++ b/rust/pspp/src/command/descriptives.rs @@ -1,9 +1,9 @@ use flagset::FlagSet; -use super::{Command, Punctuated, Subcommand}; +use super::{Comma, Command, Equals, Punctuated, Subcommand}; use crate::command::{ - parse_token, FromTokens, Identifier, InParens, MismatchToError, ParseError, ParseResult, - Parsed, Punct, Token, TokenSlice, VarRange, + FromTokens, Identifier, InParens, MismatchToError, ParseError, ParseResult, Parsed, Punct, + Token, TokenSlice, VarRange, }; pub(super) fn descriptives_command() -> Command { @@ -45,15 +45,15 @@ struct Descriptives<'a> { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime, required_equals)] +#[pspp(add_lifetime)] enum DescriptivesSubcommand<'a> { #[pspp(default)] - Variables(Punctuated>), - Missing(Vec), + Variables(Option, Punctuated>), + Missing(Equals, Vec), Save, - Statistics(Vec), - Sort(Sort), - Format(Vec), + Statistics(Equals, Vec), + Sort(Equals, Sort), + Format(Equals, Vec), } #[derive(Debug, pspp_derive::FromTokens)] @@ -83,7 +83,7 @@ struct DescriptivesVars<'a> { #[derive(Debug, pspp_derive::FromTokens)] struct Sort { key: SortKey, - direction: Option, + direction: Option<(Comma, Direction, Comma)>, } #[derive(Debug, pspp_derive::FromTokens)] @@ -103,9 +103,9 @@ enum SortKey { #[derive(Debug, pspp_derive::FromTokens)] enum Direction { - #[pspp(syntax = "(A)")] + #[pspp(syntax = "A")] Ascending, - #[pspp(syntax = "(D)")] + #[pspp(syntax = "D")] Descending, } diff --git a/rust/pspp/src/command/mod.rs b/rust/pspp/src/command/mod.rs index adf915fa02..7b8ac31ce5 100644 --- a/rust/pspp/src/command/mod.rs +++ b/rust/pspp/src/command/mod.rs @@ -6,6 +6,7 @@ use std::{ }; use crosstabs::crosstabs_command; +use data_list::data_list_command; use descriptives::descriptives_command; use flagset::{flags, FlagSet}; use pspp_derive::FromTokens; @@ -22,6 +23,7 @@ use crate::{ }; pub mod crosstabs; +pub mod data_list; pub mod descriptives; flags! { @@ -86,6 +88,11 @@ impl<'a, T> Parsed<'a, T> { pub fn into_tuple(self) -> (T, TokenSlice<'a>, Diagnostics) { (self.value, self.rest, self.diagnostics) } + pub fn take_diagnostics(self, d: &mut Diagnostics) -> (T, TokenSlice<'a>) { + let (value, rest, mut diagnostics) = self.into_tuple(); + d.0.append(&mut diagnostics.0); + (value, rest) + } pub fn map(self, f: F) -> Parsed<'a, R> where F: FnOnce(T) -> R, @@ -146,15 +153,66 @@ where } } +impl<'a, A, B> FromTokens<'a> for (A, B) +where + A: FromTokens<'a>, + B: FromTokens<'a>, +{ + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + let (a, input, mut diagnostics) = A::from_tokens(input)?.into_tuple(); + let (b, rest, mut diagnostics2) = B::from_tokens(input)?.into_tuple(); + diagnostics.0.append(&mut diagnostics2.0); + Ok(Parsed::new((a, b), rest, diagnostics)) + } +} + +impl<'a, A, B, C> FromTokens<'a> for (A, B, C) +where + A: FromTokens<'a>, + B: FromTokens<'a>, + C: FromTokens<'a>, +{ + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + let (a, input, mut diagnostics) = A::from_tokens(input)?.into_tuple(); + let (b, input, mut diagnostics2) = B::from_tokens(input)?.into_tuple(); + let (c, rest, mut diagnostics3) = C::from_tokens(input)?.into_tuple(); + diagnostics.0.append(&mut diagnostics2.0); + diagnostics.0.append(&mut diagnostics3.0); + Ok(Parsed::new((a, b, c), rest, diagnostics)) + } +} + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(syntax="/")] +pub struct Slash; + #[derive(Debug)] -pub struct Comma(Token); +pub struct Comma; impl<'a> FromTokens<'a> for Comma { fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> where Self: Sized, { - _parse_token(input, &Token::Punct(Punct::Comma)).map(|p| p.map(|token| Comma(token))) + _parse_token(input, &Token::Punct(Punct::Comma)).map(|p| p.map(|_| Comma)) + } +} + +#[derive(Debug)] +pub struct Equals(Token); + +impl<'a> FromTokens<'a> for Equals { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + _parse_token(input, &Token::Punct(Punct::Equals)).map(|p| p.map(|token| Equals(token))) } } @@ -295,6 +353,7 @@ where } input = end; } + println!("{diagnostics:?}"); Ok(Parsed { value: Subcommands(items), rest: input, @@ -428,12 +487,12 @@ fn parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, ()> } } -fn parse_keyword<'a>(input: TokenSlice<'a>, keyword: &str) -> ParseResult<'a, ()> { - if let Some(rest) = input.skip_if(|token| token.matches_keyword(keyword)) { +fn parse_syntax<'a>(input: TokenSlice<'a>, syntax: &str) -> ParseResult<'a, ()> { + if let Some(rest) = input.skip_syntax(syntax) { Ok(Parsed::ok((), rest)) } else { Err(ParseError::Mismatch( - input.error(format!("expecting {keyword}")).into(), + input.error(format!("expecting {syntax}")).into(), )) } } @@ -517,6 +576,21 @@ fn parse_id<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a Identifier> { } } +fn parse_string<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a String> { + let mut iter = input.iter(); + if let Some(LexToken { + token: Token::String(s), + .. + }) = iter.next() + { + Ok(Parsed::ok(s, iter.remainder())) + } else { + Err(ParseError::Mismatch( + input.error("Syntax error expecting identifier.").into(), + )) + } +} + impl<'a> FromTokens<'a> for &'a Identifier { fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> where @@ -526,6 +600,15 @@ impl<'a> FromTokens<'a> for &'a Identifier { } } +impl<'a> FromTokens<'a> for &'a String { + fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + where + Self: Sized, + { + parse_string(input) + } +} + fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec> { src.split(|token| token.token == Token::Punct(Punct::Slash)) .filter(|slice| !slice.is_empty()) @@ -537,6 +620,7 @@ fn commands() -> &'static [Command] { vec![ descriptives_command(), crosstabs_command(), + data_list_command(), Command { allowed_states: FlagSet::full(), enhanced_only: false, diff --git a/rust/pspp/src/lex/token.rs b/rust/pspp/src/lex/token.rs index 1f94f3be7b..b7119d9cae 100644 --- a/rust/pspp/src/lex/token.rs +++ b/rust/pspp/src/lex/token.rs @@ -55,6 +55,13 @@ impl Token { _ => None, } } + + pub fn as_id(&self) -> Option<&Identifier> { + match self { + Self::Id(id) => Some(id), + _ => None, + } + } } fn is_printable(c: char) -> bool {