From 374e5044290f6190367690f2c374c40cfd3c854f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 12 Dec 2024 19:58:23 -0800 Subject: [PATCH] parsing improvements --- rust/Cargo.lock | 7 + rust/pspp-derive/src/lib.rs | 23 +-- rust/pspp/Cargo.toml | 1 + rust/pspp/src/command/crosstabs.rs | 19 +- rust/pspp/src/command/ctables.rs | 162 +++++++++++++----- rust/pspp/src/command/data_list.rs | 55 +++--- rust/pspp/src/command/descriptives.rs | 21 +-- rust/pspp/src/command/mod.rs | 238 +++++++++++++++----------- rust/pspp/src/engine.rs | 3 +- rust/pspp/src/lex/lexer.rs | 108 ++++++------ 10 files changed, 366 insertions(+), 271 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index a2ff7b0098..0254522591 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -295,6 +295,12 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "encoding_rs" version = "0.8.34" @@ -858,6 +864,7 @@ dependencies = [ "chrono", "clap", "diff", + "either", "encoding_rs", "enum-map", "finl_unicode", diff --git a/rust/pspp-derive/src/lib.rs b/rust/pspp-derive/src/lib.rs index 14e56ad4f0..afc1f5665f 100644 --- a/rust/pspp-derive/src/lib.rs +++ b/rust/pspp-derive/src/lib.rs @@ -30,6 +30,7 @@ fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { let struct_attrs = StructAttrs::parse(&ast.attrs)?; let mut body = TokenStream2::new(); let name = &ast.ident; + let (impl_generics, ty_generics, where_clause) = ast.generics.split_for_impl(); if struct_attrs.selector { let mut variants = Vec::new(); let mut default = None; @@ -79,7 +80,7 @@ fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { construct_fields(&variant.fields, quote! { #name::#ident }, false, None); let fnname = format_ident!("construct{index}"); body.extend(quote! { - fn #fnname<'a>(input: TokenSlice<'a>) -> ParseResult<'a, #name> { #construction } + fn #fnname #impl_generics(input: &TokenSlice) -> ParseResult<#name #ty_generics> #where_clause { let input = input.clone(); #construction } if let Ok(p) = #fnname(input) { return Ok(p); } @@ -88,10 +89,9 @@ fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result { body.extend(quote! { Err(ParseError::Mismatch(input.error("Syntax error.").into())) }); } - let lifetime = struct_attrs.lifetime(); let output = quote! { - impl<'a> FromTokens<'a> for #name #lifetime { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> { + impl #impl_generics FromTokens for #name #ty_generics #where_clause { + fn from_tokens(input: &TokenSlice) -> ParseResult { #body } } @@ -119,7 +119,7 @@ fn construct_fields( for (index, _field) in fields.iter().enumerate() { let varname = format_ident!("field{index}"); construction - .extend(quote! { let (#varname, input) = FromTokens::from_tokens(input) #convert ?.take_diagnostics(&mut diagnostics); }); + .extend(quote! { let (#varname, input) = FromTokens::from_tokens(&input) #convert ?.take_diagnostics(&mut diagnostics); }); } match fields { Fields::Named(named) => { @@ -158,10 +158,10 @@ fn derive_struct(ast: &DeriveInput, s: &DataStruct) -> Result FromTokens<'a> for #name #lifetime { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> { + impl #impl_generics FromTokens for #name #ty_generics #where_clause { + fn from_tokens(input: &TokenSlice) -> ParseResult { #construction } } @@ -201,7 +201,6 @@ impl FieldAttrs { } struct StructAttrs { - add_lifetime: bool, syntax: Option, selector: bool, } @@ -209,7 +208,6 @@ struct StructAttrs { impl Default for StructAttrs { fn default() -> Self { Self { - add_lifetime: false, syntax: None, selector: true, } @@ -217,9 +215,6 @@ impl Default for StructAttrs { } impl StructAttrs { - fn lifetime(&self) -> Option { - self.add_lifetime.then(|| quote! { <'a> }) - } fn parse(attributes: &[Attribute]) -> Result { //println!("{:?}", &attributes); let mut field_attrs = Self::default(); @@ -232,8 +227,6 @@ impl StructAttrs { meta.input.parse::()?; let syntax = meta.input.parse::()?; field_attrs.syntax = Some(syntax); - } else if meta.path.is_ident("add_lifetime") { - field_attrs.add_lifetime = true; } else if meta.path.is_ident("no_selector") { field_attrs.selector = false; } else { diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index 0a32a66cd3..7004bc070c 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -29,6 +29,7 @@ chardetng = "0.1.17" enum-map = "2.7.3" flagset = "0.4.6" pspp-derive = { version = "0.1.0", path = "../pspp-derive" } +either = "1.13.0" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/command/crosstabs.rs b/rust/pspp/src/command/crosstabs.rs index 14ce0b8d7c..7644169f2b 100644 --- a/rust/pspp/src/command/crosstabs.rs +++ b/rust/pspp/src/command/crosstabs.rs @@ -14,8 +14,8 @@ pub(super) fn crosstabs_command() -> Command { no_abbrev: false, name: "CROSSTABS", run: Box::new(|context| { - let input = context.lexer; - match ::from_tokens(input) { + let input = context.lexer.clone(); + match ::from_tokens(&input) { Ok(Parsed { value, rest: _, @@ -35,18 +35,16 @@ pub(super) fn crosstabs_command() -> Command { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct Crosstabs<'a>(Subcommands>); +struct Crosstabs(Subcommands); #[derive(Debug, pspp_derive::FromTokens)] #[pspp(syntax = "COUNT")] struct CountKw; #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -enum CrosstabsSubcommand<'a> { +enum CrosstabsSubcommand { #[pspp(default)] - Tables(Option, Punctuated, By>), + Tables(Option, Punctuated), Missing(Equals, Missing), Write(Option<(Equals, Write)>), HideSmallCounts(CountKw, Equals, Integer), @@ -55,7 +53,7 @@ enum CrosstabsSubcommand<'a> { Cells(Equals, Punctuated), Variables( Equals, - Punctuated<(VarRange<'a>, InParens<(Integer, Comma, Integer)>)>, + Punctuated<(VarRange, InParens<(Integer, Comma, Integer)>)>, ), Format(Equals, Punctuated), Count(Equals, Punctuated), @@ -64,9 +62,8 @@ enum CrosstabsSubcommand<'a> { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct BoundedVars<'a> { - vars: VarRange<'a>, +struct BoundedVars { + vars: VarRange, bounds: InParens, } diff --git a/rust/pspp/src/command/ctables.rs b/rust/pspp/src/command/ctables.rs index 55da82bbd8..328bac9976 100644 --- a/rust/pspp/src/command/ctables.rs +++ b/rust/pspp/src/command/ctables.rs @@ -1,7 +1,9 @@ +use either::Either; use flagset::FlagSet; use super::{ - And, By, Command, Equals, Gt, InSquares, Number, Plus, Punctuated, Seq1, Subcommands, VarList + And, Asterisk, By, Command, Dash, Equals, Exp, Gt, InSquares, Integer, Number, Plus, + Punctuated, Seq0, Seq1, Slash, Subcommands, VarList, }; use crate::{ command::{FromTokens, InParens, MismatchToError, ParseError, ParseResult, Parsed, TokenSlice}, @@ -15,10 +17,10 @@ pub(super) fn ctables_command() -> Command { enhanced_only: false, testing_only: false, no_abbrev: false, - name: "CROSSTABS", + name: "CTABLES", run: Box::new(|context| { - let input = context.lexer; - match ::from_tokens(input) { + let input = context.lexer.clone(); + match ::from_tokens(&input) { Ok(Parsed { value, rest: _, @@ -38,35 +40,31 @@ pub(super) fn ctables_command() -> Command { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct CTables<'a>(Subcommands>); +struct CTables(Subcommands); #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -enum CTablesSubcommand<'a> { - Table(Table<'a>), - Format(Seq1>), - VLabels(Seq1>), +enum CTablesSubcommand { + Table(Table), + Format(Seq1), + VLabels(Seq1), SMissing(SMissing), - PCompute(And, &'a Identifier, Equals, keyword::Expr, InParens), + PCompute(And, Identifier, Equals, keyword::Expr, InParens), } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct Table<'a> { - rows: Option>, - columns: Option<(By, Option>)>, - layers: Option<(By, Option>)>, +struct Table { + rows: Option, + columns: Option<(By, Option)>, + layers: Option<(By, Option)>, } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -enum Axis<'a> { - Variable(&'a Identifier, Option>), - Nest(Box>, Gt, Box>), - Stack(Box>, Plus, Box>), - Parens(InParens>>), - Annotate(InSquares>>), +enum Axis { + Variable(Identifier, Option>), + Nest(Box, Gt, Box), + Stack(Box, Plus, Box), + Parens(InParens>), + Annotate(InSquares>), } #[derive(Debug, pspp_derive::FromTokens)] @@ -76,22 +74,20 @@ enum Measurement { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct Annotation<'a> { - function: &'a Identifier, +struct Annotation { + function: Identifier, percentile: Option, - label: Option<&'a String>, + label: Option, format: Option, } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -enum Format<'a> { +enum Format { MinColWidth(Equals, Width), MaxColWidth(Equals, Width), Units(Equals, Unit), - Empty(Equals, Empty<'a>), - Missing(Equals, &'a String), + Empty(Equals, Empty), + Missing(Equals, String), } #[derive(Debug, pspp_derive::FromTokens)] @@ -102,11 +98,10 @@ enum Width { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(no_selector, add_lifetime)] -enum Empty<'a> { +enum Empty { Zero(keyword::Zero), Blank(keyword::Blank), - Value(&'a String), + Value(String), } #[derive(Debug, pspp_derive::FromTokens)] @@ -117,9 +112,8 @@ enum Unit { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -enum VLabels<'a> { - Variables(Equals, VarList<'a>), +enum VLabels { + Variables(Equals, VarList), Display(Display), } @@ -138,13 +132,40 @@ enum SMissing { Listwise, } +#[derive(Debug, pspp_derive::FromTokens)] +struct Expression(MulExpression, Seq0<(Either, Expression)>); + +#[derive(Debug, pspp_derive::FromTokens)] +struct MulExpression(PowExpression, Seq0<(Either, PowExpression)>); + +#[derive(Debug, pspp_derive::FromTokens)] +struct PowExpression(Terminal, Seq0<(Exp, PowExpression)>); + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(no_selector)] +enum Terminal { + Category(InSquares), + Missing(keyword::Missing), + OtherNm(keyword::OtherNm), + Subtotal(keyword::Subtotal, Option>), + Total(keyword::Total), + Number(Number), + Parens(InParens>), +} + +#[derive(Debug, pspp_derive::FromTokens)] +struct Category { + min: Value, + max: Option<(keyword::Thru, Value)>, +} + #[derive(Debug, pspp_derive::FromTokens)] #[pspp(no_selector)] -enum Expression { - //Category(InSquares>), - Missing, - OtherNm, - +enum Value { + Lo(keyword::Lo), + Hi(keyword::Hi), + Number(Number), + String(String), } mod keyword { @@ -165,4 +186,59 @@ mod keyword { #[derive(Debug, pspp_derive::FromTokens)] #[pspp(syntax = "blank")] pub struct Blank; + + #[derive(Debug, pspp_derive::FromTokens)] + #[pspp(syntax = "thru")] + pub struct Thru; + + #[derive(Debug, pspp_derive::FromTokens)] + #[pspp(syntax = "hi")] + pub struct Hi; + + #[derive(Debug, pspp_derive::FromTokens)] + #[pspp(syntax = "lo")] + pub struct Lo; + + #[derive(Debug, pspp_derive::FromTokens)] + #[pspp(syntax = "missing")] + pub struct Missing; + + #[derive(Debug, pspp_derive::FromTokens)] + #[pspp(syntax = "othernm")] + pub struct OtherNm; + + #[derive(Debug, pspp_derive::FromTokens)] + #[pspp(syntax = "subtotal")] + pub struct Subtotal; + + #[derive(Debug, pspp_derive::FromTokens)] + #[pspp(syntax = "total")] + pub struct Total; +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use encoding_rs::UTF_8; + + use crate::{ + engine::Engine, + lex::lexer::{Source, SourceFile}, + }; + + fn test(syntax: &str) { + let mut engine = Engine::new(); + engine.run(Source::new_default(&Arc::new( + SourceFile::for_file_contents(syntax.to_string(), Some("test.sps".to_string()), UTF_8), + ))); + } + + #[test] + fn basics() { + test( + "ctables /pcompute &all_drivers =expr([1 thru 2]) + /pcompute &all_drivers =expr(1).", + ); + } } diff --git a/rust/pspp/src/command/data_list.rs b/rust/pspp/src/command/data_list.rs index 5cdf1fb11b..fb4a1896ee 100644 --- a/rust/pspp/src/command/data_list.rs +++ b/rust/pspp/src/command/data_list.rs @@ -1,3 +1,4 @@ +use either::Either; use flagset::FlagSet; use super::{Comma, Command, Equals, Integer, Punctuated, Seq0, Seq1, Slash}; @@ -14,8 +15,7 @@ pub(super) fn data_list_command() -> Command { no_abbrev: false, name: "DATA LIST", run: Box::new(|context| { - let input = context.lexer; - match ::from_tokens(input) { + match ::from_tokens(&context.lexer) { Ok(Parsed { value, rest: _, @@ -35,69 +35,56 @@ pub(super) fn data_list_command() -> Command { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct DataList<'a>(Seq1>, Seq1>); +struct DataList(Seq1, Seq1); #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -enum Setting<'a> { - File(Equals, File<'a>), - Encoding(Equals, &'a String), +enum Setting { + File(Equals, Either), + Encoding(Equals, String), Fixed, - Free(Option>>>), - List(Option>>>), + Free(Option>>), + List(Option>>), Records(Equals, Integer), Skip(Equals, Integer), Table, NoTable, - End(Equals, &'a Identifier), + End(Equals, Identifier), } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -enum Delimiter<'a> { +enum Delimiter { #[pspp(default)] // XXX this allows `STRING "string"` - String(&'a String), + String(String), Tab, } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(no_selector, add_lifetime)] -enum File<'a> { - Name(&'a String), - Handle(&'a Identifier), -} - -#[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct Record<'a> { +struct Record { slash: Slash, record: Option, - variables: Seq0>, + variables: Seq0, } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct Variable<'a> { - names: Seq1<&'a Identifier>, - location: Location<'a>, +struct Variable { + names: Seq1, + location: Location, } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(no_selector, add_lifetime)] -enum Location<'a> { +#[pspp(no_selector)] +enum Location { Columns( Integer, Option, - Option)>>, + Option)>>, ), - Fortran(InParens, Format<'a>)>>), + Fortran(InParens, Format)>>), Asterisk, } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct Format<'a>(&'a Identifier); +struct Format(Identifier); #[cfg(test)] mod tests { diff --git a/rust/pspp/src/command/descriptives.rs b/rust/pspp/src/command/descriptives.rs index 604b02be41..791de70054 100644 --- a/rust/pspp/src/command/descriptives.rs +++ b/rust/pspp/src/command/descriptives.rs @@ -14,9 +14,9 @@ pub(super) fn descriptives_command() -> Command { no_abbrev: false, name: "DESCRIPTIVES", run: Box::new(|context| { - let mut input = context.lexer; + let mut input = context.lexer.clone(); while !input.is_empty() { - match >::from_tokens(input) { + match >::from_tokens(&input) { Ok(Parsed { value: subcommand, rest, @@ -39,16 +39,14 @@ pub(super) fn descriptives_command() -> Command { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct Descriptives<'a> { - subcommands: Seq1>>, +struct Descriptives { + subcommands: Seq1>, } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -enum DescriptivesSubcommand<'a> { +enum DescriptivesSubcommand { #[pspp(default)] - Variables(Option, Punctuated>), + Variables(Option, Punctuated), Missing(Equals, Seq1), Save, Statistics(Equals, Seq1), @@ -74,10 +72,9 @@ enum Format { } #[derive(Debug, pspp_derive::FromTokens)] -#[pspp(add_lifetime)] -struct DescriptivesVars<'a> { - vars: VarRange<'a>, - z_name: Option>, +struct DescriptivesVars { + vars: VarRange, + z_name: Option>, } #[derive(Debug, pspp_derive::FromTokens)] diff --git a/rust/pspp/src/command/mod.rs b/rust/pspp/src/command/mod.rs index 6fe65d5cf1..a4f84bea9d 100644 --- a/rust/pspp/src/command/mod.rs +++ b/rust/pspp/src/command/mod.rs @@ -9,6 +9,7 @@ use crosstabs::crosstabs_command; use ctables::ctables_command; use data_list::data_list_command; use descriptives::descriptives_command; +use either::Either; use flagset::{flags, FlagSet}; use pspp_derive::FromTokens; @@ -67,36 +68,36 @@ enum ParseError { } #[derive(Debug)] -struct Parsed<'a, T> { +struct Parsed { value: T, - rest: TokenSlice<'a>, + rest: TokenSlice, diagnostics: Diagnostics, } -impl<'a, T> Parsed<'a, T> { - pub fn new(value: T, rest: TokenSlice<'a>, warnings: Diagnostics) -> Self { +impl Parsed { + pub fn new(value: T, rest: TokenSlice, warnings: Diagnostics) -> Self { Self { value, rest: rest, diagnostics: warnings, } } - pub fn ok(value: T, rest: TokenSlice<'a>) -> Self { + pub fn ok(value: T, rest: TokenSlice) -> Self { Self { value, rest: rest, diagnostics: Diagnostics::default(), } } - pub fn into_tuple(self) -> (T, TokenSlice<'a>, Diagnostics) { + pub fn into_tuple(self) -> (T, TokenSlice, Diagnostics) { (self.value, self.rest, self.diagnostics) } - pub fn take_diagnostics(self, d: &mut Diagnostics) -> (T, TokenSlice<'a>) { + pub fn take_diagnostics(self, d: &mut Diagnostics) -> (T, TokenSlice) { let (value, rest, mut diagnostics) = self.into_tuple(); d.0.append(&mut diagnostics.0); (value, rest) } - pub fn map(self, f: F) -> Parsed<'a, R> + pub fn map(self, f: F) -> Parsed where F: FnOnce(T) -> R, { @@ -119,13 +120,13 @@ impl<'a, T> Parsed<'a, T> { } } -type ParseResult<'a, T> = Result, ParseError>; +type ParseResult = Result, ParseError>; trait MismatchToError { fn mismatch_to_error(self) -> Self; } -impl<'a, T> MismatchToError for ParseResult<'a, T> { +impl MismatchToError for ParseResult { fn mismatch_to_error(self) -> Self { match self { Err(ParseError::Mismatch(diagnostic)) => Err(ParseError::Error(diagnostic)), @@ -134,57 +135,74 @@ impl<'a, T> MismatchToError for ParseResult<'a, T> { } } -trait FromTokens<'a> { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> +trait FromTokens { + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized; } -impl<'a, T> FromTokens<'a> for Option +impl FromTokens for Option where - T: FromTokens<'a>, + T: FromTokens, { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { match T::from_tokens(input) { Ok(p) => Ok(p.map(Some)), - Err(ParseError::Mismatch(_)) => Ok(Parsed::ok(None, input)), + Err(ParseError::Mismatch(_)) => Ok(Parsed::ok(None, input.clone())), Err(ParseError::Error(error)) => Err(ParseError::Error(error)), } } } -impl<'a, A, B> FromTokens<'a> for (A, B) +impl FromTokens for Either where - A: FromTokens<'a>, - B: FromTokens<'a>, + L: FromTokens, + R: FromTokens, { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult + where + Self: Sized, + { + match L::from_tokens(input) { + Ok(p) => Ok(p.map(Either::Left)), + Err(ParseError::Mismatch(_)) => Ok(R::from_tokens(input)?.map(Either::Right)), + Err(ParseError::Error(error)) => Err(ParseError::Error(error)), + } + } +} + +impl FromTokens for (A, B) +where + A: FromTokens, + B: FromTokens, +{ + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { let (a, input, mut diagnostics) = A::from_tokens(input)?.into_tuple(); - let (b, rest, mut diagnostics2) = B::from_tokens(input)?.into_tuple(); + let (b, rest, mut diagnostics2) = B::from_tokens(&input)?.into_tuple(); diagnostics.0.append(&mut diagnostics2.0); Ok(Parsed::new((a, b), rest, diagnostics)) } } -impl<'a, A, B, C> FromTokens<'a> for (A, B, C) +impl FromTokens for (A, B, C) where - A: FromTokens<'a>, - B: FromTokens<'a>, - C: FromTokens<'a>, + A: FromTokens, + B: FromTokens, + C: FromTokens, { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { let (a, input, mut diagnostics) = A::from_tokens(input)?.into_tuple(); - let (b, input, mut diagnostics2) = B::from_tokens(input)?.into_tuple(); - let (c, rest, mut diagnostics3) = C::from_tokens(input)?.into_tuple(); + let (b, input, mut diagnostics2) = B::from_tokens(&input)?.into_tuple(); + let (c, rest, mut diagnostics3) = C::from_tokens(&input)?.into_tuple(); diagnostics.0.append(&mut diagnostics2.0); diagnostics.0.append(&mut diagnostics3.0); Ok(Parsed::new((a, b, c), rest, diagnostics)) @@ -198,8 +216,8 @@ pub struct Slash; #[derive(Debug)] pub struct Comma; -impl<'a> FromTokens<'a> for Comma { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> +impl FromTokens for Comma { + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -223,6 +241,18 @@ pub struct Gt; #[pspp(syntax = "+")] pub struct Plus; +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(syntax = "-")] +pub struct Dash; + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(syntax = "*")] +pub struct Asterisk; + +#[derive(Debug, pspp_derive::FromTokens)] +#[pspp(syntax = "**")] +pub struct Exp; + #[derive(Debug, pspp_derive::FromTokens)] #[pspp(syntax = "BY")] struct By; @@ -254,19 +284,20 @@ where } } -impl<'a, T, P> FromTokens<'a> for Punctuated +impl FromTokens for Punctuated where - T: FromTokens<'a>, - P: FromTokens<'a>, + T: FromTokens, + P: FromTokens, { - fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { let mut head = Vec::new(); let mut warnings_vec = Vec::new(); + let mut input = input.clone(); let tail = loop { - let t = match T::from_tokens(input) { + let t = match T::from_tokens(&input) { Ok(Parsed { value, rest, @@ -279,7 +310,7 @@ where Err(ParseError::Mismatch(_)) => break None, Err(ParseError::Error(e)) => return Err(ParseError::Error(e)), }; - let p = match P::from_tokens(input) { + let p = match P::from_tokens(&input) { Ok(Parsed { value, rest, @@ -302,11 +333,11 @@ where } } -impl<'a, T> FromTokens<'a> for Box +impl FromTokens for Box where - T: FromTokens<'a>, + T: FromTokens, { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -332,16 +363,17 @@ where } } -impl<'a, T> FromTokens<'a> for Subcommands +impl FromTokens for Subcommands where - T: FromTokens<'a>, + T: FromTokens, { - fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { let mut items = Vec::new(); let mut diagnostics = Vec::new(); + let mut input = input.clone(); loop { let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash)); if start.is_empty() { @@ -349,7 +381,7 @@ where } let end = start.skip_to(&Token::Punct(Punct::Slash)); let subcommand = start.subslice(0..start.len() - end.len()); - match T::from_tokens(subcommand) { + match T::from_tokens(&subcommand) { Ok(p) => { let (value, rest, mut d) = p.into_tuple(); items.push(value); @@ -376,18 +408,19 @@ where #[derive(Debug)] pub struct Seq0(Vec); -impl<'a, T> FromTokens<'a> for Seq0 +impl FromTokens for Seq0 where - T: FromTokens<'a>, + T: FromTokens, { - fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { let mut values_vec = Vec::new(); let mut warnings_vec = Vec::new(); + let mut input = input.clone(); while !input.is_empty() { - match T::from_tokens(input) { + match T::from_tokens(&input) { Ok(Parsed { value, rest, @@ -415,18 +448,19 @@ where #[derive(Debug)] pub struct Seq1(Vec); -impl<'a, T> FromTokens<'a> for Seq1 +impl FromTokens for Seq1 where - T: FromTokens<'a>, + T: FromTokens, { - fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { let mut values_vec = Vec::new(); let mut warnings_vec = Vec::new(); + let mut input = input.clone(); while !input.is_empty() { - match T::from_tokens(input) { + match T::from_tokens(&input) { Ok(Parsed { value, rest, @@ -455,11 +489,11 @@ where } /* -impl<'a, T> FromTokens<'a> for Vec +impl FromTokens for Vec where - T: FromTokens<'a>, + T: FromTokens, { - fn from_tokens(mut input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(mut input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -488,23 +522,23 @@ where } }*/ -impl<'a> FromTokens<'a> for TokenSlice<'a> { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> +impl FromTokens for TokenSlice { + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { - Ok(Parsed::ok(input, input.end())) + Ok(Parsed::ok(input.clone(), input.end())) } } #[derive(Debug)] struct Subcommand(pub T); -impl<'a, T> FromTokens<'a> for Subcommand +impl FromTokens for Subcommand where - T: FromTokens<'a>, + T: FromTokens, { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -516,7 +550,7 @@ where } let end = start.skip_to(&Token::Punct(Punct::Slash)); let subcommand = start.subslice(0..start.len() - end.len()); - let (value, rest, mut warnings) = T::from_tokens(subcommand)?.into_tuple(); + let (value, rest, mut warnings) = T::from_tokens(&subcommand)?.into_tuple(); if !rest.is_empty() { warnings .0 @@ -529,17 +563,17 @@ where #[derive(Debug)] struct InParens(pub T); -impl<'a, T> FromTokens<'a> for InParens +impl FromTokens for InParens where - T: FromTokens<'a>, + T: FromTokens, { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { let ((), rest, _) = parse_token(input, &Token::Punct(Punct::LParen))?.into_tuple(); - let (value, rest, warnings) = T::from_tokens(rest)?.into_tuple(); - let ((), rest, _) = parse_token(rest, &Token::Punct(Punct::RParen))?.into_tuple(); + let (value, rest, warnings) = T::from_tokens(&rest)?.into_tuple(); + let ((), rest, _) = parse_token(&rest, &Token::Punct(Punct::RParen))?.into_tuple(); Ok(Parsed { value: Self(value), rest, @@ -551,17 +585,17 @@ where #[derive(Debug)] struct InSquares(pub T); -impl<'a, T> FromTokens<'a> for InSquares +impl FromTokens for InSquares where - T: FromTokens<'a>, + T: FromTokens, { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { let ((), rest, _) = parse_token(input, &Token::Punct(Punct::LSquare))?.into_tuple(); - let (value, rest, warnings) = T::from_tokens(rest)?.into_tuple(); - let ((), rest, _) = parse_token(rest, &Token::Punct(Punct::RSquare))?.into_tuple(); + let (value, rest, warnings) = T::from_tokens(&rest)?.into_tuple(); + let ((), rest, _) = parse_token(&rest, &Token::Punct(Punct::RSquare))?.into_tuple(); Ok(Parsed { value: Self(value), rest, @@ -570,7 +604,7 @@ where } } -fn parse_token_if<'a, F, R>(input: TokenSlice<'a>, parse: F) -> ParseResult<'a, R> +fn parse_token_if(input: &TokenSlice, parse: F) -> ParseResult where F: Fn(&Token) -> Option, { @@ -582,7 +616,7 @@ where Err(ParseError::Mismatch(Diagnostics::default())) } -fn _parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, Token> { +fn _parse_token(input: &TokenSlice, token: &Token) -> ParseResult { if let Some(rest) = input.skip(token) { Ok(Parsed::ok(input.first().token.clone(), rest)) } else { @@ -592,7 +626,7 @@ fn _parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, Tok } } -fn parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, ()> { +fn parse_token(input: &TokenSlice, token: &Token) -> ParseResult<()> { if let Some(rest) = input.skip(token) { Ok(Parsed::ok((), rest)) } else { @@ -602,7 +636,7 @@ fn parse_token<'a>(input: TokenSlice<'a>, token: &Token) -> ParseResult<'a, ()> } } -fn parse_syntax<'a>(input: TokenSlice<'a>, syntax: &str) -> ParseResult<'a, ()> { +fn parse_syntax(input: &TokenSlice, syntax: &str) -> ParseResult<()> { if let Some(rest) = input.skip_syntax(syntax) { Ok(Parsed::ok((), rest)) } else { @@ -612,13 +646,13 @@ fn parse_syntax<'a>(input: TokenSlice<'a>, syntax: &str) -> ParseResult<'a, ()> } } -pub type VarList<'a> = Punctuated>; +pub type VarList = Punctuated; #[derive(Debug)] pub struct Number(f64); -impl<'a> FromTokens<'a> for Number { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> +impl FromTokens for Number { + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -630,8 +664,8 @@ impl<'a> FromTokens<'a> for Number { #[derive(Debug)] pub struct Integer(i64); -impl<'a> FromTokens<'a> for Integer { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> +impl FromTokens for Integer { + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -641,13 +675,13 @@ impl<'a> FromTokens<'a> for Integer { } } -pub enum VarRange<'a> { - Single(&'a Identifier), - Range(&'a Identifier, &'a Identifier), +pub enum VarRange { + Single(Identifier), + Range(Identifier, Identifier), All, } -impl<'a> Debug for VarRange<'a> { +impl Debug for VarRange { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Single(var) => write!(f, "{var:?}"), @@ -657,8 +691,8 @@ impl<'a> Debug for VarRange<'a> { } } -impl<'a> FromTokens<'a> for VarRange<'a> { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> +impl FromTokens for VarRange { + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -666,8 +700,8 @@ impl<'a> FromTokens<'a> for VarRange<'a> { Ok(Parsed::ok(Self::All, rest)) } else { let (from, rest, _) = parse_id(input)?.into_tuple(); - if let Ok(Parsed { rest, .. }) = parse_token(rest, &Token::Punct(Punct::To)) { - if let Ok(p) = parse_id(rest) { + if let Ok(Parsed { rest, .. }) = parse_token(&rest, &Token::Punct(Punct::To)) { + if let Ok(p) = parse_id(&rest) { return Ok(p.map(|to| Self::Range(from, to))); } } @@ -676,14 +710,14 @@ impl<'a> FromTokens<'a> for VarRange<'a> { } } -fn parse_id<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a Identifier> { +fn parse_id(input: &TokenSlice) -> ParseResult { let mut iter = input.iter(); if let Some(LexToken { token: Token::Id(id), .. }) = iter.next() { - Ok(Parsed::ok(id, iter.remainder())) + Ok(Parsed::ok(id.clone(), iter.remainder())) } else { Err(ParseError::Mismatch( input.error("Syntax error expecting identifier.").into(), @@ -691,7 +725,7 @@ fn parse_id<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a Identifier> { } } -fn parse_format<'a>(input: TokenSlice<'a>) -> ParseResult<'a, AbstractFormat> { +fn parse_format(input: &TokenSlice) -> ParseResult { let mut iter = input.iter(); if let Some(LexToken { token: Token::Id(id), @@ -707,14 +741,14 @@ fn parse_format<'a>(input: TokenSlice<'a>) -> ParseResult<'a, AbstractFormat> { )) } -fn parse_string<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a String> { +fn parse_string(input: &TokenSlice) -> ParseResult { let mut iter = input.iter(); if let Some(LexToken { token: Token::String(s), .. }) = iter.next() { - Ok(Parsed::ok(s, iter.remainder())) + Ok(Parsed::ok(s.clone(), iter.remainder())) } else { Err(ParseError::Mismatch( input.error("Syntax error expecting identifier.").into(), @@ -722,8 +756,8 @@ fn parse_string<'a>(input: TokenSlice<'a>) -> ParseResult<'a, &'a String> { } } -impl<'a> FromTokens<'a> for &'a Identifier { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> +impl<'a> FromTokens for Identifier { + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -731,8 +765,8 @@ impl<'a> FromTokens<'a> for &'a Identifier { } } -impl<'a> FromTokens<'a> for &'a String { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> +impl<'a> FromTokens for String { + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -740,8 +774,8 @@ impl<'a> FromTokens<'a> for &'a String { } } -impl<'a> FromTokens<'a> for AbstractFormat { - fn from_tokens(input: TokenSlice<'a>) -> ParseResult<'a, Self> +impl FromTokens for AbstractFormat { + fn from_tokens(input: &TokenSlice) -> ParseResult where Self: Sized, { @@ -749,7 +783,7 @@ impl<'a> FromTokens<'a> for AbstractFormat { } } -fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec> { +fn collect_subcommands(src: TokenSlice) -> Vec { src.split(|token| token.token == Token::Punct(Punct::Slash)) .filter(|slice| !slice.is_empty()) .collect() @@ -894,11 +928,11 @@ pub fn parse_command(lexer: TokenSlice, error: &Box) { pub struct Context<'a> { error: &'a Box, - lexer: TokenSlice<'a>, + lexer: TokenSlice, command_name: Option<&'static str>, } -impl<'a> Context<'a> { +impl Context<'_> { pub fn error(&self, diagnostic: Diagnostic) { (self.error)(diagnostic); } diff --git a/rust/pspp/src/engine.rs b/rust/pspp/src/engine.rs index 28982e4007..7e8c2fcaa5 100644 --- a/rust/pspp/src/engine.rs +++ b/rust/pspp/src/engine.rs @@ -4,6 +4,7 @@ use crate::{ macros::MacroSet, message::Diagnostic, }; +use std::rc::Rc; pub struct Engine; @@ -17,7 +18,7 @@ impl Engine { let error: Box = Box::new(|diagnostic| { println!("{diagnostic}"); }); - parse_command(TokenSlice::new(&tokens), &error); + parse_command(TokenSlice::new(Rc::new(tokens)), &error); } } } diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs index 7b125586c7..dc34052dd6 100644 --- a/rust/pspp/src/lex/lexer.rs +++ b/rust/pspp/src/lex/lexer.rs @@ -8,6 +8,7 @@ use std::{ mem::take, ops::{Range, RangeInclusive}, path::Path, + rc::Rc, sync::Arc, }; @@ -269,13 +270,21 @@ impl Debug for Tokens { } pub struct TokenSliceIter<'a> { - tokens: &'a [LexToken], + slice: &'a TokenSlice, + rest: Range, } impl<'a> TokenSliceIter<'a> { - pub fn remainder(&self) -> TokenSlice<'a> { + pub fn new(slice: &'a TokenSlice) -> Self { + Self { + slice, + rest: slice.range.clone(), + } + } + pub fn remainder(&self) -> TokenSlice { TokenSlice { - tokens: self.tokens, + backing: self.slice.backing.clone(), + range: self.rest.clone(), } } } @@ -284,25 +293,25 @@ impl<'a> Iterator for TokenSliceIter<'a> { type Item = &'a LexToken; fn next(&mut self) -> Option { - let (first, rest) = self.tokens.split_first().unwrap(); - if !rest.is_empty() { - self.tokens = rest; - Some(first) - } else { + if self.rest.is_empty() { None + } else { + self.rest.start += 1; + Some(&self.slice.backing.tokens[self.rest.start - 1]) } } } -#[derive(Copy, Clone)] -pub struct TokenSlice<'a> { - tokens: &'a [LexToken], +#[derive(Clone)] +pub struct TokenSlice { + backing: Rc, + range: Range, } -impl<'a> Debug for TokenSlice<'a> { +impl Debug for TokenSlice { fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult { write!(f, "TokenSlice {{ ")?; - for (index, token) in self.tokens[..self.tokens.len() - 1].iter().enumerate() { + for (index, token) in self.tokens().iter().enumerate() { if index > 0 { write!(f, ", ")?; } @@ -312,28 +321,21 @@ impl<'a> Debug for TokenSlice<'a> { } } -impl<'a> TokenSlice<'a> { - pub fn new(backing: &'a Tokens) -> Self { - Self { - tokens: backing.tokens.as_slice(), - } +impl TokenSlice { + pub fn new(backing: Rc) -> Self { + let range = 0..backing.tokens.len() - 1; + Self { backing, range } } - pub fn get_token(&self, index: usize) -> Option<&'a Token> { - //self.get(index).map(|token| &token.token) - if index < self.len() { - Some(&self.tokens[index].token) - } else { - None - } + fn tokens(&self) -> &[LexToken] { + &self.backing.tokens[self.range.clone()] + } + pub fn get_token(&self, index: usize) -> Option<&Token> { + self.get(index).map(|token| &token.token) } - pub fn get(&self, index: usize) -> Option<&'a LexToken> { - if index < self.len() { - Some(&self.tokens[index]) - } else { - None - } + pub fn get(&self, index: usize) -> Option<&LexToken> { + self.tokens().get(index) } pub fn error(&self, text: S) -> Diagnostic @@ -353,16 +355,19 @@ impl<'a> TokenSlice<'a> { pub fn subslice(&self, range: Range) -> Self { debug_assert!(range.start <= range.end); debug_assert!(range.end <= self.len()); + let start = self.range.start + range.start; + let end = start + range.len(); Self { - tokens: &self.tokens[range.start..range.end + 1], + backing: self.backing.clone(), + range: start..end, } } pub fn first(&self) -> &LexToken { - self.tokens.first().unwrap() + &self.backing.tokens[self.range.start] } fn last(&self) -> &LexToken { - self.tokens.last().unwrap() + &self.backing.tokens[self.range.end - 1] } pub fn end(&self) -> Self { self.subslice(self.len()..self.len()) @@ -379,17 +384,15 @@ impl<'a> TokenSlice<'a> { } pub fn len(&self) -> usize { - self.tokens.len() - 1 + self.tokens().len() } pub fn is_empty(&self) -> bool { self.len() == 0 } - pub fn iter(&self) -> TokenSliceIter<'a> { - TokenSliceIter { - tokens: self.tokens, - } + pub fn iter(&self) -> TokenSliceIter { + TokenSliceIter::new(self) } /// If the tokens contains a macro call, this returns the raw @@ -401,8 +404,8 @@ impl<'a> TokenSlice<'a> { /// Returns `None` if the token range doesn't include a macro call. fn get_macro_call(&self) -> Option<&str> { if self.iter().any(|token| token.macro_rep.is_some()) { - let token0 = &self.tokens[0]; - let token1 = &self.tokens[self.tokens.len() - 1]; + let token0 = self.first(); + let token1 = self.last(); if let Some(file) = self.file() { let start = token0.pos.start; let end = token1.pos.end; @@ -416,7 +419,9 @@ impl<'a> TokenSlice<'a> { fn location(&self) -> Location { if let Some(file) = self.file() { - file.token_location(self.first()..=self.last()) + file.token_location( + &self.backing.tokens[self.range.start]..=&self.backing.tokens[self.range.end], + ) } else { // XXX support non-contiguous locations? let first = self.first(); @@ -462,7 +467,7 @@ impl<'a> TokenSlice<'a> { pub fn skip_syntax(&self, syntax: &str) -> Option { let syntax_scanner = StringScanner::new(syntax, Syntax::Interactive, true); - let mut input = *self; + let mut input = self.clone(); for scan_token in syntax_scanner { let token = match scan_token { ScanToken::Token(token) => token, @@ -521,18 +526,15 @@ impl<'a> TokenSlice<'a> { } } - pub fn split(&'a self, predicate: F) -> impl Iterator + 'a + pub fn split(&self, predicate: F) -> impl Iterator + use<'_, F> where - F: Fn(&LexToken) -> bool + 'a, + F: Fn(&LexToken) -> bool, { - (&self.tokens[..self.len()]) - .split(predicate) - .map(move |slice| { - // SAFETY: `slice` is inside `self.tokens`. - let start_ofs = - unsafe { slice.as_ptr().offset_from(self.tokens.as_ptr()) } as usize; - self.subslice(start_ofs..start_ofs + slice.len()) - }) + self.tokens().split(predicate).map(move |slice| { + // SAFETY: `slice` is inside `self.tokens`. + let start_ofs = unsafe { slice.as_ptr().offset_from(self.tokens().as_ptr()) } as usize; + self.subslice(start_ofs..start_ofs + slice.len()) + }) } } -- 2.30.2