rust: parsing approach works, DESCRIPTIVES can be parsed
author    Ben Pfaff <blp@cs.stanford.edu>
          Sat, 14 Sep 2024 16:55:12 +0000 (09:55 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
          Sat, 14 Sep 2024 16:55:12 +0000 (09:55 -0700)
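
This round makes the #[derive(FromTokens)] approach in pspp-derive actually
work: enum variants match on their keyword (or an explicit
#[pspp(syntax = "...")] literal, or ALL), struct and tuple fields parse
recursively through the new construct_fields helper, and the struct-level
attributes #[pspp(add_lifetime)] and #[pspp(required_equals)] control the
generated impl. Cursor gains position save/restore for backtracking,
LexToken owns its SourceFile through an Arc, and a first cut of the
DESCRIPTIVES command parses end to end in a new engine.rs test.

For a keyword enum such as Missing, the derive now emits roughly the
following (a sketch of the generated code, not rustc's exact expansion):

    impl<'a> FromTokens<'a> for Missing {
        fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic> {
            Ok(if cursor.match_keyword("Variable") {
                Self::Variable
            } else if cursor.match_keyword("Listwise") {
                Self::Listwise
            } else if cursor.match_keyword("Include") {
                Self::Include
            } else {
                return Err(cursor.error("Syntax error."));
            })
        }
    }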
rust/pspp-derive/src/lib.rs
rust/pspp/src/command.rs
rust/pspp/src/engine.rs
rust/pspp/src/lex/lexer.rs
rust/pspp/src/lex/segment/test.rs
rust/pspp/src/message.rs

diff --git a/rust/pspp-derive/src/lib.rs b/rust/pspp-derive/src/lib.rs
index 58bcbd2c60f09f895720a30c879c8da4ff7ce4f8..6fd7a8678d4afc077eeb3f0e6f812b79aea11656 100644
@@ -1,7 +1,7 @@
 use proc_macro::TokenStream;
 use proc_macro2::{Literal, TokenStream as TokenStream2};
-use quote::{format_ident, quote};
-use syn::{spanned::Spanned, Attribute, DataEnum, DataStruct, DeriveInput, Error, Token};
+use quote::{quote, ToTokens};
+use syn::{spanned::Spanned, Attribute, DataEnum, DataStruct, DeriveInput, Error, Fields, Token};
 
 #[proc_macro_derive(FromTokens, attributes(pspp))]
 pub fn from_tokens_derive(input: TokenStream) -> TokenStream {
@@ -27,30 +27,37 @@ fn parse_derive_input(ast: DeriveInput) -> Result<TokenStream2, Error> {
 }
 
 fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result<TokenStream2, Error> {
+    let struct_attrs = StructAttrs::parse(&ast.attrs)?;
     let mut body = TokenStream2::new();
     for (index, variant) in e.variants.iter().enumerate() {
-        let field_attrs = parse_attributes(&variant.attrs)?;
+        let field_attrs = FieldAttrs::parse(&variant.attrs)?;
         if index > 0 {
             body.extend(quote! { else }.into_iter());
         }
         let ident = &variant.ident;
-        if let Some(syntax) = field_attrs.syntax {
-            body.extend(quote! { if cursor.match_syntax(#syntax) { Self::#ident }});
+        let ident_string = ident.to_string();
+        let match_expr = if let Some(syntax) = field_attrs.syntax {
+            quote! { cursor.match_syntax(#syntax) }
+        } else if ident_string.eq_ignore_ascii_case("all") {
+            quote! { cursor.match_(&Token::Punct(Punct::All)) }
         } else {
-            let ident_string = ident.to_string();
-            if ident_string.eq_ignore_ascii_case("all") {
-                body.extend(quote! { if cursor.match_(&Token::Punct(Punct::All)) { Self::#ident }});
-            } else {
-                body.extend(quote! { if cursor.match_keyword(#ident_string) { Self::#ident }});
-            };
-        }
+            quote! { cursor.match_keyword(#ident_string) }
+        };
+        let construction = construct_fields(&variant.fields);
+        let check_equals = if struct_attrs.required_equals && !variant.fields.is_empty() {
+            quote! { cursor.force(&Token::Punct(Punct::Equals))?; }
+        } else {
+            quote! {}
+        };
+        body.extend(quote! { if #match_expr { #check_equals Self::#ident #construction } });
     }
     body.extend(quote! { else { return Err(cursor.error("Syntax error.")); } });
 
     let name = &ast.ident;
+    let lifetime = struct_attrs.lifetime();
     let output = quote! {
-        impl FromTokens for #name {
-            fn from_tokens<'a>(cursor: &Cursor<'a>) -> Result<Self, Diagnostic> {
+        impl<'a> FromTokens<'a> for #name #lifetime {
+            fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic> {
                 Ok(#body)
             }
         }
@@ -59,25 +66,33 @@ fn derive_enum(ast: &DeriveInput, e: &DataEnum) -> Result<TokenStream2, Error> {
     Ok(output)
 }
 
-fn derive_struct(ast: &DeriveInput, s: &DataStruct) -> Result<TokenStream2, Error> {
+fn construct_fields(fields: &Fields) -> impl ToTokens {
     let mut construction = TokenStream2::new();
-    let mut body = TokenStream2::new();
-    for (index, field) in s.fields.iter().enumerate() {
-        let varname = format_ident!("field{}", index);
-        let ty = &field.ty;
-        body.extend(quote! { let #varname = <#ty>::from_tokens(cursor)?; });
-        let name = field.ident.as_ref().unwrap();
-        if index > 0 {
-            construction.extend(quote! { , });
+    for field in fields {
+        let value = quote! { FromTokens::from_tokens(cursor)? };
+        if let Some(name) = field.ident.as_ref() {
+            construction.extend(quote! { #name: #value, });
+        } else {
+            construction.extend(quote! { #value, });
         }
-        construction.extend(quote! { #name: #varname });
     }
 
+    match fields {
+        Fields::Named(_) => quote! { { #construction } },
+        Fields::Unnamed(_) => quote! { ( #construction ) },
+        Fields::Unit => quote! {},
+    }
+}
+
+fn derive_struct(ast: &DeriveInput, s: &DataStruct) -> Result<TokenStream2, Error> {
+    let struct_attrs = StructAttrs::parse(&ast.attrs)?;
     let name = &ast.ident;
+    let construction = construct_fields(&s.fields);
+    let lifetime = struct_attrs.lifetime();
     let output = quote! {
-        impl FromTokens for #name {
-            fn from_tokens<'a>(cursor: &Cursor<'a>) -> Result<Self, Diagnostic> {
-                #body Ok(#name { #construction })
+        impl<'a> FromTokens<'a> for #name #lifetime {
+            fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic> {
+                Ok(#name #construction)
             }
         }
     };
@@ -90,23 +105,58 @@ struct FieldAttrs {
     syntax: Option<Literal>,
 }
 
-fn parse_attributes(attributes: &[Attribute]) -> Result<FieldAttrs, Error> {
-    println!("{:?}", &attributes);
-    let mut field_attrs = FieldAttrs::default();
-    for attr in attributes {
-        if !attr.path().is_ident("pspp") {
-            continue;
+impl FieldAttrs {
+    fn parse(attributes: &[Attribute]) -> Result<Self, Error> {
+        let mut field_attrs = Self::default();
+        for attr in attributes {
+            if !attr.path().is_ident("pspp") {
+                continue;
+            }
+            attr.parse_nested_meta(|meta| {
+                if meta.path.is_ident("syntax") {
+                    //println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input);
+                    meta.input.parse::<Token![=]>()?;
+                    let syntax = meta.input.parse::<Literal>()?;
+                    //println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input);
+                    field_attrs.syntax = Some(syntax);
+                } else {
+                    return Err(Error::new(meta.path.span(), "Unknown attribute"));
+                }
+                Ok(())
+            })?;
         }
-        attr.parse_nested_meta(|meta| {
-            if meta.path.is_ident("syntax") {
-                println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input);
-                meta.input.parse::<Token![=]>()?;
-                let syntax = meta.input.parse::<Literal>()?;
-                println!("{}:{} {:?} {:?}", file!(), line!(), meta.path, meta.input);
-                field_attrs.syntax = Some(syntax);
+        Ok(field_attrs)
+    }
+}
+
+#[derive(Default)]
+struct StructAttrs {
+    add_lifetime: bool,
+    required_equals: bool,
+}
+
+impl StructAttrs {
+    fn lifetime(&self) -> Option<TokenStream2> {
+        self.add_lifetime.then(|| quote! { <'a> })
+    }
+    fn parse(attributes: &[Attribute]) -> Result<Self, Error> {
+        //println!("{:?}", &attributes);
+        let mut field_attrs = Self::default();
+        for attr in attributes {
+            if !attr.path().is_ident("pspp") {
+                continue;
             }
-            Ok(())
-        })?;
+            attr.parse_nested_meta(|meta| {
+                if meta.path.is_ident("add_lifetime") {
+                    field_attrs.add_lifetime = true;
+                } else if meta.path.is_ident("required_equals") {
+                    field_attrs.required_equals = true;
+                } else {
+                    return Err(Error::new(meta.path.span(), "Unknown attribute"));
+                }
+                Ok(())
+            })?;
+        }
+        Ok(field_attrs)
     }
-    Ok(field_attrs)
 }
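
With construct_fields handling named, tuple, and unit fields and the new
struct-level attributes in place, a subcommand enum can in principle be
derived directly. This is the form that appears commented out in command.rs
below; the hand-written impl that stands in for it (with extra debug
printlns) matches what the derive would generate:

    #[derive(FromTokens, Debug)]
    #[pspp(add_lifetime, required_equals)]
    enum DescriptivesSubcommand<'a> {
        Variables(Vec<DescriptivesVarRange<'a>>),
        Missing(Vec<Missing>),
        Save,
        Statistics(Vec<Statistic>),
        Sort(Sort),
    }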
diff --git a/rust/pspp/src/command.rs b/rust/pspp/src/command.rs
index 2f43b1e93a389182510bd2c6eecf11d2cf3347be..6a20e85473018f35927b180c21eea1451aecb3ee 100644
@@ -5,6 +5,7 @@ use flagset::{flags, FlagSet};
 use pspp_derive::FromTokens;
 
 use crate::{
+    identifier::Identifier,
     integer::ToInteger,
     lex::{
         command_name::CommandMatcher,
@@ -97,16 +98,6 @@ trait ParsedCommand {
  */
 
 /*
-struct Descriptives<'a> {
-    subcommands: Vec<DescriptivesSubcommand<'a>>
-}
-enum DescriptivesSubcommand<'a> {
-    Variables(TokenSlice<'a>),
-    Missing(TokenSlice<'a>),
-    Save,
-    Statistics(Vec<Statistic>),
-    Sort(Sort),
-}
 
 
 
@@ -115,34 +106,210 @@ struct Subcommand {
     name: &str,
 }*/
 
-trait FromTokens {
-    fn from_tokens<'a>(tokens: &Cursor<'a>) -> Result<Self, Diagnostic>
+trait FromTokens<'a> {
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic>
     where
         Self: Sized;
 }
 
-impl<T> FromTokens for Option<T>
+impl<'a, T> FromTokens<'a> for Option<T>
 where
-    T: FromTokens,
+    T: FromTokens<'a>,
 {
-    fn from_tokens<'a>(tokens: &Cursor<'a>) -> Result<Self, Diagnostic>
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic>
     where
         Self: Sized,
     {
-        match T::from_tokens(tokens) {
+        let saved_position = cursor.get_pos();
+        match T::from_tokens(cursor) {
             Ok(result) => Ok(Some(result)),
-            Err(_error) => Ok(None)
+            Err(_error) => {
+                cursor.set_pos(saved_position);
+                Ok(None)
+            }
+        }
+    }
+}
+
+impl<'a, T> FromTokens<'a> for Vec<T>
+where
+    T: FromTokens<'a>,
+{
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic>
+    where
+        Self: Sized,
+    {
+        let mut vector = Vec::new();
+        while let Ok(result) = cursor.with_pos(|| T::from_tokens(cursor)) {
+            vector.push(result);
+        }
+        Ok(vector)
+    }
+}
+
+impl<'a> FromTokens<'a> for TokenSlice<'a> {
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic>
+    where
+        Self: Sized,
+    {
+        Ok(cursor.take_remainder())
+    }
+}
+
+/*
+impl<'a> FromTokens<'a> for DescriptivesSubcommand<'a>
+{
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic>
+    {
+        Ok(if cursor.match_keyword("Variables") { Self::Variables(cursor.take_remainder()) } else if
+        cursor.match_keyword("Missing") { Self::Missing(cursor.take_remainder()) } else if
+        cursor.match_keyword("Save") { Self::Save } else if
+        cursor.match_keyword("Statistics") { Self::Statistics(Vec::new()) } else if
+        cursor.match_keyword("Sort") { Self::Sort(Sort::from_tokens(cursor)?) } else
+        { return Err(cursor.error("Syntax error.")); })
+    }
+}*/
+
+#[derive(FromTokens, Debug)]
+#[pspp(add_lifetime)]
+struct Descriptives<'a> {
+    subcommands: Vec<Subcommand<DescriptivesSubcommand<'a>>>,
+}
+
+#[derive(Debug)]
+struct Subcommand<T>(pub T);
+
+impl<'a, T> FromTokens<'a> for Subcommand<T>
+where
+    T: FromTokens<'a>,
+{
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic>
+    where
+        Self: Sized,
+    {
+        cursor.advance_until(|token| token != &Token::Punct(Punct::Slash));
+        if cursor.at_end() {
+            return Err(cursor.error("Syntax error at end of input."));
+        }
+        let start = cursor.get_pos();
+        cursor.advance_until(|token| token == &Token::Punct(Punct::Slash));
+        let subcommand = cursor.subcursor(start..cursor.get_pos());
+        match T::from_tokens(&subcommand) {
+            Ok(result) => Ok(Self(result)),
+            Err(error) => {
+                cursor.set_pos(start);
+                Err(error)
+            }
         }
     }
 }
 
-#[derive(FromTokens)]
+/*
+#[derive(FromTokens, Debug)]
+#[pspp(add_lifetime, required_equals)]*/
+#[derive(Debug)]
+enum DescriptivesSubcommand<'a> {
+    Variables(Vec<DescriptivesVarRange<'a>>),
+    Missing(Vec<Missing>),
+    Save,
+    Statistics(Vec<Statistic>),
+    Sort(Sort),
+}
+
+impl<'a> FromTokens<'a> for DescriptivesSubcommand<'a> {
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic> {
+            println!("{}:{}", file!(), line!());
+        Ok(if cursor.match_keyword("Variables") {
+            println!("{}:{}", file!(), line!());
+            cursor.force(&Token::Punct(Punct::Equals))?;
+            println!("{}:{}", file!(), line!());
+            Self::Variables(FromTokens::from_tokens(cursor)?)
+        } else if cursor.match_keyword("Missing") {
+            cursor.force(&Token::Punct(Punct::Equals))?;
+            Self::Missing(FromTokens::from_tokens(cursor)?)
+        } else if cursor.match_keyword("Save") {
+            Self::Save
+        } else if cursor.match_keyword("Statistics") {
+            cursor.force(&Token::Punct(Punct::Equals))?;
+            Self::Statistics(FromTokens::from_tokens(cursor)?)
+        } else if cursor.match_keyword("Sort") {
+            cursor.force(&Token::Punct(Punct::Equals))?;
+            Self::Sort(FromTokens::from_tokens(cursor)?)
+        } else {
+            return Err(cursor.error("Syntax error."));
+        })
+    }
+}
+
+#[derive(FromTokens, Debug)]
+enum Missing {
+    Variable,
+    Listwise,
+    Include,
+}
+
+#[derive(FromTokens, Debug)]
+#[pspp(add_lifetime)]
+struct DescriptivesVarRange<'a> {
+    vars: VarRange<'a>,
+    z_name: Option<InParens<&'a Identifier>>,
+}
+
+#[derive(Debug)]
+struct InParens<T>(pub T);
+
+impl<'a, T> FromTokens<'a> for InParens<T>
+where
+    T: FromTokens<'a>,
+{
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic>
+    where
+        Self: Sized,
+    {
+        cursor.force(&Token::Punct(Punct::LParen))?;
+        let inner = T::from_tokens(cursor)?;
+        cursor.force(&Token::Punct(Punct::RParen))?;
+        Ok(Self(inner))
+    }
+}
+
+#[derive(Debug)]
+struct VarRange<'a> {
+    from: &'a Identifier,
+    to: Option<&'a Identifier>,
+}
+
+impl<'a> FromTokens<'a> for VarRange<'a> {
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic>
+    where
+        Self: Sized,
+    {
+        Ok(Self {
+            from: cursor.force_id()?,
+            to: cursor
+                .match_(&Token::Punct(Punct::To))
+                .then(|| cursor.force_id())
+                .transpose()?,
+        })
+    }
+}
+
+impl<'a> FromTokens<'a> for &'a Identifier {
+    fn from_tokens(cursor: &Cursor<'a>) -> Result<Self, Diagnostic>
+    where
+        Self: Sized,
+    {
+        cursor.force_id()
+    }
+}
+
+#[derive(FromTokens, Debug)]
 struct Sort {
     key: SortKey,
     direction: Option<Direction>,
 }
 
-#[derive(FromTokens)]
+#[derive(FromTokens, Debug)]
 enum SortKey {
     Mean,
     SMean,
@@ -157,7 +324,7 @@ enum SortKey {
     Name,
 }
 
-#[derive(FromTokens)]
+#[derive(FromTokens, Debug)]
 enum Direction {
     #[pspp(syntax = "(A)")]
     Ascending,
@@ -165,7 +332,7 @@ enum Direction {
     Descending,
 }
 
-#[derive(FromTokens)]
+#[derive(FromTokens, Debug)]
 enum Statistic {
     Default,
     Mean,
@@ -180,41 +347,6 @@ enum Statistic {
     Kurtosis,
     All,
 }
-/*
-impl FromTokens for Statistic {
-    fn from_tokens<'a>(tokens: &TokenSlice<'a>) -> Result<Self, Diagnostic> {
-        let cursor = Cursor::new(&tokens);
-        let statistic = if cursor.match_keyword("default") {
-            Self::Default
-        } else if cursor.match_keyword("stddev") {
-            Self::Stddev
-        } else if cursor.match_keyword("variance") {
-            Self::Variance
-        } else if cursor.match_keyword("mean") {
-            Self::Mean
-        } else if cursor.match_keyword("semean") {
-            Self::SeMean
-        } else if cursor.match_keyword("sum") {
-            Self::Sum
-        } else if cursor.match_keyword("min") {
-            Self::Min
-        } else if cursor.match_keyword("max") {
-            Self::Max
-        } else if cursor.match_keyword("skewness") {
-            Self::Skewness
-        } else if cursor.match_keyword("kurtosis") {
-            Self::Kurtosis
-        } else if cursor.match_(&Token::Punct(Punct::All)) {
-            Self::All
-        } else {
-            return Err(tokens.error("Syntax error expecting statistic."))
-        };
-        // XXX warn for trailing tokens
-        Ok(statistic)
-    }
-}*/
-
-struct Foo;
 
 fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec<TokenSlice<'a>> {
     src.split(|token| token.token == Token::Punct(Punct::Slash))
@@ -225,6 +357,25 @@ fn collect_subcommands<'a>(src: &'a TokenSlice) -> Vec<TokenSlice<'a>> {
 fn commands() -> &'static [Command] {
     fn new_commands() -> Vec<Command> {
         vec![
+            Command {
+                allowed_states: FlagSet::full(),
+                enhanced_only: false,
+                testing_only: false,
+                no_abbrev: false,
+                name: "DESCRIPTIVES",
+                run: Box::new(|context| {
+                    let cursor = context.lexer.cursor();
+                    println!("{}:{}", file!(), line!());
+                    while let Ok(subcommand) = <Subcommand<TokenSlice>>::from_tokens(&cursor) {
+                        println!("{subcommand:?}");
+                        println!(
+                            "{:?}",
+                            DescriptivesSubcommand::from_tokens(&subcommand.0.cursor())
+                        );
+                    }
+                    println!("{}:{}", file!(), line!());
+                }),
+            },
             Command {
                 allowed_states: FlagSet::full(),
                 enhanced_only: false,
@@ -353,9 +504,9 @@ pub fn end_of_command(context: &Context, range: RangeFrom<usize>) -> Result<Succ
 fn parse_in_state(mut lexer: TokenSlice, error: &Box<dyn Fn(Diagnostic)>, _state: State) {
     println!("{}:{}", file!(), line!());
     match lexer.get_token(0) {
-        None | Some(Token::End) => (),
-        _ => {
-            if let Ok((command, n_tokens)) = parse_command_name(&mut lexer, error) {
+        None | Some(Token::End) => println!("{}:{}", file!(), line!()),
+        _ => match parse_command_name(&mut lexer, error) {
+            Ok((command, n_tokens)) => {
                 let mut context = Context {
                     error,
                     lexer: lexer.subslice(n_tokens..lexer.len()),
@@ -363,7 +514,8 @@ fn parse_in_state(mut lexer: TokenSlice, error: &Box<dyn Fn(Diagnostic)>, _state
                 };
                 (command.run)(&mut context);
             }
-        }
+            Err(error) => println!("{error:?}"),
+        },
     }
 }
 
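
The Option<T> and Vec<T> impls of FromTokens above are what let the
generated parsers compose: a failed sub-parse rewinds the cursor (via
get_pos/set_pos or with_pos), so an optional or repeated element that does
not match consumes no tokens. The rewind pattern in isolation, as a
self-contained sketch (a stand-in type, not pspp's actual Cursor):

    use std::cell::Cell;

    // Minimal stand-in for Cursor::with_pos: run a fallible parse and
    // rewind the position on failure so that a failed alternative
    // consumes nothing.
    struct Pos(Cell<usize>);

    impl Pos {
        fn with_pos<T, E>(&self, f: impl FnOnce() -> Result<T, E>) -> Result<T, E> {
            let saved = self.0.get();
            let result = f();
            if result.is_err() {
                self.0.set(saved); // rewind: the failed parse consumed nothing
            }
            result
        }
    }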
diff --git a/rust/pspp/src/engine.rs b/rust/pspp/src/engine.rs
index 6ee46233031e5989db289051a7128c5aa702eca6..6e9248dbb3e3b555995c5d85d3905e1ffb1965e1 100644
@@ -1,8 +1,6 @@
 use crate::{
     command::parse_command,
-    lex::{
-        lexer::{TokenSlice, Source},
-    },
+    lex::lexer::{Source, TokenSlice},
     macros::MacroSet,
     message::Diagnostic,
 };
@@ -15,17 +13,23 @@ impl Engine {
     }
     pub fn run(&mut self, mut source: Source) {
         let macros = MacroSet::new();
+        println!("{}:{}", file!(), line!());
         while let Some(tokens) = source.read_command(&macros) {
+            println!("{}:{}", file!(), line!());
             let error: Box<dyn Fn(Diagnostic)> = Box::new(|diagnostic| {
                 println!("{diagnostic}");
             });
+            println!("{}:{}", file!(), line!());
             parse_command(TokenSlice::new(&tokens), &error);
+            println!("{}:{}", file!(), line!());
         }
     }
 }
 
 #[cfg(test)]
 mod tests {
+    use std::sync::Arc;
+
     use encoding_rs::UTF_8;
 
     use crate::lex::lexer::{Source, SourceFile};
@@ -35,10 +39,25 @@ mod tests {
     #[test]
     fn test_echo() {
         let mut engine = Engine::new();
-        engine.run(Source::new_default(&SourceFile::for_file_contents(
-            "ECHO 'hi there'.\nECHO 'bye there'.\n".to_string(),
-            Some("test.sps".to_string()),
-            UTF_8,
+        engine.run(Source::new_default(&Arc::new(
+            SourceFile::for_file_contents(
+                "ECHO 'hi there'.\nECHO 'bye there'.\n".to_string(),
+                Some("test.sps".to_string()),
+                UTF_8,
+            ),
+        )));
+    }
+
+    #[test]
+    fn test_descriptives() {
+        println!("{}:{}", file!(), line!());
+        let mut engine = Engine::new();
+        engine.run(Source::new_default(&Arc::new(
+            SourceFile::for_file_contents(
+                "DESCRIPTIVES VARIABLES=a (za) b to c/MISSING=x y z/MISSING=VARIABLE INCLUDE/STATISTICS=DEFAULT/SAVE/SORT=SKEWNESS(A)\n".to_string(),
+                Some("test.sps".to_string()),
+                UTF_8,
+            ),
         )));
     }
 }
diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs
index 51721cbe0ca8d7d11b3490606c9239989fa7c05c..dd514acc05cc5aebd4a80be6b9e498b113dc30b0 100644
@@ -9,7 +9,6 @@ use std::{
     mem::take,
     ops::{Range, RangeInclusive},
     path::Path,
-    ptr,
     sync::Arc,
 };
 
@@ -19,6 +18,8 @@ use thiserror::Error as ThisError;
 use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
 
 use crate::{
+    identifier::Identifier,
+    lex::scan::StringScanner,
     macros::{macro_tokens_to_syntax, MacroSet, ParseStatus, Parser},
     message::{Category, Diagnostic, Location, Point, Severity},
     settings::Settings,
@@ -186,11 +187,11 @@ fn ellipsize(s: &str) -> Cow<str> {
 }
 
 /// A token in a [`Source`].
-pub struct LexToken<'a> {
+pub struct LexToken {
     /// The regular token.
     pub token: Token,
 
-    pub file: &'a SourceFile,
+    pub file: Arc<SourceFile>,
 
     /// For a token obtained through the lexer in an ordinary way, this is the
     /// location of the token in the [`Source`]'s buffer.
@@ -207,19 +208,25 @@ pub struct LexToken<'a> {
     macro_rep: Option<MacroRepresentation>,
 }
 
+impl Debug for LexToken {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        self.token.fmt(f)
+    }
+}
+
 #[allow(dead_code)]
 struct LexError {
     error: ScanError,
     pos: Range<usize>,
 }
 
-impl Borrow<Token> for LexToken<'_> {
+impl Borrow<Token> for LexToken {
     fn borrow(&self) -> &Token {
         &self.token
     }
 }
 
-impl LexToken<'_> {
+impl LexToken {
     fn representation(&self) -> &str {
         &self.file.buffer[self.pos.clone()]
     }
@@ -330,18 +337,18 @@ lis|.\0",
     }
 }
 */
-pub struct Tokens<'a> {
-    tokens: Vec<LexToken<'a>>,
+pub struct Tokens {
+    tokens: Vec<LexToken>,
 }
 
-impl<'a> Tokens<'a> {
-    fn new(tokens: Vec<LexToken<'a>>) -> Self {
+impl Tokens {
+    fn new(tokens: Vec<LexToken>) -> Self {
         assert!(matches!(tokens.last().unwrap().token, Token::End));
         Self { tokens }
     }
 }
 
-impl Debug for Tokens<'_> {
+impl Debug for Tokens {
     fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
         write!(f, "Tokens {{ ")?;
         for (index, token) in self.tokens.iter().enumerate() {
@@ -354,8 +361,22 @@ impl Debug for Tokens<'_> {
     }
 }
 
+#[derive(Clone)]
 pub struct TokenSlice<'a> {
-    tokens: &'a [LexToken<'a>],
+    tokens: &'a [LexToken],
+}
+
+impl<'a> Debug for TokenSlice<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        write!(f, "TokenSlice {{ ")?;
+        for (index, token) in self.tokens[..self.tokens.len().saturating_sub(1)].iter().enumerate() {
+            if index > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "{:?}", token.representation())?;
+        }
+        write!(f, " }}")
+    }
 }
 
 impl<'a> TokenSlice<'a> {
@@ -369,11 +390,16 @@ impl<'a> TokenSlice<'a> {
         Cursor::new(self)
     }
 
-    pub fn get_token(&self, index: usize) -> Option<&Token> {
-        self.get(index).map(|token| &token.token)
+    pub fn get_token(&self, index: usize) -> Option<&'a Token> {
+        //self.get(index).map(|token| &token.token)
+        if index < self.len() {
+            Some(&self.tokens[index].token)
+        } else {
+            None
+        }
     }
 
-    pub fn get(&self, index: usize) -> Option<&LexToken> {
+    pub fn get(&self, index: usize) -> Option<&'a LexToken> {
         if index < self.len() {
             Some(&self.tokens[index])
         } else {
@@ -403,11 +429,11 @@ impl<'a> TokenSlice<'a> {
         self.tokens.last().unwrap()
     }
 
-    fn file(&self) -> Option<&SourceFile> {
+    fn file(&self) -> Option<&Arc<SourceFile>> {
         let first = self.first();
         let last = self.last();
-        if ptr::eq(first.file, last.file) {
-            Some(first.file)
+        if Arc::ptr_eq(&first.file, &last.file) {
+            Some(&first.file)
         } else {
             None
         }
@@ -520,7 +546,7 @@ impl<'a> TokenSlice<'a> {
 
 #[derive(Clone)]
 pub struct Cursor<'a> {
-    slice: &'a TokenSlice<'a>,
+    slice: TokenSlice<'a>,
 
     /// This allows [Self::force_string] etc. to advance while returning the
     /// token without cloning it.
@@ -528,27 +554,75 @@ pub struct Cursor<'a> {
 }
 
 impl<'a> Cursor<'a> {
-    pub fn new(slice: &'a TokenSlice<'a>) -> Self {
+    pub fn new(slice: &TokenSlice<'a>) -> Self {
         Self {
-            slice,
+            slice: slice.clone(),
             pos: Cell::new(0),
         }
     }
 
+    pub fn get_pos(&self) -> usize {
+        self.pos.get()
+    }
+
+    pub fn set_pos(&self, position: usize) {
+        self.pos.set(position);
+    }
+
+    pub fn with_pos<F, T, E>(&self, f: F) -> Result<T, E>
+    where
+        F: FnOnce() -> Result<T, E>,
+    {
+        let position = self.get_pos();
+        let retval = f();
+        if retval.is_err() {
+            self.set_pos(position);
+        }
+        retval
+    }
+
+    pub fn subcursor(&self, range: Range<usize>) -> Cursor<'a> {
+        Self::new(&self.slice.subslice(range))
+    }
+
     pub fn remainder(&self) -> TokenSlice<'a> {
         self.slice.subslice(self.pos.get()..self.slice.len())
     }
 
+    pub fn take_remainder(&self) -> TokenSlice<'a> {
+        let remainder = self.remainder();
+        self.pos.set(self.slice.len());
+        remainder
+    }
+
     pub fn force_string(&self) -> Result<&str, Diagnostic> {
-        let pos = self.pos.get();
-        if let Some(Token::String(s)) = self.slice.get_token(pos) {
-            self.pos.set(pos + 1);
+        if let Some(Token::String(s)) = self.token() {
+            self.next();
             Ok(s.as_str())
         } else {
             Err(self.error("Syntax error expecting string."))
         }
     }
 
+    pub fn force_id(&self) -> Result<&'a Identifier, Diagnostic> {
+        if let Some(Token::Id(id)) = self.token() {
+            self.next();
+            Ok(id)
+        } else {
+            Err(self.error("Syntax error expecting identifier."))
+        }
+    }
+
+    pub fn force(&self, token: &Token) -> Result<(), Diagnostic> {
+        match self.token() {
+            Some(t) if t == token => {
+                self.next();
+                Ok(())
+            }
+            _ => Err(self.error(format!("Syntax error expecting {token}."))),
+        }
+    }
+
     pub fn error<S>(&self, text: S) -> Diagnostic
     where
         S: ToString,
@@ -590,15 +664,20 @@ impl<'a> Cursor<'a> {
     }
 
     pub fn match_keyword(&self, keyword: &str) -> bool {
-        self.token()
-            .map_or(false, |token| token.matches_keyword(keyword))
+        if let Some(token) = self.token() {
+            if token.matches_keyword(keyword) {
+                self.next();
+                return true;
+            }
+        }
+        false
     }
 
     pub fn at_end(&self) -> bool {
         self.pos.get() >= self.slice.len()
     }
 
-    pub fn token(&self) -> Option<&Token> {
+    pub fn token(&self) -> Option<&'a Token> {
         self.slice.get_token(self.pos.get())
     }
 
@@ -614,26 +693,38 @@ impl<'a> Cursor<'a> {
         }
     }
 
-    pub fn match_syntax(&self, _syntax: &str) -> bool {
-        todo!()
+    pub fn match_syntax(&self, syntax: &str) -> bool {
+        self.with_pos(|| {
+            let syntax_scanner = StringScanner::new(syntax, Syntax::Interactive, true);
+            for scan_token in syntax_scanner {
+                let ScanToken::Token(token) = scan_token else {
+                    unreachable!()
+                };
+                if !self.match_(&token) {
+                    return Err(());
+                };
+            }
+            Ok(())
+        })
+        .is_ok()
     }
 }
 
-pub struct Source<'a> {
-    file: &'a SourceFile,
+pub struct Source {
+    file: Arc<SourceFile>,
     segmenter: Segmenter,
     seg_pos: usize,
-    lookahead: VecDeque<LexToken<'a>>,
+    lookahead: VecDeque<LexToken>,
 }
 
-impl<'a> Source<'a> {
-    pub fn new_default(file: &'a SourceFile) -> Self {
+impl Source {
+    pub fn new_default(file: &Arc<SourceFile>) -> Self {
         Self::new(file, Syntax::default())
     }
 
-    pub fn new(file: &'a SourceFile, syntax: Syntax) -> Self {
+    pub fn new(file: &Arc<SourceFile>, syntax: Syntax) -> Self {
         Self {
-            file,
+            file: file.clone(),
             segmenter: Segmenter::new(syntax, false),
             seg_pos: 0,
             lookahead: VecDeque::new(),
@@ -642,6 +733,10 @@ impl<'a> Source<'a> {
 
     pub fn read_command(&mut self, macros: &MacroSet) -> Option<Tokens> {
         loop {
+            println!("{}:{}", file!(), line!());
+            for token in self.lookahead.iter() {
+                println!("{}", &token.token);
+            }
             if let Some(end) = self
                 .lookahead
                 .iter()
@@ -649,8 +744,18 @@ impl<'a> Source<'a> {
             {
                 return Some(Tokens::new(self.lookahead.drain(..=end).collect()));
             }
+            println!("{}:{}", file!(), line!());
             if !self.read_lookahead(macros) {
-                return None;
+                if self.lookahead.is_empty() {
+                    return None;
+                }
+                let len = self.file.buffer.len();
+                self.lookahead.push_back(LexToken {
+                    token: Token::End,
+                    file: self.file.clone(),
+                    pos: len..len,
+                    macro_rep: None,
+                });
             }
         }
     }
@@ -671,7 +776,7 @@ impl<'a> Source<'a> {
                 Some(ScanToken::Token(token)) => {
                     let end = token == Token::End;
                     pp.push_back(LexToken {
-                        file: self.file,
+                        file: self.file.clone(),
                         token,
                         pos,
                         macro_rep: None,
@@ -707,7 +812,7 @@ impl<'a> Source<'a> {
                     let first = &merge[0];
                     let last = &merge[n - 1];
                     self.lookahead.push_back(LexToken {
-                        file: self.file,
+                        file: self.file.clone(),
                         token,
                         pos: first.pos.start..last.pos.end,
                         macro_rep: match (&first.macro_rep, &last.macro_rep) {
@@ -730,8 +835,8 @@ impl<'a> Source<'a> {
     fn expand_macro(
         &self,
         macros: &MacroSet,
-        src: &mut VecDeque<LexToken<'a>>,
-        dst: &mut VecDeque<LexToken<'a>>,
+        src: &mut VecDeque<LexToken>,
+        dst: &mut VecDeque<LexToken>,
     ) {
         // Now pass tokens one-by-one to the macro expander.
         let Some(mut parser) = Parser::new(macros, &src[0].token) else {
@@ -781,7 +886,7 @@ impl<'a> Source<'a> {
         let macro_rep = Arc::new(macro_rep);
         for (index, token) in expansion.into_iter().enumerate() {
             let lt = LexToken {
-                file: self.file,
+                file: self.file.clone(),
                 token: token.token,
                 pos: c0.pos.start..c1.pos.end,
                 macro_rep: Some(MacroRepresentation {
@@ -797,6 +902,8 @@ impl<'a> Source<'a> {
 
 #[cfg(test)]
 mod new_lexer_tests {
+    use std::sync::Arc;
+
     use encoding_rs::UTF_8;
 
     use crate::macros::MacroSet;
@@ -812,11 +919,11 @@ END DATA.
 
 CROSSTABS VARIABLES X (1,7) Y (1,7) /TABLES X BY Y.
 "#;
-        let file = SourceFile::for_file_contents(
+        let file = Arc::new(SourceFile::for_file_contents(
             String::from(code),
             Some(String::from("crosstabs.sps")),
             UTF_8,
-        );
+        ));
         let mut source = Source::new_default(&file);
         while let Some(tokens) = source.read_command(&MacroSet::new()) {
             println!("{tokens:?}");
diff --git a/rust/pspp/src/lex/segment/test.rs b/rust/pspp/src/lex/segment/test.rs
index 3e01ee3ee86673457d0532656f1dd3588881afbe..79f92fed3637c0fd586cbe6b5d527b0bc62fbe96 100644
@@ -122,6 +122,17 @@ fn print_segmentation(mut input: &str) {
     }
 }
 
+#[test]
+fn test_end_command() {
+    check_segmentation(
+        r#"DATA LIST/ X 1
+"#,
+        Syntax::Auto,
+        &[],
+        &[],
+    );
+}
+
 #[test]
 fn test_identifiers() {
     check_segmentation(
diff --git a/rust/pspp/src/message.rs b/rust/pspp/src/message.rs
index ffc09c142c1b8b55f8820ec6ec7ceba7a2f43404..aa7aef640a18a2905385966351d39314461d5289 100644
@@ -1,6 +1,6 @@
 use std::{
     cmp::{max, min},
-    fmt::{Display, Formatter, Result as FmtResult},
+    fmt::{Debug, Display, Formatter, Result as FmtResult},
     ops::Range,
     sync::Arc,
 };
@@ -250,3 +250,9 @@ impl Display for Diagnostic {
         Ok(())
     }
 }
+
+impl Debug for Diagnostic {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        Display::fmt(self, f)
+    }
+}
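
Finally, giving Diagnostic a Debug impl that forwards to Display makes
{:?} print the rendered message rather than a struct dump; the Debug
derives on the new command types and the println!("{error:?}") in
command.rs rely on this. The pattern in isolation (a minimal sketch with a
stand-in type):

    use std::fmt::{Debug, Display, Formatter, Result as FmtResult};

    // Stand-in for Diagnostic: Debug forwards to Display so derived
    // Debug impls on containing types print the readable message.
    struct Diag(String);

    impl Display for Diag {
        fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
            write!(f, "{}", self.0)
        }
    }

    impl Debug for Diag {
        fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
            Display::fmt(self, f)
        }
    }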