work
authorBen Pfaff <blp@cs.stanford.edu>
Sat, 24 Aug 2024 15:32:09 +0000 (08:32 -0700)
committerBen Pfaff <blp@cs.stanford.edu>
Sat, 24 Aug 2024 15:32:09 +0000 (08:32 -0700)
rust/pspp/src/command.rs
rust/pspp/src/engine.rs
rust/pspp/src/lex/lexer.rs
rust/pspp/src/lex/scan/mod.rs
rust/pspp/src/lex/scan/test.rs
rust/pspp/src/lex/segment/mod.rs
rust/pspp/src/lex/segment/test.rs

index a40aae5f2a1aa64eb1b1176feed2836143e30bef..ce6bde716c00f530f61527a8f9322694819e7289 100644 (file)
@@ -86,7 +86,7 @@ struct Variable {
 fn parse_descriptives(context: &mut Context) {
     let subcommands = collect_subcommands(context);
     for subcommand in subcommands {
-        
+
     }
 }
 
@@ -108,16 +108,16 @@ fn commands() -> &'static [Command] {
                     println!("hi");
                 }),
             },
-/*
-            Command {
-                allowed_states: State::Data.into(),
-                enhanced_only: false,
-                testing_only: false,
-                no_abbrev: false,
-                name: "DESCRIPTIVES",
-                run: Box::new(parse_descriptives),
-            },
-*/
+            /*
+                        Command {
+                            allowed_states: State::Data.into(),
+                            enhanced_only: false,
+                            testing_only: false,
+                            no_abbrev: false,
+                            name: "DESCRIPTIVES",
+                            run: Box::new(parse_descriptives),
+                        },
+            */
         ]
     }
 
@@ -215,28 +215,37 @@ pub fn end_of_command(context: &Context) -> Result<Success, ()> {
 }
 
 fn parse_in_state(lexer: &mut Lexer, error: &Box<dyn Fn(Diagnostic)>, _state: State) {
+    println!("{}:{}", file!(), line!());
     match lexer.token() {
         Token::End | Token::EndCommand => (),
         _ => {
+            println!("{}:{}", file!(), line!());
             if let Ok((command, n_tokens)) = parse_command_name(lexer, error) {
                 for _ in 0..n_tokens {
                     lexer.get();
                 }
+                println!("{}:{}", file!(), line!());
                 let mut context = Context {
                     error,
                     lexer,
                     command_name: Some(command.name),
                 };
+                println!("{}:{}", file!(), line!());
                 (command.run)(&mut context);
+                println!("{}:{}", file!(), line!());
                 end_of_command(&context);
+                println!("{}:{}", file!(), line!());
             }
-            lexer.interactive_reset();
+            println!("{}:{}", file!(), line!());
             lexer.discard_rest_of_command();
+            println!("{}:{}", file!(), line!());
         }
     }
+    println!("{}:{}", file!(), line!());
     while let Token::EndCommand = lexer.token() {
         lexer.get();
     }
+    println!("{}:{}", file!(), line!());
 }
 
 pub fn parse(lexer: &mut Lexer, error: &Box<dyn Fn(Diagnostic)>) {
index f48c1948c1c60f3d9230fefb38c6fdd79fd216b6..2ccb1a68320ea852bc2b96bbe3e3a6a64089c0eb 100644 (file)
@@ -18,6 +18,7 @@ impl Engine {
         self.lexer.append(source);
         self.lexer.get();
         while self.lexer.token() != &Token::End {
+            println!("{}:{}", file!(), line!());
             let error: Box<dyn Fn(Diagnostic)> = Box::new(|diagnostic| {
                 println!("{diagnostic}");
             });
index f592da1144184332bf9002a8ecf48c0a6297ddd8..928115af616524927a1ba4f74c3e3b8ce6ab8d91 100644 (file)
@@ -16,9 +16,9 @@ use thiserror::Error as ThisError;
 use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
 
 use crate::{
+    lex::scan::Incomplete,
     macros::{macro_tokens_to_syntax, MacroSet, ParseStatus, Parser},
     message::{Category, Diagnostic, Location, Point, Severity},
-    prompt::PromptStyle,
     settings::Settings,
 };
 
@@ -74,16 +74,6 @@ pub struct Source {
     /// `None` if this reader is not associated with a file.
     file_name: Option<Arc<String>>,
 
-    /// True if we've reached EOF already.
-    eof: bool,
-
-    /// Read some input from the source. If successful, returns the input that
-    /// was read.  At end of file or on error, returns an empty string.
-    ///
-    /// `prompt` provides a hint to interactive readers as to what kind of
-    /// syntax is being read right now.
-    read: Box<dyn Fn(PromptStyle) -> String>,
-
     /// Source file contents.
     buffer: String,
 
@@ -104,6 +94,8 @@ pub struct Source {
     ///  end up in `parse`.
     merge: VecDeque<LexToken>,
 
+    eof: bool,
+
     /// Tokens available to the client for parsing.
     parse: Vec<LexToken>,
 
@@ -121,14 +113,13 @@ impl Default for Source {
             error_handling: ErrorHandling::default(),
             encoding: UTF_8,
             file_name: None,
-            eof: false,
-            read: Box::new(|_| String::new()),
             buffer: String::new(),
             journal_line: 0,
             seg_pos: 0,
             lines: vec![0],
             pp: VecDeque::new(),
             merge: VecDeque::new(),
+            eof: false,
             parse: Vec::new(),
             parse_ofs: 0,
             segmenter: Segmenter::new(Mode::default(), false),
@@ -188,45 +179,13 @@ impl Source {
         }
     }
 
-    pub fn for_function(
-        read: Box<dyn Fn(PromptStyle) -> String>,
-        file_name: Option<String>,
-        encoding: &'static Encoding,
-        syntax: Mode,
-        error_handling: ErrorHandling,
-    ) -> Self {
-        Self {
-            read,
-            file_name: file_name.map(Arc::new),
-            encoding,
-            segmenter: Segmenter::new(syntax, false),
-            error_handling,
-            ..Self::default()
-        }
-    }
-
-    fn read(&mut self) {
-        loop {
-            let prompt = self.segmenter.prompt();
-            let s = (self.read)(prompt);
-            if s.is_empty() {
-                self.eof = true;
-                return;
-            }
-            self.buffer.push_str(&s);
-            if self.buffer[self.seg_pos..].contains('\n') {
-                return;
-            }
-        }
-    }
-    fn try_get_pp(&mut self, context: &Context) -> bool {
-        let (seg_len, seg_type) = loop {
-            if let Ok(result) = self.segmenter.push(&self.buffer[self.seg_pos..], self.eof) {
-                break result;
-            }
-
-            debug_assert!(!self.eof);
-            self.read();
+    fn get_pp(&mut self, context: &Context) -> bool {
+        let Some((seg_len, seg_type)) = self
+            .segmenter
+            .push(&self.buffer[self.seg_pos..], true)
+            .unwrap()
+        else {
+            return false;
         };
 
         let pos = self.seg_pos..self.seg_pos + seg_len;
@@ -267,16 +226,12 @@ impl Source {
         let pos = pos.start..pos.end;
         match scan_token {
             None => false,
-            Some(ScanToken::Token(Token::End)) => {
-                self.pp.push_back(LexToken {
-                    token: Token::EndCommand,
-                    pos,
-                    macro_rep: None,
-                });
-                self.eof = true;
-                true
-            }
             Some(ScanToken::Token(token)) => {
+                let token = if let Token::End = token {
+                    Token::EndCommand
+                } else {
+                    token
+                };
                 self.pp.push_back(LexToken {
                     token,
                     pos,
@@ -298,19 +253,15 @@ impl Source {
         }
     }
 
-    fn get_pp(&mut self, context: &Context) -> bool {
-        while !self.eof {
-            if self.try_get_pp(context) {
-                return true;
-            }
-        }
-        false
-    }
-
-    fn try_get_merge(&mut self, context: &Context) -> bool {
+    fn get_merge(&mut self, context: &Context) -> bool {
+        println!("{}:{}", file!(), line!());
         if self.pp.is_empty() && !self.get_pp(context) {
             return false;
         }
+        println!("{}:{} pp.len()={}", file!(), line!(), self.pp.len());
+        for pp in &self.pp {
+            println!("{:?}", &pp.token);
+        }
 
         if !Settings::global().macros.expand {
             self.merge.append(&mut self.pp);
@@ -385,35 +336,23 @@ impl Source {
         retval
     }
 
-    /// Attempts to obtain at least one new token into `self.merge`.
-    ///
-    /// Returns true if successful, false on failure.  In the latter case, this source
-    /// exhausted and 'self.eof' is now true.
-    fn get_merge(&mut self, context: &Context) -> bool {
-        while !self.eof {
-            if self.try_get_merge(context) {
-                return true;
-            }
-        }
-        false
-    }
-
-    fn get_parse__(&mut self, context: &Context) -> bool {
-        for i in 0.. {
-            if self.merge.len() <= i && !self.get_merge(context) {
-                // We always get a `Token::EndCommand` at the end of an input
-                // file and the merger should return `Some(...)` for that token.
-                debug_assert_eq!(self.merge.len(), 0);
-                return false;
-            }
-
-            match ScanToken::merge(&self.merge) {
-                None => (),
-                Some(MergeResult::Copy) => {
+    fn get_parse(&mut self, context: &Context) -> bool {
+        loop {
+            match ScanToken::merge(|index| {
+                if let Some(token) = self.merge.get(index) {
+                    Ok(Some(&token.token))
+                } else if self.eof {
+                    Ok(None)
+                } else {
+                    Err(Incomplete)
+                }
+            }) {
+                Ok(Some(MergeResult::Copy)) => {
+                    println!("{}:{}", file!(), line!());
                     self.parse.push(self.merge.pop_front().unwrap());
                     return true;
                 }
-                Some(MergeResult::Expand { n, token }) => {
+                Ok(Some(MergeResult::Expand { n, token })) => {
                     let first = &self.merge[0];
                     let last = &self.merge[n - 1];
                     self.parse.push(LexToken {
@@ -430,16 +369,18 @@ impl Source {
                         },
                     });
                     self.merge.drain(..n);
+                    println!("{}:{}", file!(), line!());
                     return true;
                 }
+                Ok(None) => return false,
+                Err(Incomplete) => {
+                    debug_assert!(!self.eof);
+                    if !self.get_merge(context) {
+                        self.eof = true;
+                    }
+                }
             }
         }
-        unreachable!();
-    }
-
-    fn get_parse(&mut self, context: &Context) -> bool {
-        // XXX deal with accumulated messages
-        self.get_parse__(context)
     }
 
     fn offset_to_point(&self, offset: usize) -> Point {
@@ -549,7 +490,7 @@ impl Source {
     }
 
     fn is_empty(&self) -> bool {
-        self.buffer.is_empty() && self.eof
+        self.buffer.is_empty()
     }
 
     fn diagnostic(
@@ -602,23 +543,6 @@ impl Source {
             text: s,
         }
     }
-
-    fn interactive_reset(&mut self) {
-        if self.error_handling == ErrorHandling::Terminal {
-            let Source {
-                error_handling,
-                encoding,
-                read,
-                ..
-            } = mem::take(self);
-            *self = Self {
-                error_handling,
-                encoding,
-                read,
-                ..Source::default()
-            };
-        }
-    }
 }
 
 fn ellipsize(s: &str) -> Cow<str> {
@@ -706,13 +630,20 @@ impl Lexer {
         }
 
         while self.source.parse_ofs == self.source.parse.len() {
+            println!("{}:{}", file!(), line!());
             let context = Context {
                 macros: &self.macros,
                 error: &self.error,
             };
-            if !self.source.get_parse(&context) && !self.pop_stack() {
-                return &Token::End;
+            println!("{}:{}", file!(), line!());
+            if !self.source.get_parse(&context) {
+                println!("{}:{}", file!(), line!());
+                if !self.pop_stack() {
+                    println!("{}:{}", file!(), line!());
+                    return &Token::End;
+                }
             }
+            println!("{}:{}", file!(), line!());
         }
         self.source.token()
     }
@@ -810,18 +741,6 @@ impl Lexer {
         }
     }
 
-    /// If the source that the lexer is currently reading has error mode
-    /// [ErrorHandling::Terminal], discards all buffered input and tokens, so
-    /// that the next token to be read comes directly from whatever is next read
-    /// from the stream.
-    ///
-    /// It makes sense to call this function after encountering an error in a
-    /// command entered on the console, because usually the user would prefer
-    /// not to have cascading errors.
-    pub fn interactive_reset(&mut self) {
-        self.source.interactive_reset()
-    }
-
     /// Advances past any tokens up to [Token::EndCommand] or [Token::End].
     pub fn discard_rest_of_command(&mut self) {
         while !matches!(self.token(), Token::EndCommand | Token::End) {
index 05577a92591377cceb59016266fb7959c85c1e45..c4d081483509968431a2e2198497785a8a8d744e 100644 (file)
@@ -16,7 +16,7 @@ use super::{
     segment::{Mode, Segment, Segmenter},
     token::{Punct, Token},
 };
-use std::{borrow::Borrow, collections::VecDeque};
+use std::collections::VecDeque;
 use thiserror::Error as ThisError;
 
 #[derive(ThisError, Clone, Debug, PartialEq, Eq)]
@@ -89,6 +89,9 @@ pub enum MergeResult {
     },
 }
 
+#[derive(Copy, Clone, Debug)]
+pub struct Incomplete;
+
 impl ScanToken {
     pub fn from_segment(s: &str, segment: Segment) -> Option<Self> {
         match segment {
@@ -236,7 +239,6 @@ impl ScanToken {
             Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand => {
                 Some(Self::Token(Token::EndCommand))
             }
-            Segment::End => Some(Self::Token(Token::End)),
             Segment::ExpectedQuote => Some(Self::Error(ScanError::ExpectedQuote)),
             Segment::ExpectedExponent => {
                 Some(Self::Error(ScanError::ExpectedExponent(String::from(s))))
@@ -267,62 +269,52 @@ impl ScanToken {
     ///     white space, as a negative number.  It's only needed if we want
     ///     intervening comments to be allowed or for part of the negative number
     ///     token to be produced by macro expansion.
-    pub fn merge<T>(tokens: &T) -> Option<MergeResult>
+    pub fn merge<'a, F>(get_token: F) -> Result<Option<MergeResult>, Incomplete>
     where
-        T: Tokens,
+        F: Fn(usize) -> Result<Option<&'a Token>, Incomplete>,
     {
-        match tokens.get(0)? {
-            Token::Punct(Punct::Dash) => match tokens.get(1)? {
-                Token::Number(number) if number.is_sign_positive() => {
+        let Some(token) = get_token(0)? else {
+            return Ok(None);
+        };
+        match token {
+            Token::Punct(Punct::Dash) => match get_token(1)? {
+                Some(Token::Number(number)) if number.is_sign_positive() => {
                     let number = *number;
-                    return Some(MergeResult::Expand {
+                    Ok(Some(MergeResult::Expand {
                         n: 2,
                         token: Token::Number(-number),
-                    });
+                    }))
                 }
-                _ => Some(MergeResult::Copy),
+                _ => Ok(Some(MergeResult::Copy)),
             },
             Token::String(_) => {
                 let mut i = 0;
-                while matches!(tokens.get(i * 2 + 1)?, Token::Punct(Punct::Plus))
-                    && matches!(tokens.get(i * 2 + 2)?, Token::String(_))
+                while matches!(get_token(i * 2 + 1)?, Some(Token::Punct(Punct::Plus)))
+                    && matches!(get_token(i * 2 + 2)?, Some(Token::String(_)))
                 {
                     i += 1;
                 }
                 if i == 0 {
-                    Some(MergeResult::Copy)
+                    Ok(Some(MergeResult::Copy))
                 } else {
                     let mut output = String::new();
                     for i in 0..=i {
-                        let Token::String(s) = tokens.get(i * 2).unwrap() else {
+                        let Token::String(s) = get_token(i * 2).unwrap().unwrap() else {
                             unreachable!()
                         };
                         output.push_str(&s);
                     }
-                    Some(MergeResult::Expand {
+                    Ok(Some(MergeResult::Expand {
                         n: i * 2 + 1,
                         token: Token::String(output),
-                    })
+                    }))
                 }
             }
-            _ => Some(MergeResult::Copy),
+            _ => Ok(Some(MergeResult::Copy)),
         }
     }
 }
 
-pub trait Tokens {
-    fn get(&self, index: usize) -> Option<&Token>;
-}
-
-impl<T> Tokens for VecDeque<T>
-where
-    T: Borrow<Token>,
-{
-    fn get(&self, index: usize) -> Option<&Token> {
-        self.get(index).map(|token| token.borrow())
-    }
-}
-
 pub struct StringSegmenter<'a> {
     input: &'a str,
     segmenter: Segmenter,
@@ -342,10 +334,7 @@ impl<'a> Iterator for StringSegmenter<'a> {
 
     fn next(&mut self) -> Option<Self::Item> {
         loop {
-            let (seg_len, seg_type) = self.segmenter.push(self.input, true).unwrap();
-            if seg_type == Segment::End {
-                return None;
-            }
+            let (seg_len, seg_type) = self.segmenter.push(self.input, true).unwrap()?;
             let (s, rest) = self.input.split_at(seg_len);
             self.input = rest;
 
@@ -358,6 +347,7 @@ impl<'a> Iterator for StringSegmenter<'a> {
 
 pub struct StringScanner<'a> {
     input: &'a str,
+    eof: bool,
     segmenter: Segmenter,
     tokens: VecDeque<Token>,
 }
@@ -366,19 +356,28 @@ impl<'a> StringScanner<'a> {
     pub fn new(input: &'a str, mode: Mode, is_snippet: bool) -> Self {
         Self {
             input,
+            eof: false,
             segmenter: Segmenter::new(mode, is_snippet),
             tokens: VecDeque::with_capacity(1),
         }
     }
 
-    fn merge(&mut self) -> Option<ScanToken> {
-        let result = ScanToken::merge(&self.tokens)?;
-        match result {
-            MergeResult::Copy => Some(ScanToken::Token(self.tokens.pop_front().unwrap())),
-            MergeResult::Expand { n, token } => {
+    fn merge(&mut self, eof: bool) -> Result<Option<ScanToken>, Incomplete> {
+        match ScanToken::merge(|index| {
+            if let Some(token) = self.tokens.get(index) {
+                Ok(Some(token))
+            } else if eof {
+                Ok(None)
+            } else {
+                Err(Incomplete)
+            }
+        })? {
+            Some(MergeResult::Copy) => Ok(Some(ScanToken::Token(self.tokens.pop_front().unwrap()))),
+            Some(MergeResult::Expand { n, token }) => {
                 self.tokens.drain(..n);
-                Some(ScanToken::Token(token))
+                Ok(Some(ScanToken::Token(token)))
             }
+            None => Ok(None),
         }
     }
 }
@@ -387,14 +386,18 @@ impl<'a> Iterator for StringScanner<'a> {
     type Item = ScanToken;
 
     fn next(&mut self) -> Option<Self::Item> {
-        if let Some(token) = self.merge() {
-            return Some(token);
-        }
         loop {
-            let (seg_len, seg_type) = self.segmenter.push(self.input, true).unwrap();
-            if seg_type == Segment::End && self.tokens.is_empty() {
-                return None;
+            if let Ok(Some(token)) = self.merge(self.eof) {
+                return Some(token);
             }
+
+            let Some((seg_len, seg_type)) = self.segmenter.push(self.input, true).unwrap() else {
+                if !self.eof {
+                    self.eof = true;
+                    self.tokens.push_back(Token::End);
+                }
+                return self.merge(true).unwrap();
+            };
             let (s, rest) = self.input.split_at(seg_len);
             self.input = rest;
 
@@ -402,9 +405,6 @@ impl<'a> Iterator for StringScanner<'a> {
                 Some(ScanToken::Error(error)) => return Some(ScanToken::Error(error)),
                 Some(ScanToken::Token(token)) => {
                     self.tokens.push_back(token);
-                    if let Some(token) = self.merge() {
-                        return Some(token);
-                    }
                 }
                 None => (),
             }
index 0ed9be6555fb60e8bc20fe1e100c5c4305c0f802..1e0f1007e2c51900a285cad7473b37b225b3205f 100644 (file)
@@ -1,7 +1,10 @@
-use crate::{identifier::Identifier, lex::{
-    segment::Mode,
-    token::{Punct, Token},
-}};
+use crate::{
+    identifier::Identifier,
+    lex::{
+        segment::Mode,
+        token::{Punct, Token},
+    },
+};
 
 use super::{ScanError, ScanToken, StringScanner};
 
@@ -16,6 +19,7 @@ fn print_token(token: &Token) {
     }
 }
 
+#[track_caller]
 fn check_scan(input: &str, mode: Mode, expected: &[ScanToken]) {
     let tokens = StringScanner::new(input, mode, false).collect::<Vec<_>>();
 
@@ -84,6 +88,7 @@ WXYZ. /* unterminated end of line comment
             ScanToken::Token(Token::EndCommand),
             ScanToken::Error(ScanError::UnexpectedChar('�')),
             ScanToken::Token(Token::EndCommand),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -140,6 +145,7 @@ and. with.
             ScanToken::Token(Token::Id(Identifier::new("and.").unwrap())),
             ScanToken::Token(Token::Punct(Punct::With)),
             ScanToken::Token(Token::EndCommand),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -202,6 +208,7 @@ fn test_punctuation() {
             ScanToken::Token(Token::Punct(Punct::LCurly)),
             ScanToken::Token(Token::Punct(Punct::RCurly)),
             ScanToken::Token(Token::Punct(Punct::Not)),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -253,6 +260,7 @@ fn test_positive_numbers() {
             ScanToken::Token(Token::Id(Identifier::new("e1").unwrap())),
             ScanToken::Error(ScanError::ExpectedExponent(String::from("1e+"))),
             ScanToken::Error(ScanError::ExpectedExponent(String::from("1e-"))),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -308,6 +316,7 @@ fn test_negative_numbers() {
             ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e-"))),
             ScanToken::Token(Token::Number(-1.0)),
             ScanToken::Token(Token::EndCommand),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -396,6 +405,7 @@ fn test_shbang() {
             ScanToken::Token(Token::Id(Identifier::new("bin").unwrap())),
             ScanToken::Token(Token::Punct(Punct::Slash)),
             ScanToken::Token(Token::Id(Identifier::new("pspp").unwrap())),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -443,6 +453,7 @@ next command.
             ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
             ScanToken::Token(Token::EndCommand),
             ScanToken::Token(Token::EndCommand),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -482,6 +493,7 @@ second paragraph.
             ScanToken::Token(Token::String(String::from("second paragraph."))),
             ScanToken::Token(Token::EndCommand),
             ScanToken::Token(Token::EndCommand),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -510,6 +522,7 @@ FILE /*
             ScanToken::Token(Token::Id(Identifier::new("lab").unwrap())),
             ScanToken::Token(Token::String(String::from("not quoted here either"))),
             ScanToken::Token(Token::EndCommand),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -548,6 +561,7 @@ end data
             ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
             ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
             ScanToken::Token(Token::EndCommand),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -590,6 +604,7 @@ end
             ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
             ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
             ScanToken::Token(Token::EndCommand),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -646,6 +661,7 @@ end repeat
             ScanToken::Token(Token::String(String::from("  inner command"))),
             ScanToken::Token(Token::Id(Identifier::new("end").unwrap())),
             ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
+            ScanToken::Token(Token::End),
         ],
     );
 }
@@ -683,16 +699,20 @@ fourth command.
             ScanToken::Token(Token::Id(Identifier::new("fifth").unwrap())),
             ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
             ScanToken::Token(Token::EndCommand),
+            ScanToken::Token(Token::End),
         ],
     );
 }
 
 mod define {
-    use crate::{identifier::Identifier, lex::{
-        scan::ScanToken,
-        segment::Mode,
-        token::{Punct, Token},
-    }};
+    use crate::{
+        identifier::Identifier,
+        lex::{
+            scan::ScanToken,
+            segment::Mode,
+            token::{Punct, Token},
+        },
+    };
 
     use super::check_scan;
 
@@ -712,6 +732,7 @@ var1 var2 var3
                 ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
                 ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -731,6 +752,7 @@ var1 var2 var3
                 ScanToken::Token(Token::String(String::from(" var1 var2 var3"))),
                 ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -750,6 +772,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
                 ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -768,6 +791,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
                 ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -786,6 +810,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -808,6 +833,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::String(String::from(""))),
                 ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -837,6 +863,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -870,6 +897,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -899,6 +927,7 @@ content 2
                 ScanToken::Token(Token::String(String::from("content 2"))),
                 ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -920,6 +949,7 @@ data list /x 1.
                 ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
                 ScanToken::Token(Token::Number(1.0)),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -943,6 +973,7 @@ data list /x 1.
                 ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
                 ScanToken::Token(Token::Number(1.0)),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -968,6 +999,7 @@ data list /x 1.
                 ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
                 ScanToken::Token(Token::Number(1.0)),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -991,6 +1023,7 @@ data list /x 1.
                 ScanToken::Token(Token::Id(Identifier::new("x").unwrap())),
                 ScanToken::Token(Token::Number(1.0)),
                 ScanToken::Token(Token::EndCommand),
+                ScanToken::Token(Token::End),
             ],
         );
     }
@@ -1011,6 +1044,7 @@ content line 2
                 ScanToken::Token(Token::String(String::from("content line 1"))),
                 ScanToken::Token(Token::String(String::from("content line 2"))),
                 ScanToken::Token(Token::End),
+                ScanToken::Token(Token::End),
             ],
         );
     }
index befe5b0c5331fc9e819809fa877bc325b7fb8a25..7aed90e16412d31275dc4c557dba2c11358c206f 100644 (file)
@@ -79,7 +79,6 @@ pub enum Segment {
     StartCommand,
     SeparateCommands,
     EndCommand,
-    End,
     ExpectedQuote,
     ExpectedExponent,
     UnexpectedChar,
@@ -216,17 +215,17 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         if input.is_empty() {
             if eof {
-                return Ok((input, Segment::End));
+                return Ok(None);
             } else {
                 return Err(Incomplete);
             };
         }
 
         match self.state.0 {
-            State::Shbang => return self.parse_shbang(input, eof),
+            State::Shbang => self.parse_shbang(input, eof),
             State::General => {
                 if self.start_of_line() {
                     self.parse_start_of_line(input, eof)
@@ -259,9 +258,10 @@ impl Segmenter {
         }
     }
 
-    pub fn push(&mut self, input: &str, eof: bool) -> Result<(usize, Segment), Incomplete> {
-        let (rest, seg_type) = self.push_rest(input, eof)?;
-        Ok((input.len() - rest.len(), seg_type))
+    pub fn push(&mut self, input: &str, eof: bool) -> Result<Option<(usize, Segment)>, Incomplete> {
+        Ok(self
+            .push_rest(input, eof)?
+            .map(|(rest, seg_type)| (input.len() - rest.len(), seg_type)))
     }
 }
 
@@ -450,12 +450,12 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         if let (Some('#'), rest) = take(input, eof)? {
             if let (Some('!'), rest) = take(rest, eof)? {
                 let rest = self.parse_full_line(rest, eof)?;
                 self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((rest, Segment::Shbang));
+                return Ok(Some((rest, Segment::Shbang)));
             }
         }
 
@@ -476,7 +476,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         debug_assert_eq!(self.state.0, State::General);
         debug_assert!(self.start_of_line());
         debug_assert!(!input.is_empty());
@@ -488,16 +488,16 @@ impl Segmenter {
             '+' if is_start_of_string(skip_spaces_and_comments(rest, eof)?, eof)? => {
                 // This  `+` is punctuation that may separate pieces of a string.
                 self.state = (State::General, Substate::empty());
-                return Ok((rest, Segment::Punct));
+                return Ok(Some((rest, Segment::Punct)));
             }
             '+' | '-' | '.' => {
                 self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((rest, Segment::StartCommand));
+                return Ok(Some((rest, Segment::StartCommand)));
             }
             _ if c.is_whitespace() => {
                 if at_end_of_line(input, eof)? {
                     self.state = (State::General, Substate::START_OF_COMMAND);
-                    return Ok((input, Segment::SeparateCommands));
+                    return Ok(Some((input, Segment::SeparateCommands)));
                 }
             }
             _ => {
@@ -505,7 +505,7 @@ impl Segmenter {
                     && !self.state.1.contains(Substate::START_OF_COMMAND)
                 {
                     self.state = (State::General, Substate::START_OF_COMMAND);
-                    return Ok((input, Segment::StartCommand));
+                    return Ok(Some((input, Segment::StartCommand)));
                 }
             }
         }
@@ -516,7 +516,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         debug_assert!(self.state.0 == State::General);
         debug_assert!(!self.state.1.contains(Substate::START_OF_LINE));
         let (Some(c), rest) = take(input, eof)? else {
@@ -525,18 +525,18 @@ impl Segmenter {
         match c {
             '\r' | '\n' if is_end_of_line(input, eof)? => {
                 self.state.1 |= Substate::START_OF_LINE;
-                Ok((
+                Ok(Some((
                     self.parse_newline(input, eof).unwrap().unwrap(),
                     Segment::Newline,
-                ))
+                )))
             }
             '/' => {
                 if let (Some('*'), rest) = take(rest, eof)? {
                     let rest = skip_comment(rest, eof)?;
-                    return Ok((rest, Segment::Comment));
+                    return Ok(Some((rest, Segment::Comment)));
                 } else {
                     self.state.1 = Substate::empty();
-                    return Ok((rest, Segment::Punct));
+                    return Ok(Some((rest, Segment::Punct)));
                 }
             }
             '-' => {
@@ -555,11 +555,11 @@ impl Segmenter {
                     None | Some(_) => (),
                 }
                 self.state.1 = Substate::empty();
-                return Ok((rest, Segment::Punct));
+                return Ok(Some((rest, Segment::Punct)));
             }
             '(' | ')' | '[' | ']' | '{' | '}' | ',' | '=' | ';' | ':' | '&' | '|' | '+' => {
                 self.state.1 = Substate::empty();
-                return Ok((rest, Segment::Punct));
+                return Ok(Some((rest, Segment::Punct)));
             }
             '*' => {
                 if self.state.1.contains(Substate::START_OF_COMMAND) {
@@ -574,11 +574,11 @@ impl Segmenter {
             '~' => self.parse_digraph(&['='], rest, eof),
             '.' if at_end_of_line(rest, eof)? => {
                 self.state.1 = Substate::START_OF_COMMAND;
-                Ok((rest, Segment::EndCommand))
+                Ok(Some((rest, Segment::EndCommand)))
             }
             '.' => match take(rest, eof)? {
                 (Some(c), _) if c.is_ascii_digit() => self.parse_number(input, eof),
-                _ => Ok((rest, Segment::Punct)),
+                _ => Ok(Some((rest, Segment::Punct))),
             },
             '0'..='9' => self.parse_number(input, eof),
             'u' | 'U' => self.maybe_parse_string(Segment::UnicodeString, (input, rest), eof),
@@ -587,20 +587,20 @@ impl Segmenter {
             '!' => {
                 let (c, rest2) = take(rest, eof)?;
                 match c {
-                    Some('*') => Ok((rest2, Segment::Punct)),
+                    Some('*') => Ok(Some((rest2, Segment::Punct))),
                     Some(_) => self.parse_id(input, eof),
-                    None => Ok((rest, Segment::Punct)),
+                    None => Ok(Some((rest, Segment::Punct))),
                 }
             }
-            c if c.is_whitespace() => Ok((skip_spaces(rest, eof)?, Segment::Spaces)),
+            c if c.is_whitespace() => Ok(Some((skip_spaces(rest, eof)?, Segment::Spaces))),
             c if c.may_start_id() => self.parse_id(input, eof),
             '#'..='~' if c != '\\' && c != '^' => {
                 self.state.1 = Substate::empty();
-                Ok((rest, Segment::Punct))
+                Ok(Some((rest, Segment::Punct)))
             }
             _ => {
                 self.state.1 = Substate::empty();
-                Ok((rest, Segment::UnexpectedChar))
+                Ok(Some((rest, Segment::UnexpectedChar)))
             }
         }
     }
@@ -610,14 +610,14 @@ impl Segmenter {
         quote: char,
         mut input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         while let (Some(c), rest) = take(input, eof)? {
             match c {
                 _ if c == quote => {
                     let (c, rest2) = take(rest, eof)?;
                     if c != Some(quote) {
                         self.state.1 = Substate::empty();
-                        return Ok((rest, segment));
+                        return Ok(Some((rest, segment)));
                     }
                     input = rest2;
                 }
@@ -626,14 +626,14 @@ impl Segmenter {
             }
         }
         self.state.1 = Substate::empty();
-        Ok((input, Segment::ExpectedQuote))
+        Ok(Some((input, Segment::ExpectedQuote)))
     }
     fn maybe_parse_string<'a>(
         &mut self,
         segment: Segment,
         input: (&'a str, &'a str),
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         match take(input.1, eof)? {
             (Some(c), rest) if c == '\'' || c == '"' => self.parse_string(segment, c, rest, eof),
             _ => self.parse_id(input.0, eof),
@@ -646,7 +646,9 @@ impl Segmenter {
     ) -> Result<(&'a str, &'a str), Incomplete> {
         let mut sub = Segmenter::new(self.mode, true);
         loop {
-            let (seg_len, seg_type) = sub.push(input, eof)?;
+            let Some((seg_len, seg_type)) = sub.push(input, eof)? else {
+                return Ok((input, input));
+            };
             let (segment, rest) = input.split_at(seg_len);
             match seg_type {
                 Segment::Shbang | Segment::Spaces | Segment::Comment | Segment::Newline => (),
@@ -670,7 +672,6 @@ impl Segmenter {
                 | Segment::StartCommand
                 | Segment::SeparateCommands
                 | Segment::EndCommand
-                | Segment::End
                 | Segment::ExpectedQuote
                 | Segment::ExpectedExponent
                 | Segment::UnexpectedChar => return Ok(("", rest)),
@@ -682,7 +683,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let (Some(_), mut end) = take(input, eof).unwrap() else {
             unreachable!()
         };
@@ -705,18 +706,18 @@ impl Segmenter {
                 return self.parse_comment_1(input, eof);
             } else if id_match("DOCUMENT", identifier) {
                 self.state = (State::Document1, Substate::empty());
-                return Ok((input, Segment::StartDocument));
+                return Ok(Some((input, Segment::StartDocument)));
             } else if id_match_n("DEFINE", identifier, 6) {
                 self.state = (State::Define1, Substate::empty());
             } else if id_match("FILE", identifier) {
                 if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) {
                     self.state = (State::FileLabel1, Substate::empty());
-                    return Ok((rest, Segment::Identifier));
+                    return Ok(Some((rest, Segment::Identifier)));
                 }
             } else if id_match("DO", identifier) {
                 if id_match("REPEAT", self.next_id_in_command(rest, eof)?.0) {
                     self.state = (State::DoRepeat1, Substate::empty());
-                    return Ok((rest, Segment::Identifier));
+                    return Ok(Some((rest, Segment::Identifier)));
                 }
             } else if id_match("BEGIN", identifier) {
                 let (next_id, rest2) = self.next_id_in_command(rest, eof)?;
@@ -737,43 +738,43 @@ impl Segmenter {
                             },
                             Substate::empty(),
                         );
-                        return Ok((rest, Segment::Identifier));
+                        return Ok(Some((rest, Segment::Identifier)));
                     }
                 }
             }
         }
 
         self.state.1 = Substate::empty();
-        Ok((
+        Ok(Some((
             rest,
             if identifier != "!" {
                 Segment::Identifier
             } else {
                 Segment::Punct
             },
-        ))
+        )))
     }
     fn parse_digraph<'a>(
         &mut self,
         seconds: &[char],
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let (c, rest) = take(input, eof)?;
         self.state.1 = Substate::empty();
-        Ok((
+        Ok(Some((
             match c {
                 Some(c) if seconds.contains(&c) => rest,
                 _ => input,
             },
             Segment::Punct,
-        ))
+        )))
     }
     fn parse_number<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let mut input = skip_digits(input, eof)?;
         if let Some(rest) = match_char(|c| c == '.', input, eof)? {
             let rest2 = skip_digits(rest, eof)?;
@@ -786,18 +787,18 @@ impl Segmenter {
             let rest2 = skip_digits(rest, eof)?;
             if rest2.len() == rest.len() {
                 self.state.1 = Substate::empty();
-                return Ok((rest, Segment::ExpectedExponent));
+                return Ok(Some((rest, Segment::ExpectedExponent)));
             }
             input = rest2;
         }
         self.state.1 = Substate::empty();
-        Ok((input, Segment::Number))
+        Ok(Some((input, Segment::Number)))
     }
     fn parse_comment_1<'a>(
         &mut self,
         mut input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         enum CommentState<'a> {
             Blank,
             NotBlank,
@@ -808,7 +809,7 @@ impl Segmenter {
             let (Some(c), rest) = take(input, eof)? else {
                 // End of file.
                 self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((input, Segment::SeparateCommands));
+                return Ok(Some((input, Segment::SeparateCommands)));
             };
             match c {
                 '.' => state = CommentState::Period(input),
@@ -817,17 +818,17 @@ impl Segmenter {
                         CommentState::Blank => {
                             // Blank line ends comment command.
                             self.state = (State::General, Substate::START_OF_COMMAND);
-                            return Ok((input, Segment::SeparateCommands));
+                            return Ok(Some((input, Segment::SeparateCommands)));
                         }
                         CommentState::Period(period) => {
                             // '.' at end of line ends comment command.
                             self.state = (State::General, Substate::empty());
-                            return Ok((period, Segment::CommentCommand));
+                            return Ok(Some((period, Segment::CommentCommand)));
                         }
                         CommentState::NotBlank => {
                             // Comment continues onto next line.
                             self.state = (State::Comment2, Substate::empty());
-                            return Ok((input, Segment::CommentCommand));
+                            return Ok(Some((input, Segment::CommentCommand)));
                         }
                     }
                 }
@@ -841,7 +842,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let rest = self.parse_newline(input, eof)?.unwrap();
 
         let new_command = match take(rest, eof)?.0 {
@@ -857,18 +858,18 @@ impl Segmenter {
         } else {
             self.state = (State::Comment1, Substate::empty());
         }
-        Ok((rest, Segment::Newline))
+        Ok(Some((rest, Segment::Newline)))
     }
     fn parse_document_1<'a>(
         &mut self,
         mut input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let mut end_cmd = false;
         loop {
             let (Some(c), rest) = take(input, eof)? else {
                 self.state = (State::Document3, Substate::empty());
-                return Ok((input, Segment::Document));
+                return Ok(Some((input, Segment::Document)));
             };
             match c {
                 '.' => end_cmd = true,
@@ -878,7 +879,7 @@ impl Segmenter {
                     } else {
                         State::Document2
                     };
-                    return Ok((input, Segment::Document));
+                    return Ok(Some((input, Segment::Document)));
                 }
                 c if !c.is_whitespace() => end_cmd = false,
                 _ => (),
@@ -890,21 +891,21 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let rest = self.parse_newline(input, eof)?.unwrap();
         self.state = (State::Document1, Substate::empty());
-        Ok((rest, Segment::Newline))
+        Ok(Some((rest, Segment::Newline)))
     }
     fn parse_document_3<'a>(
         &mut self,
         input: &'a str,
         _eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         self.state = (
             State::General,
             Substate::START_OF_COMMAND | Substate::START_OF_LINE,
         );
-        Ok((input, Segment::EndCommand))
+        Ok(Some((input, Segment::EndCommand)))
     }
     fn quoted_file_label(input: &str, eof: bool) -> Result<bool, Incomplete> {
         let input = skip_spaces_and_comments(input, eof)?;
@@ -917,12 +918,12 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let mut sub = Segmenter {
             state: (State::General, self.state.1),
             ..*self
         };
-        let (rest, segment) = sub.push_rest(input, eof)?;
+        let (rest, segment) = sub.push_rest(input, eof)?.unwrap();
         if segment == Segment::Identifier {
             let id = &input[..input.len() - rest.len()];
             debug_assert!(id_match("LABEL", id), "{id} should be LABEL");
@@ -934,29 +935,29 @@ impl Segmenter {
         } else {
             self.state.1 = sub.state.1;
         }
-        Ok((rest, segment))
+        Ok(Some((rest, segment)))
     }
     fn parse_file_label_2<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let input = skip_spaces(input, eof)?;
         self.state = (State::FileLabel3, Substate::empty());
-        Ok((input, Segment::Spaces))
+        Ok(Some((input, Segment::Spaces)))
     }
     fn parse_file_label_3<'a>(
         &mut self,
         mut input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let mut end_cmd = None;
         loop {
             let (c, rest) = take(input, eof)?;
             match c {
                 None | Some('\n') | Some('\r') if is_end_of_line(input, eof)? => {
                     self.state = (State::General, Substate::empty());
-                    return Ok((end_cmd.unwrap_or(input), Segment::UnquotedString));
+                    return Ok(Some((end_cmd.unwrap_or(input), Segment::UnquotedString)));
                 }
                 None => unreachable!(),
                 Some('.') => end_cmd = Some(input),
@@ -970,7 +971,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let mut sub = Segmenter {
             mode: self.mode,
             state: (State::General, self.state.1),
@@ -987,8 +988,8 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?;
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
         if segment == Segment::SeparateCommands {
             // We reached a blank line that separates the head from the body.
             self.state.0 = State::DoRepeat2;
@@ -997,7 +998,7 @@ impl Segmenter {
             self.state.0 = State::DoRepeat3;
             self.nest = 1;
         }
-        Ok((rest, segment))
+        Ok(Some((rest, segment)))
     }
     /// We are segmenting a `DO REPEAT` command, currently reading a blank line
     /// that separates the head from the body.
@@ -1005,14 +1006,14 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?;
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
         if segment == Segment::Newline {
             // We reached the body.
             self.state.0 = State::DoRepeat3;
             self.nest = 1;
         }
-        Ok((rest, segment))
+        Ok(Some((rest, segment)))
     }
     fn parse_newline<'a>(
         &mut self,
@@ -1070,9 +1071,9 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         if let Some(rest) = self.parse_newline(input, eof)? {
-            return Ok((rest, Segment::Newline));
+            return Ok(Some((rest, Segment::Newline)));
         }
         let rest = self.parse_full_line(input, eof)?;
         let direction = self.check_repeat_command(input, eof)?;
@@ -1094,11 +1095,11 @@ impl Segmenter {
                 return self.push_rest(input, eof);
             }
         }
-        return Ok((rest, Segment::DoRepeatCommand));
+        return Ok(Some((rest, Segment::DoRepeatCommand)));
     }
-    fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<(&'a str, Segment), Incomplete> {
+    fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         self.state.0 = State::DoRepeat3;
-        Ok((input, Segment::DoRepeatOverflow))
+        Ok(Some((input, Segment::DoRepeatOverflow)))
     }
     /// We are segmenting a `DEFINE` command, which consists of:
     ///
@@ -1121,12 +1122,12 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?;
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
         match segment {
             Segment::Identifier if self.state.0 == State::Define1 => {
                 self.state.0 = State::Define2;
-                return Ok((rest, Segment::MacroName));
+                return Ok(Some((rest, Segment::MacroName)));
             }
             Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => {
                 // The DEFINE command is malformed because we reached its end
@@ -1140,14 +1141,14 @@ impl Segmenter {
             }
             _ => (),
         }
-        Ok((rest, segment))
+        Ok(Some((rest, segment)))
     }
     fn parse_define_3<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?;
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
         match segment {
             Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => {
                 // The DEFINE command is malformed because we reached its end
@@ -1166,7 +1167,7 @@ impl Segmenter {
             }
             _ => (),
         }
-        Ok((rest, segment))
+        Ok(Some((rest, segment)))
     }
     fn find_enddefine<'a>(mut input: &'a str) -> Option<&'a str> {
         loop {
@@ -1197,7 +1198,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let rest = self.parse_full_line(input, eof)?;
         let line = &input[..input.len() - rest.len()];
         if let Some(end) = Self::find_enddefine(line) {
@@ -1209,10 +1210,10 @@ impl Segmenter {
                 self.push_rest(input, eof)
             } else if prefix.trim_start().is_empty() {
                 // Line starts with spaces followed by `!ENDDEFINE`.
-                Ok((rest, Segment::Spaces))
+                Ok(Some((rest, Segment::Spaces)))
             } else {
                 // Line starts with some content followed by `!ENDDEFINE`.
-                Ok((rest, Segment::MacroBody))
+                Ok(Some((rest, Segment::MacroBody)))
             }
         } else {
             // No `!ENDDEFINE`.  We have a full line of macro body.
@@ -1232,39 +1233,39 @@ impl Segmenter {
                 Segment::MacroBody
             };
             self.state.0 = State::Define6;
-            Ok((rest, segment))
+            Ok(Some((rest, segment)))
         }
     }
     fn parse_define_6<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let rest = self.parse_newline(input, eof)?.unwrap();
         self.state.0 = State::Define5;
-        Ok((rest, Segment::Newline))
+        Ok(Some((rest, Segment::Newline)))
     }
     fn parse_begin_data_1<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?;
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
         if segment == Segment::Newline {
             self.state.0 = State::BeginData2;
         }
-        Ok((rest, segment))
+        Ok(Some((rest, segment)))
     }
     fn parse_begin_data_2<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?;
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
         if segment == Segment::Newline {
             self.state.0 = State::BeginData3;
         }
-        Ok((rest, segment))
+        Ok(Some((rest, segment)))
     }
     fn is_end_data(line: &str) -> bool {
         let Some(rest) = strip_prefix_ignore_ascii_case(line, "END") else {
@@ -1295,7 +1296,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let rest = self.parse_full_line(input, eof)?;
         let line = &input[..input.len() - rest.len()];
         if Self::is_end_data(line) {
@@ -1306,17 +1307,17 @@ impl Segmenter {
             self.push_rest(input, eof)
         } else {
             self.state.0 = State::BeginData4;
-            Ok((rest, Segment::InlineData))
+            Ok(Some((rest, Segment::InlineData)))
         }
     }
     fn parse_begin_data_4<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Segment), Incomplete> {
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
         let rest = self.parse_newline(input, eof)?.unwrap();
         self.state.0 = State::BeginData3;
-        Ok((rest, Segment::Newline))
+        Ok(Some((rest, Segment::Newline)))
     }
 }
 
index d8c337dcdfd1536565f9516c0a2fa28a88dab301..0553c7d1a726c71fa4869b1ab65529731ecf9c1d 100644 (file)
@@ -6,7 +6,7 @@ fn push_segment<'a>(
     segmenter: &mut Segmenter,
     input: &'a str,
     one_byte: bool,
-) -> (usize, Segment) {
+) -> Option<(usize, Segment)> {
     if one_byte {
         for len in input.char_indices().map(|(pos, _c)| pos) {
             if let Ok(result) = segmenter.push(&input[..len], false) {
@@ -27,14 +27,11 @@ fn _check_segmentation(
     let mut segments = Vec::with_capacity(expect_segments.len());
     let mut prompts = Vec::new();
     let mut segmenter = Segmenter::new(mode, false);
-    loop {
-        let (seg_len, seg_type) = push_segment(&mut segmenter, input, one_byte);
+    while let Some((seg_len, seg_type)) = push_segment(&mut segmenter, input, one_byte) {
         let (token, rest) = input.split_at(seg_len);
         segments.push((seg_type, token));
-        match seg_type {
-            Segment::End => break,
-            Segment::Newline => prompts.push(segmenter.prompt()),
-            _ => (),
+        if let Segment::Newline = seg_type {
+            prompts.push(segmenter.prompt());
         }
         input = rest;
     }
@@ -94,14 +91,12 @@ fn check_segmentation(
         if let Some(input) = input.strip_suffix('\n') {
             println!("running {one_byte_name} segmentation test without final newline...");
             let mut expect_segments: Vec<_> = expect_segments.iter().copied().collect();
-            assert_eq!(expect_segments.pop(), Some((Segment::End, "")));
             assert_eq!(expect_segments.pop(), Some((Segment::Newline, "\n")));
             while let Some((Segment::SeparateCommands | Segment::EndCommand, "")) =
                 expect_segments.last()
             {
                 expect_segments.pop();
             }
-            expect_segments.push((Segment::End, ""));
             _check_segmentation(
                 input,
                 mode,
@@ -116,14 +111,11 @@ fn check_segmentation(
 #[allow(dead_code)]
 fn print_segmentation(mut input: &str) {
     let mut segmenter = Segmenter::new(Mode::Interactive, false);
-    loop {
-        let (seg_len, seg_type) = segmenter.push(input, true).unwrap();
+    while let Some((seg_len, seg_type)) = segmenter.push(input, true).unwrap() {
         let (token, rest) = input.split_at(seg_len);
         print!("{seg_type:?} {token:?}");
-        match seg_type {
-            Segment::Newline => print!(" ({:?})", segmenter.prompt()),
-            Segment::End => break,
-            _ => (),
+        if let Segment::Newline = seg_type {
+            print!(" ({:?})", segmenter.prompt())
         }
         println!();
         input = rest;
@@ -239,7 +231,6 @@ GhIjK
             (Segment::Identifier, "abc"),
             (Segment::Punct, "!"),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -356,7 +347,6 @@ WxYz./* unterminated end of line comment
             (Segment::EndCommand, "."),
             (Segment::Comment, "/* unterminated end of line comment "),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -473,7 +463,6 @@ and. with.
             (Segment::Identifier, "with"),
             (Segment::EndCommand, "."),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -575,7 +564,6 @@ fn test_punctuation() {
             (Segment::Spaces, " "),
             (Segment::Punct, "!*"),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[PromptStyle::Later, PromptStyle::Later, PromptStyle::Later],
     );
@@ -665,7 +653,6 @@ fn test_positive_numbers() {
             (Segment::Number, "1"),
             (Segment::EndCommand, "."),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -776,7 +763,6 @@ fn test_negative_numbers() {
             (Segment::Number, "-1"),
             (Segment::EndCommand, "."),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -861,7 +847,6 @@ u'fffd' U"041"
             (Segment::Spaces, " "),
             (Segment::QuotedString, "'new command'"),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -908,7 +893,6 @@ title my title.
             (Segment::Punct, "/"),
             (Segment::Identifier, "pspp"),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[PromptStyle::First, PromptStyle::First, PromptStyle::Later],
     );
@@ -990,7 +974,6 @@ next command.
             (Segment::Newline, "\n"),
             (Segment::SeparateCommands, ""),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::Comment,
@@ -1058,7 +1041,6 @@ second paragraph.
             (Segment::EndCommand, ""),
             (Segment::SeparateCommands, ""),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -1114,7 +1096,6 @@ FILE /*
             (Segment::Newline, "\n"),
             (Segment::SeparateCommands, ""),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -1241,7 +1222,6 @@ not data
             (Segment::Spaces, " "),
             (Segment::Identifier, "data"),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::Data,
@@ -1348,7 +1328,6 @@ end repeat.
             (Segment::Identifier, "repeat"),
             (Segment::EndCommand, "."),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -1422,7 +1401,6 @@ fn test_do_repeat_overflow() {
             (Segment::Newline, "\n"),
         ]);
     }
-    expect_output.push((Segment::End, ""));
 
     let expect_prompts: Vec<_> = (0..N * 2 - 3)
         .map(|_| PromptStyle::DoRepeat)
@@ -1506,7 +1484,6 @@ end repeat
             (Segment::Spaces, " "),
             (Segment::Identifier, "repeat"),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -1555,7 +1532,6 @@ var1 var2 var3 "!enddefine"
                 (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::Define, PromptStyle::First],
         );
@@ -1579,7 +1555,6 @@ var1 var2 var3 "!enddefine"
                 (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::First],
         );
@@ -1603,7 +1578,6 @@ var1 var2 var3!enddefine.
                 (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::First],
         );
@@ -1625,7 +1599,6 @@ var1 var2 var3!enddefine.
                 (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::First],
         );
@@ -1648,7 +1621,6 @@ var1 var2 var3!enddefine.
                 (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::First],
         );
@@ -1677,7 +1649,6 @@ var1 var2 var3!enddefine.
                 (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[
                 PromptStyle::Define,
@@ -1718,7 +1689,6 @@ var1 var2 var3!enddefine.
                 (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::First],
         );
@@ -1764,7 +1734,6 @@ var1 var2 var3!enddefine.
                 (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[
                 PromptStyle::Later,
@@ -1809,7 +1778,6 @@ content 2
                 (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[
                 PromptStyle::Later,
@@ -1845,7 +1813,6 @@ data list /x 1.
                 (Segment::Number, "1"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::First, PromptStyle::First],
         );
@@ -1877,7 +1844,6 @@ data list /x 1.
                 (Segment::Number, "1"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::Later, PromptStyle::First, PromptStyle::First],
         );
@@ -1911,7 +1877,6 @@ data list /x 1.
                 (Segment::Number, "1"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::First, PromptStyle::First, PromptStyle::First],
         );
@@ -1942,7 +1907,6 @@ data list /x 1.
                 (Segment::Number, "1"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::First, PromptStyle::First],
         );
@@ -1967,7 +1931,6 @@ content line 2
                 (Segment::Newline, "\n"),
                 (Segment::MacroBody, "content line 2"),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[
                 PromptStyle::Define,
@@ -1990,7 +1953,6 @@ content line 2
                 (Segment::Punct, "("),
                 (Segment::Punct, ")"),
                 (Segment::Newline, "\n"),
-                (Segment::End, ""),
             ],
             &[PromptStyle::Define],
         );
@@ -2049,7 +2011,6 @@ fourth command.
             (Segment::Identifier, "command"),
             (Segment::EndCommand, "."),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -2151,7 +2112,6 @@ fourth command.
             (Segment::Identifier, "command"),
             (Segment::EndCommand, "."),
             (Segment::Newline, "\n"),
-            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,