more tests

author Ben Pfaff <blp@cs.stanford.edu>

Thu, 11 Jul 2024 19:01:13 +0000 (12:01 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Thu, 11 Jul 2024 19:01:13 +0000 (12:01 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Thu, 11 Jul 2024 19:01:13 +0000 (12:01 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Thu, 11 Jul 2024 19:01:13 +0000 (12:01 -0700)
diff --git a/rust/src/lex/segment.rs b/rust/src/lex/segment.rs

deleted file mode 100644 (file)

index 53bc26d..0000000
--- a/rust/src/lex/segment.rs
+++ /dev/null
@@ -1,3421 +0,0 @@
-//! Syntax segmentation.
-//!
-//! PSPP divides traditional "lexical analysis" or "tokenization" into two
-//! phases: a lower-level phase called "segmentation" and a higher-level phase
-//! called "scanning".  This module implements the segmentation phase.
-//! [`super::scan`] contains declarations for the scanning phase.
-//!
-//! Segmentation accepts a stream of UTF-8 bytes as input.  It outputs a label
-//! (a segment type) for each byte or contiguous sequence of bytes in the input.
-//! It also, in a few corner cases, outputs zero-width segments that label the
-//! boundary between a pair of bytes in the input.
-//!
-//! Some segment types correspond directly to tokens; for example, an
-//! "identifier" segment (SEG_IDENTIFIER) becomes an identifier token (T_ID)
-//! later in lexical analysis.  Other segments contribute to tokens but do not
-//! correspond directly; for example, multiple quoted string segments
-//! (SEG_QUOTED_STRING) separated by spaces (SEG_SPACES) and "+" punctuators
-//! (SEG_PUNCT) may be combined to form a single string token (T_STRING).  Still
-//! other segments are ignored (e.g. SEG_SPACES) or trigger special behavior
-//! such as error messages later in tokenization (e.g. SEG_EXPECTED_QUOTE).
-
-use crate::{
-    identifier::{id_match, id_match_n, is_reserved_word, IdentifierChar},
-    prompt::PromptStyle,
-};
-use bitflags::bitflags;
-
-use super::command_name::{command_match, COMMAND_NAMES};
-
-/// Segmentation mode.
-///
-/// PSPP syntax is written in one of two modes which are broadly defined as
-/// follows:
-///
-/// - In interactive mode, commands end with a period at the end of the line
-///   or with a blank line.
-///
-/// - In batch mode, the second and subsequent lines of a command are indented
-///   from the left margin.
-///
-/// The segmenter can also try to automatically detect the mode in use, using a
-/// heuristic that is usually correct.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
-pub enum Mode {
-    /// Try to interpret input correctly regardless of whether it is written
-    /// for interactive or batch mode.
-    #[default]
-    Auto,
-
-    /// Interactive syntax mode.
-    Interactive,
-
-    /// Batch syntax mode.
-    Batch,
-}
-
-/// The type of a segment.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum Type {
-    Number,
-    QuotedString,
-    HexString,
-    UnicodeString,
-    UnquotedString,
-    ReservedWord,
-    Identifier,
-    Punct,
-    Shbang,
-    Spaces,
-    Comment,
-    Newline,
-    CommentCommand,
-    DoRepeatCommand,
-    DoRepeatOverflow,
-    InlineData,
-    MacroId,
-    MacroName,
-    MacroBody,
-    StartDocument,
-    Document,
-    StartCommand,
-    SeparateCommands,
-    EndCommand,
-    End,
-    ExpectedQuote,
-    ExpectedExponent,
-    UnexpectedChar,
-}
-
-bitflags! {
-    #[derive(Copy, Clone, Debug)]
-    pub struct Substate: u8 {
-        const START_OF_LINE = 1;
-        const START_OF_COMMAND = 2;
-    }
-}
-
-#[derive(Copy, Clone)]
-pub struct Segmenter {
-    state: (State, Substate),
-    nest: u8,
-    mode: Mode,
-}
-
-#[derive(Copy, Clone, Debug)]
-pub struct Incomplete;
-
-impl Segmenter {
-    /// Returns a segmenter with the given syntax `mode`.
-    ///
-    /// If `is_snippet` is false, then the segmenter will parse as if it's being
-    /// given a whole file.  This means, for example, that it will interpret `-`
-    /// or `+` at the beginning of the syntax as a separator between commands
-    /// (since `-` or `+` at the beginning of a line has this meaning).
-    ///
-    /// If `is_snippet` is true, then the segmenter will parse as if it's being
-    /// given an isolated piece of syntax.  This means, for example, that
-    /// it will interpret `-` or `+` at the beginning of the syntax as an
-    /// operator token or (if followed by a digit) as part of a number.
-    pub fn new(mode: Mode, is_snippet: bool) -> Self {
-        Self {
-            state: if is_snippet {
-                (State::General, Substate::empty())
-            } else {
-                (State::Shbang, Substate::empty())
-            },
-            mode,
-            nest: 0,
-        }
-    }
-
-    pub fn mode(&self) -> Mode {
-        self.mode
-    }
-
-    fn start_of_line(&self) -> bool {
-        self.state.1.contains(Substate::START_OF_LINE)
-    }
-
-    fn start_of_command(&self) -> bool {
-        self.state.1.contains(Substate::START_OF_COMMAND)
-    }
-
-    /// Returns the style of command prompt to display to an interactive user
-    /// for input in the current state.  The return value is most accurate in
-    /// mode `Mode::Interactive` and at the beginning of a line (that is, if
-    /// [`Segmenter::push`] consumed as much as possible of the input up to a
-    /// new-line).
-    pub fn prompt(&self) -> PromptStyle {
-        match self.state.0 {
-            State::Shbang => PromptStyle::First,
-            State::General => {
-                if self.start_of_command() {
-                    PromptStyle::First
-                } else {
-                    PromptStyle::Later
-                }
-            }
-            State::Comment1 | State::Comment2 => PromptStyle::Comment,
-            State::Document1 | State::Document2 => PromptStyle::Document,
-            State::Document3 => PromptStyle::First,
-            State::FileLabel1 => PromptStyle::Later,
-            State::FileLabel2 | State::FileLabel3 => PromptStyle::First,
-            State::DoRepeat1 | State::DoRepeat2 => {
-                if self.start_of_command() {
-                    PromptStyle::First
-                } else {
-                    PromptStyle::Later
-                }
-            }
-            State::DoRepeat3 => PromptStyle::DoRepeat,
-            State::DoRepeat4 => PromptStyle::DoRepeat,
-            State::Define1 | State::Define2 | State::Define3 => {
-                if self.start_of_command() {
-                    PromptStyle::First
-                } else {
-                    PromptStyle::Later
-                }
-            }
-            State::Define4 | State::Define5 | State::Define6 => PromptStyle::Define,
-            State::BeginData1 => PromptStyle::First,
-            State::BeginData2 => PromptStyle::Later,
-            State::BeginData3 | State::BeginData4 => PromptStyle::Data,
-        }
-    }
-
-    /// Attempts to label a prefix of the remaining input with a segment type.
-    /// The caller supplies a prefix of the remaining input as `input`.  If
-    /// `eof` is true, then `input` is the entire remainder of the input; if
-    /// `eof` is false, then further input is potentially available.
-    ///
-    /// The input may contain '\n' or '\r\n' line ends in any combination.
-    ///
-    /// If successful, returns `Ok((rest, type))`, where `rest` is the suffix of
-    /// `input` that follows the segment (so the segment occupies the first
-    /// `input.len() - rest.len()` bytes of `input`) and `type` is the type of that
-    /// segment.  The next call should not include the segment's bytes in `input`,
-    /// because they have (figuratively) been consumed by the segmenter.
-    ///
-    /// Segments can have zero length, including segment types `Type::End`,
-    /// `Type::SeparateCommands`, `Type::StartDocument`, `Type::InlineData`, and
-    /// `Type::Spaces`.
-    ///
-    /// Failure occurs only if the segment type of the bytes in `input` cannot
-    /// yet be determined.  In this case, this function returns `Err(Incomplete)`.  If
-    /// more input is available, the caller should obtain some more, then call
-    /// again with a longer `input`.  If this is not enough, the process might
-    /// need to repeat again and again.  If input is exhausted, then the caller
-    /// may call again setting `eof` to true.  This function will never return
-    /// `Err(Incomplete)` when `eof` is true.
-    ///
-    /// The caller must not, in a sequence of calls, supply contradictory input.
-    /// That is, bytes provided as part of `input` in one call, but not
-    /// consumed, must not be provided with *different* values on subsequent
-    /// calls.  This is because the function must often make decisions based on
-    /// looking ahead beyond the bytes that it consumes.
-    pub fn push<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> {
-        if input.is_empty() {
-            if eof {
-                return Ok((input, Type::End));
-            } else {
-                return Err(Incomplete);
-            };
-        }
-
-        match self.state.0 {
-            State::Shbang => return self.parse_shbang(input, eof),
-            State::General => {
-                if self.start_of_line() {
-                    self.parse_start_of_line(input, eof)
-                } else {
-                    self.parse_mid_line(input, eof)
-                }
-            }
-            State::Comment1 => self.parse_comment_1(input, eof),
-            State::Comment2 => self.parse_comment_2(input, eof),
-            State::Document1 => self.parse_document_1(input, eof),
-            State::Document2 => self.parse_document_2(input, eof),
-            State::Document3 => self.parse_document_3(input, eof),
-            State::FileLabel1 => self.parse_file_label_1(input, eof),
-            State::FileLabel2 => self.parse_file_label_2(input, eof),
-            State::FileLabel3 => self.parse_file_label_3(input, eof),
-            State::DoRepeat1 => self.parse_do_repeat_1(input, eof),
-            State::DoRepeat2 => self.parse_do_repeat_2(input, eof),
-            State::DoRepeat3 => self.parse_do_repeat_3(input, eof),
-            State::DoRepeat4 => self.parse_do_repeat_4(input),
-            State::Define1 => self.parse_define_1_2(input, eof),
-            State::Define2 => self.parse_define_1_2(input, eof),
-            State::Define3 => self.parse_define_3(input, eof),
-            State::Define4 => self.parse_define_4_5(input, eof),
-            State::Define5 => self.parse_define_4_5(input, eof),
-            State::Define6 => self.parse_define_6(input, eof),
-            State::BeginData1 => self.parse_begin_data_1(input, eof),
-            State::BeginData2 => self.parse_begin_data_2(input, eof),
-            State::BeginData3 => self.parse_begin_data_3(input, eof),
-            State::BeginData4 => self.parse_begin_data_4(input, eof),
-        }
-    }
-}
-
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-enum State {
-    Shbang,
-    General,
-    Comment1,
-    Comment2,
-    Document1,
-    Document2,
-    Document3,
-    FileLabel1,
-    FileLabel2,
-    FileLabel3,
-    DoRepeat1,
-    DoRepeat2,
-    DoRepeat3,
-    DoRepeat4,
-    Define1,
-    Define2,
-    Define3,
-    Define4,
-    Define5,
-    Define6,
-    BeginData1,
-    BeginData2,
-    BeginData3,
-    BeginData4,
-}
-
-fn take(input: &str, eof: bool) -> Result<(Option<char>, &str), Incomplete> {
-    let mut iter = input.chars();
-    match iter.next() {
-        None if !eof => Err(Incomplete),
-        c => Ok((c, iter.as_str())),
-    }
-}
-
-fn skip_comment(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
-    loop {
-        let (Some(c), rest) = take(input, eof)? else {
-            return Ok(input);
-        };
-        match c {
-            '\n' | '\r' if is_end_of_line(input, eof)? => return Ok(input),
-            '*' => {
-                if let (Some('/'), rest) = take(rest, eof)? {
-                    return Ok(rest);
-                }
-            }
-            _ => (),
-        };
-        input = rest;
-    }
-}
-
-fn skip_matching<F>(f: F, input: &str, eof: bool) -> Result<&str, Incomplete>
-where
-    F: Fn(char) -> bool,
-{
-    let input = input.trim_start_matches(f);
-    if input.is_empty() && !eof {
-        Err(Incomplete)
-    } else {
-        Ok(input)
-    }
-}
-
-fn match_char<F>(f: F, input: &str, eof: bool) -> Result<Option<&str>, Incomplete>
-where
-    F: Fn(char) -> bool,
-{
-    if let (Some(c), rest) = take(input, eof)? {
-        if f(c) {
-            return Ok(Some(rest));
-        }
-    }
-    Ok(None)
-}
-
-fn skip_spaces(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
-    loop {
-        let (Some(c), rest) = take(input, eof)? else {
-            return Ok(input);
-        };
-        match c {
-            '\r' | '\n' if is_end_of_line(input, eof)? => return Ok(input),
-            c if c.is_whitespace() => (),
-            _ => return Ok(input),
-        }
-        input = rest;
-    }
-}
-
-fn skip_digits(input: &str, eof: bool) -> Result<&str, Incomplete> {
-    skip_matching(|c| c.is_ascii_digit(), input, eof)
-}
-
-fn skip_spaces_and_comments(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
-    loop {
-        let (Some(c), rest) = take(input, eof)? else {
-            return Ok(input);
-        };
-        match c {
-            '/' => {
-                let (c, rest2) = take(rest, eof)?;
-                match c {
-                    Some('*') => input = skip_comment(rest2, eof)?,
-                    Some(_) | None => return Ok(rest),
-                }
-            }
-            '\r' | '\n' if is_end_of_line(input, eof)? => return Ok(input),
-            c if c.is_whitespace() => input = rest,
-            _ => return Ok(input),
-        };
-    }
-}
-
-fn is_start_of_string(input: &str, eof: bool) -> Result<bool, Incomplete> {
-    let (Some(c), _rest) = take(input, eof)? else {
-        return Ok(false);
-    };
-    match c {
-        'x' | 'X' | 'u' | 'U' => Ok({
-            let (c, _rest) = take(input, eof)?;
-            c == Some('\'') || c == Some('"')
-        }),
-        '\'' | '"' | '\n' => Ok(true),
-        _ => Ok(false),
-    }
-}
-
-fn is_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
-    let (Some(c), rest) = take(input, eof)? else {
-        return Ok(true);
-    };
-    Ok(match c {
-        '\n' => true,
-        '\r' => take(rest, eof)?.0 == Some('\n'),
-        _ => false,
-    })
-}
-
-fn at_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
-    is_end_of_line(skip_spaces_and_comments(input, eof)?, eof)
-}
-
-fn first(s: &str) -> char {
-    s.chars().next().unwrap()
-}
-fn get_command_name_candidates(target: &str) -> &[&'static str] {
-    if target.is_empty() {
-        return &[];
-    }
-    let target_first = first(target).to_ascii_uppercase();
-    let low = COMMAND_NAMES.partition_point(|s| first(s) < target_first);
-    let high = COMMAND_NAMES.partition_point(|s| first(s) <= target_first);
-    &COMMAND_NAMES[low..high]
-}
-
-fn detect_command_name(input: &str, eof: bool) -> Result<bool, Incomplete> {
-    let command_name = input
-        .split(|c: char| {
-            !((c.is_whitespace() && c != '\n') || (c.may_continue_id() && c != '.') || c == '-')
-        })
-        .next()
-        .unwrap();
-    if !eof && command_name.len() == input.len() {
-        return Err(Incomplete);
-    }
-    let command_name = command_name.trim_end_matches(|c: char| c.is_whitespace() || c == '.');
-    for command in get_command_name_candidates(command_name) {
-        if let Some(m) = command_match(command, command_name) {
-            if m.missing_words <= 0 {
-                return Ok(true);
-            }
-        }
-    }
-    Ok(false)
-}
-
-impl Segmenter {
-    fn parse_shbang<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        if let (Some('#'), rest) = take(input, eof)? {
-            if let (Some('!'), rest) = take(rest, eof)? {
-                let rest = self.parse_full_line(rest, eof)?;
-                self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((rest, Type::Shbang));
-            }
-        }
-
-        self.state = (
-            State::General,
-            Substate::START_OF_COMMAND | Substate::START_OF_LINE,
-        );
-        self.push(input, eof)
-    }
-    fn at_command_start(&self, input: &str, eof: bool) -> Result<bool, Incomplete> {
-        match self.mode {
-            Mode::Auto => detect_command_name(input, eof),
-            Mode::Interactive => Ok(false),
-            Mode::Batch => Ok(true),
-        }
-    }
-    fn parse_start_of_line<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        debug_assert_eq!(self.state.0, State::General);
-        debug_assert!(self.start_of_line());
-        debug_assert!(!input.is_empty());
-
-        let (Some(c), rest) = take(input, eof).unwrap() else {
-            unreachable!()
-        };
-        match c {
-            '+' if is_start_of_string(skip_spaces_and_comments(rest, eof)?, eof)? => {
-                // This `+` is punctuation that may separate pieces of a string.
-                self.state = (State::General, Substate::empty());
-                return Ok((rest, Type::Punct));
-            }
-            '+' | '-' | '.' => {
-                self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((rest, Type::StartCommand));
-            }
-            _ if c.is_whitespace() => {
-                if at_end_of_line(input, eof)? {
-                    self.state = (State::General, Substate::START_OF_COMMAND);
-                    return Ok((input, Type::SeparateCommands));
-                }
-            }
-            _ => {
-                if self.at_command_start(input, eof)?
-                    && !self.state.1.contains(Substate::START_OF_COMMAND)
-                {
-                    self.state = (State::General, Substate::START_OF_COMMAND);
-                    return Ok((input, Type::StartCommand));
-                }
-            }
-        }
-        self.state.1 = Substate::START_OF_COMMAND;
-        self.parse_mid_line(input, eof)
-    }
-    fn parse_mid_line<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        debug_assert!(self.state.0 == State::General);
-        debug_assert!(!self.state.1.contains(Substate::START_OF_LINE));
-        let (Some(c), rest) = take(input, eof)? else {
-            unreachable!()
-        };
-        match c {
-            '\r' | '\n' if is_end_of_line(input, eof)? => {
-                self.state.1 |= Substate::START_OF_LINE;
-                Ok((
-                    self.parse_newline(input, eof).unwrap().unwrap(),
-                    Type::Newline,
-                ))
-            }
-            '/' => {
-                if let (Some('*'), rest) = take(rest, eof)? {
-                    let rest = skip_comment(rest, eof)?;
-                    return Ok((rest, Type::Comment));
-                } else {
-                    self.state.1 = Substate::empty();
-                    return Ok((rest, Type::Punct));
-                }
-            }
-            '-' => {
-                let (c, rest2) = take(skip_spaces(rest, eof)?, eof)?;
-                match c {
-                    Some(c) if c.is_ascii_digit() => {
-                        return self.parse_number(rest, eof);
-                    }
-                    Some('.') => {
-                        if let (Some(c), _rest) = take(rest2, eof)? {
-                            if c.is_ascii_digit() {
-                                return self.parse_number(rest, eof);
-                            }
-                        }
-                    }
-                    None | Some(_) => (),
-                }
-                self.state.1 = Substate::empty();
-                return Ok((rest, Type::Punct));
-            }
-            '(' | ')' | '[' | ']' | '{' | '}' | ',' | '=' | ';' | ':' | '&' | '|' | '+' => {
-                self.state.1 = Substate::empty();
-                return Ok((rest, Type::Punct));
-            }
-            '*' => {
-                if self.state.1.contains(Substate::START_OF_COMMAND) {
-                    self.state.0 = State::Comment1;
-                    self.parse_comment_1(input, eof)
-                } else {
-                    self.parse_digraph(&['*'], rest, eof)
-                }
-            }
-            '<' => self.parse_digraph(&['=', '>'], rest, eof),
-            '>' => self.parse_digraph(&['='], rest, eof),
-            '~' => self.parse_digraph(&['='], rest, eof),
-            '.' if at_end_of_line(rest, eof)? => {
-                self.state.1 = Substate::START_OF_COMMAND;
-                Ok((rest, Type::EndCommand))
-            }
-            '.' => match take(rest, eof)? {
-                (Some(c), _) if c.is_ascii_digit() => self.parse_number(input, eof),
-                _ => Ok((rest, Type::Punct)),
-            },
-            '0'..='9' => self.parse_number(input, eof),
-            'u' | 'U' => self.maybe_parse_string(Type::UnicodeString, (input, rest), eof),
-            'x' | 'X' => self.maybe_parse_string(Type::HexString, (input, rest), eof),
-            '\'' | '"' => self.parse_string(Type::QuotedString, c, rest, eof),
-            '!' => {
-                let (c, rest2) = take(rest, eof)?;
-                match c {
-                    Some('*') => Ok((rest2, Type::MacroId)),
-                    Some(_) => self.parse_id(input, eof),
-                    None => Ok((rest, Type::Punct)),
-                }
-            }
-            c if c.is_whitespace() => Ok((skip_spaces(rest, eof)?, Type::Spaces)),
-            c if c.may_start_id() => self.parse_id(input, eof),
-            '!'..='~' if c != '\\' && c != '^' => {
-                self.state.1 = Substate::empty();
-                Ok((rest, Type::Punct))
-            }
-            _ => {
-                self.state.1 = Substate::empty();
-                Ok((rest, Type::UnexpectedChar))
-            }
-        }
-    }
-    fn parse_string<'a>(
-        &mut self,
-        type_: Type,
-        quote: char,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        while let (Some(c), rest) = take(input, eof)? {
-            match c {
-                _ if c == quote => {
-                    let (c, rest2) = take(rest, eof)?;
-                    if c != Some(quote) {
-                        self.state.1 = Substate::empty();
-                        return Ok((rest, type_));
-                    }
-                    input = rest2;
-                }
-                '\r' | '\n' if is_end_of_line(input, eof)? => break,
-                _ => input = rest,
-            }
-        }
-        self.state.1 = Substate::empty();
-        Ok((input, Type::ExpectedQuote))
-    }
-    fn maybe_parse_string<'a>(
-        &mut self,
-        type_: Type,
-        input: (&'a str, &'a str),
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        match take(input.1, eof)? {
-            (Some(c), rest) if c == '\'' || c == '"' => self.parse_string(type_, c, rest, eof),
-            _ => self.parse_id(input.0, eof),
-        }
-    }
-    fn next_id_in_command<'a>(
-        &self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, &'a str), Incomplete> {
-        let mut sub = Segmenter::new(self.mode, true);
-        loop {
-            let (rest, type_) = sub.push(input, eof)?;
-            match type_ {
-                Type::Shbang | Type::Spaces | Type::Comment | Type::Newline => (),
-
-                Type::Identifier => return Ok((&input[..input.len() - rest.len()], rest)),
-
-                Type::Number
-                | Type::QuotedString
-                | Type::HexString
-                | Type::UnicodeString
-                | Type::UnquotedString
-                | Type::ReservedWord
-                | Type::Punct
-                | Type::CommentCommand
-                | Type::DoRepeatCommand
-                | Type::DoRepeatOverflow
-                | Type::InlineData
-                | Type::MacroId
-                | Type::MacroName
-                | Type::MacroBody
-                | Type::StartDocument
-                | Type::Document
-                | Type::StartCommand
-                | Type::SeparateCommands
-                | Type::EndCommand
-                | Type::End
-                | Type::ExpectedQuote
-                | Type::ExpectedExponent
-                | Type::UnexpectedChar => return Ok(("", rest)),
-            }
-            input = rest;
-        }
-    }
-    fn parse_id<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> {
-        let (Some(_), mut end) = take(input, eof).unwrap() else {
-            unreachable!()
-        };
-        while let (Some(c), rest) = take(end, eof)? {
-            if !c.may_continue_id() {
-                break;
-            };
-            end = rest;
-        }
-        let identifier = &input[..input.len() - end.len()];
-        let identifier = match identifier.strip_suffix('.') {
-            Some(without_dot) if at_end_of_line(end, eof)? => without_dot,
-            _ => identifier,
-        };
-        let rest = &input[identifier.len()..];
-
-        if self.state.1.contains(Substate::START_OF_COMMAND) {
-            if id_match_n("COMMENT", identifier, 4) {
-                self.state.0 = State::Comment1;
-                return self.parse_comment_1(input, eof);
-            } else if id_match("DOCUMENT", identifier) {
-                self.state.0 = State::Document1;
-                return Ok((input, Type::StartDocument));
-            } else if id_match_n("DEFINE", identifier, 6) {
-                self.state.0 = State::Define1;
-            } else if id_match("FILE", identifier) {
-                if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) {
-                    self.state = (State::FileLabel1, Substate::empty());
-                    return Ok((rest, Type::Identifier));
-                }
-            } else if id_match("DO", identifier) {
-                if id_match("REPEAT", self.next_id_in_command(rest, eof)?.0) {
-                    self.state = (State::DoRepeat1, Substate::empty());
-                    return Ok((rest, Type::Identifier));
-                }
-            } else if id_match("BEGIN", identifier) {
-                let (next_id, rest2) = self.next_id_in_command(rest, eof)?;
-                if id_match("DATA", next_id) {
-                    let rest2 = skip_spaces_and_comments(rest2, eof)?;
-                    let rest2 = if let Some(s) = rest2.strip_prefix('.') {
-                        skip_spaces_and_comments(s, eof)?
-                    } else {
-                        rest2
-                    };
-                    if is_end_of_line(rest2, eof)? {
-                        let s = &input[..input.len() - rest2.len()];
-                        self.state = (
-                            if s.contains('\n') {
-                                State::BeginData1
-                            } else {
-                                State::BeginData2
-                            },
-                            Substate::empty(),
-                        );
-                        return Ok((rest, Type::Identifier));
-                    }
-                }
-            }
-        }
-
-        self.state.1 = Substate::empty();
-        let type_ = if is_reserved_word(identifier) {
-            Type::ReservedWord
-        } else if identifier.starts_with('!') {
-            Type::MacroId
-        } else {
-            Type::Identifier
-        };
-        Ok((rest, type_))
-    }
-    fn parse_digraph<'a>(
-        &mut self,
-        seconds: &[char],
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (c, rest) = take(input, eof)?;
-        self.state.1 = Substate::empty();
-        Ok((
-            match c {
-                Some(c) if seconds.contains(&c) => rest,
-                _ => input,
-            },
-            Type::Punct,
-        ))
-    }
-    fn parse_number<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let mut input = skip_digits(input, eof)?;
-        if let Some(rest) = match_char(|c| c == '.', input, eof)? {
-            let rest2 = skip_digits(rest, eof)?;
-            if rest2.len() < rest.len() || !at_end_of_line(rest2, eof)? {
-                input = rest2;
-            }
-        };
-        if let Some(rest) = match_char(|c| c == 'e' || c == 'E', input, eof)? {
-            let rest = match_char(|c| c == '+' || c == '-', rest, eof)?.unwrap_or(rest);
-            let rest2 = skip_digits(rest, eof)?;
-            if rest2.len() == rest.len() {
-                self.state.1 = Substate::empty();
-                return Ok((rest, Type::ExpectedExponent));
-            }
-            input = rest2;
-        }
-        self.state.1 = Substate::empty();
-        Ok((input, Type::Number))
-    }
-    fn parse_comment_1<'a>(
-        &mut self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        enum CommentState<'a> {
-            Blank,
-            NotBlank,
-            Period(&'a str),
-        }
-        let mut state = CommentState::Blank;
-        loop {
-            let (Some(c), rest) = take(input, eof)? else {
-                // End of file.
-                self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((input, Type::SeparateCommands));
-            };
-            match c {
-                '.' => state = CommentState::Period(input),
-                '\n' | '\r' if is_end_of_line(input, eof)? => {
-                    match state {
-                        CommentState::Blank => {
-                            // Blank line ends comment command.
-                            self.state = (State::General, Substate::START_OF_COMMAND);
-                            return Ok((input, Type::SeparateCommands));
-                        }
-                        CommentState::Period(period) => {
-                            // '.' at end of line ends comment command.
-                            self.state = (State::General, Substate::empty());
-                            return Ok((period, Type::CommentCommand));
-                        }
-                        CommentState::NotBlank => {
-                            // Comment continues onto next line.
-                            self.state = (State::Comment2, Substate::empty());
-                            return Ok((input, Type::CommentCommand));
-                        }
-                    }
-                }
-                c if c.is_whitespace() => (),
-                _ => state = CommentState::NotBlank,
-            }
-            input = rest;
-        }
-    }
-    fn parse_comment_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let rest = self.parse_newline(input, eof)?.unwrap();
-
-        let new_command = match take(rest, eof)?.0 {
-            Some('+') | Some('-') | Some('.') => true,
-            Some(c) if !c.is_whitespace() => self.at_command_start(rest, eof)?,
-            None | Some(_) => false,
-        };
-        if new_command {
-            self.state = (
-                State::General,
-                Substate::START_OF_LINE | Substate::START_OF_COMMAND,
-            );
-        } else {
-            self.state.0 = State::Comment1;
-        }
-        Ok((rest, Type::Newline))
-    }
-    fn parse_document_1<'a>(
-        &mut self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let mut end_cmd = false;
-        loop {
-            let (Some(c), rest) = take(input, eof)? else {
-                self.state.0 = State::Document3;
-                return Ok((input, Type::Document));
-            };
-            match c {
-                '.' => end_cmd = true,
-                '\n' | '\r' if is_end_of_line(input, eof)? => {
-                    self.state.0 = if end_cmd {
-                        State::Document3
-                    } else {
-                        State::Document2
-                    };
-                    return Ok((input, Type::Document));
-                }
-                c if !c.is_whitespace() => end_cmd = false,
-                _ => (),
-            }
-            input = rest;
-        }
-    }
-    fn parse_document_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let rest = self.parse_newline(input, eof)?.unwrap();
-        self.state.0 = State::Document1;
-        Ok((rest, Type::Newline))
-    }
-    fn parse_document_3<'a>(
-        &mut self,
-        input: &'a str,
-        _eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        self.state = (
-            State::General,
-            Substate::START_OF_COMMAND | Substate::START_OF_LINE,
-        );
-        Ok((input, Type::EndCommand))
-    }
-    fn quoted_file_label(input: &str, eof: bool) -> Result<bool, Incomplete> {
-        let input = skip_spaces_and_comments(input, eof)?;
-        match take(input, eof)?.0 {
-            Some('\'') | Some('"') | Some('\n') => Ok(true),
-            _ => Ok(false),
-        }
-    }
-    fn parse_file_label_1<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let mut sub = Segmenter {
-            state: (State::General, self.state.1),
-            ..*self
-        };
-        let (rest, type_) = sub.push(input, eof)?;
-        if type_ == Type::Identifier {
-            let id = &input[..input.len() - rest.len()];
-            debug_assert!(id_match("LABEL", id), "{id} should be LABEL");
-            if Self::quoted_file_label(rest, eof)? {
-                *self = sub;
-            } else {
-                self.state.0 = State::FileLabel2;
-            }
-        } else {
-            self.state.1 = sub.state.1;
-        }
-        Ok((rest, type_))
-    }
-    fn parse_file_label_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let input = skip_spaces(input, eof)?;
-        self.state.0 = State::FileLabel3;
-        Ok((input, Type::Spaces))
-    }
-    fn parse_file_label_3<'a>(
-        &mut self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let mut end_cmd = None;
-        loop {
-            let (c, rest) = take(input, eof)?;
-            match c {
-                None | Some('\n') | Some('\r') if is_end_of_line(input, eof)? => {
-                    self.state = (State::General, Substate::empty());
-                    return Ok((end_cmd.unwrap_or(input), Type::UnquotedString));
-                }
-                None => unreachable!(),
-                Some('.') => end_cmd = Some(input),
-                Some(c) if !c.is_whitespace() => end_cmd = None,
-                Some(_) => (),
-            }
-            input = rest;
-        }
-    }
-    fn subparse<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> {
-        let mut sub = Segmenter {
-            mode: self.mode,
-            state: (State::General, self.state.1),
-            nest: 0,
-        };
-        let result = sub.push(input, eof)?;
-        self.state.1 = sub.state.1;
-        Ok(result)
-    }
-    /// We are segmenting a `DO REPEAT` command, currently reading the syntax
-    /// that defines the stand-in variables (the head) before the lines of
-    /// syntax to be repeated (the body).
-    fn parse_do_repeat_1<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        if type_ == Type::SeparateCommands {
-            // We reached a blank line that separates the head from the body.
-            self.state.0 = State::DoRepeat2;
-        } else if type_ == Type::EndCommand || type_ == Type::StartCommand {
-            // We reached the body.
-            self.state.0 = State::DoRepeat3;
-            self.nest = 1;
-        }
-        Ok((rest, type_))
-    }
-    /// We are segmenting a `DO REPEAT` command, currently reading a blank line
-    /// that separates the head from the body.
-    fn parse_do_repeat_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        if type_ == Type::Newline {
-            // We reached the body.
-            self.state.0 = State::DoRepeat3;
-            self.nest = 1;
-        }
-        Ok((rest, type_))
-    }
-    fn parse_newline<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<&'a str>, Incomplete> {
-        let (Some(c), rest) = take(input, eof)? else {
-            return Ok(None);
-        };
-        match c {
-            '\n' => Ok(Some(rest)),
-            '\r' => {
-                if let (Some('\n'), rest) = take(rest, eof)? {
-                    Ok(Some(rest))
-                } else {
-                    Ok(None)
-                }
-            }
-            _ => Ok(None),
-        }
-    }
-
-    fn parse_full_line<'a>(
-        &mut self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<&'a str, Incomplete> {
-        loop {
-            if is_end_of_line(input, eof)? {
-                return Ok(input);
-            }
-            input = take(input, eof).unwrap().1;
-        }
-    }
-    fn check_repeat_command<'a>(&mut self, input: &'a str, eof: bool) -> Result<isize, Incomplete> {
-        let input = input.strip_prefix(&['-', '+']).unwrap_or(input);
-        let (id1, input) = self.next_id_in_command(input, eof)?;
-        if id_match("DO", id1) && id_match("REPEAT", self.next_id_in_command(input, eof)?.0) {
-            Ok(1)
-        } else if id_match("END", id1) && id_match("REPEAT", self.next_id_in_command(input, eof)?.0)
-        {
-            Ok(-1)
-        } else {
-            Ok(0)
-        }
-    }
-    /// We are in the body of `DO REPEAT`, segmenting the lines of syntax that
-    /// are to be repeated.  Report each line of syntax as a single
-    /// [`Type::DoRepeatCommand`].
-    ///
-    /// `DO REPEAT` can be nested, so we look for `DO REPEAT...END REPEAT`
-    /// blocks inside the lines we're segmenting.  `self.nest` counts the
-    /// nesting level, starting at 1.
-    fn parse_do_repeat_3<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        if let Some(rest) = self.parse_newline(input, eof)? {
-            return Ok((rest, Type::Newline));
-        }
-        let rest = self.parse_full_line(input, eof)?;
-        let direction = self.check_repeat_command(input, eof)?;
-        if direction > 0 {
-            if let Some(nest) = self.nest.checked_add(1) {
-                self.nest = nest;
-            } else {
-                self.state.0 = State::DoRepeat4;
-            }
-        } else if direction < 0 {
-            self.nest -= 1;
-            if self.nest == 0 {
-                // Nesting level dropped to 0, so we've finished reading the `DO
-                // REPEAT` body.
-                self.state = (
-                    State::General,
-                    Substate::START_OF_COMMAND | Substate::START_OF_LINE,
-                );
-                return self.push(input, eof);
-            }
-        }
-        return Ok((rest, Type::DoRepeatCommand));
-    }
-    fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<(&'a str, Type), Incomplete> {
-        self.state.0 = State::DoRepeat3;
-        Ok((input, Type::DoRepeatOverflow))
-    }
-    /// We are segmenting a `DEFINE` command, which consists of:
-    ///
-    ///   - The `DEFINE` keyword.
-    ///
-    ///   - An identifier.  We transform this into `Type::MacroName` instead of
-    ///     `Type::Identifier` or `Type::MacroId` because this identifier must
-    ///     never be macro-expanded.
-    ///
-    ///   - Anything but `(`.
-    ///
-    ///   - `(` followed by a sequence of tokens possibly including balanced
-    ///     parentheses up to a final `)`.
-    ///
-    ///   - A sequence of any number of lines, one string per line, ending with
-    ///     `!ENDDEFINE`.  The first line is usually blank (that is, a newline
-    ///     follows the `(`).  The last line usually just has `!ENDDEFINE.` on
-    ///     it, but it can start with other tokens.  The whole
-    ///     DEFINE...!ENDDEFINE can be on a single line, even.
-    fn parse_define_1_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        match type_ {
-            Type::Identifier | Type::MacroId if self.state.0 == State::Define1 => {
-                self.state.0 = State::Define2;
-                return Ok((rest, Type::MacroName));
-            }
-            Type::SeparateCommands | Type::EndCommand | Type::StartCommand => {
-                // The DEFINE command is malformed because we reached its end
-                // without ever hitting a `(` token.  Transition back to general
-                // parsing.
-                self.state.0 = State::General;
-            }
-            Type::Punct if input.starts_with('(') => {
-                self.state.0 = State::Define3;
-                self.nest = 1;
-            }
-            _ => (),
-        }
-        Ok((rest, type_))
-    }
-    fn parse_define_3<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        match type_ {
-            Type::SeparateCommands | Type::EndCommand | Type::StartCommand => {
-                // The DEFINE command is malformed because we reached its end
-                // without ever hitting a `(` token.  Transition back to general
-                // parsing.
-                self.state.0 = State::General;
-            }
-            Type::Punct if input.starts_with('(') => {
-                self.nest += 1;
-            }
-            Type::Punct if input.starts_with(')') => {
-                self.nest -= 1;
-                if self.nest == 0 {
-                    self.state = (State::Define4, Substate::empty());
-                }
-            }
-            _ => (),
-        }
-        Ok((rest, type_))
-    }
-    fn find_enddefine<'a>(mut input: &'a str) -> Option<&'a str> {
-        loop {
-            input = skip_spaces_and_comments(input, true).unwrap();
-            let (Some(c), rest) = take(input, true).unwrap() else {
-                return None;
-            };
-            match c {
-                '!' if strip_prefix_ignore_ascii_case(input, "!ENDDEFINE").is_some() => {
-                    return Some(input)
-                }
-                '\'' | '"' => {
-                    let index = rest.find(c)?;
-                    input = &rest[index + 1..];
-                }
-                _ => input = rest,
-            }
-        }
-    }
-
-    /// We are in the body of a macro definition, looking for additional lines
-    /// of the body or `!ENDDEFINE`.
-    ///
-    /// In `State::Define4`, we're parsing the first line of the macro body (the
-    /// same line as the closing parenthesis in the argument definition).  In
-    /// `State::Define5`, we're on a later line.
-    fn parse_define_4_5<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let rest = self.parse_full_line(input, eof)?;
-        let line = &input[..input.len() - rest.len()];
-        if let Some(end) = Self::find_enddefine(line) {
-            // Macro ends at the !ENDDEFINE on this line.
-            self.state = (State::General, Substate::empty());
-            let (prefix, rest) = input.split_at(line.len() - end.len());
-            if prefix.is_empty() {
-                // Line starts with `!ENDDEFINE`.
-                self.push(input, eof)
-            } else if prefix.trim_start().is_empty() {
-                // Line starts with spaces followed by `!ENDDEFINE`.
-                Ok((rest, Type::Spaces))
-            } else {
-                // Line starts with some content followed by `!ENDDEFINE`.
-                Ok((rest, Type::MacroBody))
-            }
-        } else {
-            // No `!ENDDEFINE`.  We have a full line of macro body.
-            //
-            // If the first line of the macro body is blank, we just report it
-            // as spaces, or not at all if there are no spaces, because it's not
-            // significant.
-            //
-            // However, if it's a later line, we need to report it because blank
-            // lines can have significance.
-            let type_ = if self.state.0 == State::Define4 && line.trim_start().is_empty() {
-                if line.is_empty() {
-                    return self.parse_define_6(input, eof);
-                }
-                Type::Spaces
-            } else {
-                Type::MacroBody
-            };
-            self.state.0 = State::Define6;
-            Ok((rest, type_))
-        }
-    }
-    fn parse_define_6<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let rest = self.parse_newline(input, eof)?.unwrap();
-        self.state.0 = State::Define5;
-        Ok((rest, Type::Newline))
-    }
-    fn parse_begin_data_1<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        if type_ == Type::Newline {
-            self.state.0 = State::BeginData2;
-        }
-        Ok((rest, type_))
-    }
-    fn parse_begin_data_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        if type_ == Type::Newline {
-            self.state.0 = State::BeginData3;
-        }
-        Ok((rest, type_))
-    }
-    fn is_end_data(line: &str) -> bool {
-        let Some(rest) = strip_prefix_ignore_ascii_case(line, "END") else {
-            return false;
-        };
-        let (Some(c), rest) = take(rest, true).unwrap() else {
-            return false;
-        };
-        if !c.is_whitespace() {
-            return false;
-        };
-        let Some(rest) = strip_prefix_ignore_ascii_case(rest, "DATA") else {
-            return false;
-        };
-
-        let mut endcmd = false;
-        for c in rest.chars() {
-            match c {
-                '.' if endcmd => return false,
-                '.' => endcmd = true,
-                c if c.is_whitespace() => (),
-                _ => return false,
-            }
-        }
-        true
-    }
-    fn parse_begin_data_3<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let rest = self.parse_full_line(input, eof)?;
-        let line = &input[..input.len() - rest.len()];
-        if Self::is_end_data(line) {
-            self.state = (
-                State::General,
-                Substate::START_OF_COMMAND | Substate::START_OF_LINE,
-            );
-            self.push(input, eof)
-        } else {
-            self.state.0 = State::BeginData4;
-            Ok((rest, Type::InlineData))
-        }
-    }
-    fn parse_begin_data_4<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let rest = self.parse_newline(input, eof)?.unwrap();
-        self.state.0 = State::BeginData3;
-        Ok((rest, Type::Newline))
-    }
-}
-
-fn strip_prefix_ignore_ascii_case<'a>(line: &'a str, pattern: &str) -> Option<&'a str> {
-    line.get(..pattern.len())
-        .map(|prefix| {
-            prefix
-                .eq_ignore_ascii_case(pattern)
-                .then(|| &line[pattern.len()..])
-        })
-        .flatten()
-}
-
-#[cfg(test)]
-mod test {
-    use crate::prompt::PromptStyle;
-
-    use super::{Mode, Segmenter, Type};
-
-    fn check_segmentation(
-        mut input: &str,
-        mode: Mode,
-        expect_segments: &[(Type, &str)],
-        expect_prompts: &[PromptStyle],
-    ) {
-        let mut segments = Vec::with_capacity(expect_segments.len());
-        let mut prompts = Vec::new();
-        let mut segmenter = Segmenter::new(mode, false);
-        loop {
-            let (rest, type_) = segmenter.push(input, true).unwrap();
-            let len = input.len() - rest.len();
-            let token = &input[..len];
-            segments.push((type_, token));
-            match type_ {
-                Type::End => break,
-                Type::Newline => prompts.push(segmenter.prompt()),
-                _ => (),
-            }
-            input = rest;
-        }
-
-        if &segments != expect_segments {
-            eprintln!("segments differ from expected:");
-            let difference = diff::slice(expect_segments, &segments);
-            for result in difference {
-                match result {
-                    diff::Result::Left(left) => eprintln!("-{left:?}"),
-                    diff::Result::Both(left, _right) => eprintln!(" {left:?}"),
-                    diff::Result::Right(right) => eprintln!("+{right:?}"),
-                }
-            }
-            panic!();
-        }
-
-        if &prompts != expect_prompts {
-            eprintln!("prompts differ from expected:");
-            let difference = diff::slice(expect_prompts, &prompts);
-            for result in difference {
-                match result {
-                    diff::Result::Left(left) => eprintln!("-{left:?}"),
-                    diff::Result::Both(left, _right) => eprintln!(" {left:?}"),
-                    diff::Result::Right(right) => eprintln!("+{right:?}"),
-                }
-            }
-            panic!();
-        }
-    }
-
-    fn print_segmentation(mut input: &str) {
-        let mut segmenter = Segmenter::new(Mode::Auto, false);
-        loop {
-            let (rest, type_) = segmenter.push(input, true).unwrap();
-            let len = input.len() - rest.len();
-            let token = &input[..len];
-            print!("{type_:?} {token:?}");
-            match type_ {
-                Type::Newline => print!(" ({:?})", segmenter.prompt()),
-                Type::End => break,
-                _ => (),
-            }
-            println!();
-            input = rest;
-        }
-    }
-
-    #[test]
-    fn test_identifiers() {
-        check_segmentation(
-            r#"a ab abc abcd !abcd
-A AB ABC ABCD !ABCD
-aB aBC aBcD !aBcD
-$x $y $z !$z
-grève Ângstrom poté
-#a #b #c ## #d !#d
-@efg @ @@. @#@ !@ 
-## # #12345 #.#
-f@#_.#6
-GhIjK
-.x 1y _z
-"#,
-            Mode::Auto,
-            &[
-                (Type::Identifier, "a"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "ab"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "abc"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "abcd"),
-                (Type::Spaces, " "),
-                (Type::MacroId, "!abcd"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "A"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "AB"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "ABC"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "ABCD"),
-                (Type::Spaces, " "),
-                (Type::MacroId, "!ABCD"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "aB"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "aBC"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "aBcD"),
-                (Type::Spaces, " "),
-                (Type::MacroId, "!aBcD"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "$x"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "$y"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "$z"),
-                (Type::Spaces, " "),
-                (Type::MacroId, "!$z"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "grève"),
-                (Type::Spaces, "\u{00a0}"),
-                (Type::Identifier, "Ângstrom"),
-                (Type::Spaces, "\u{00a0}"),
-                (Type::Identifier, "poté"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "#a"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "#b"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "#c"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "##"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "#d"),
-                (Type::Spaces, " "),
-                (Type::MacroId, "!#d"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "@efg"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "@"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "@@."),
-                (Type::Spaces, " "),
-                (Type::Identifier, "@#@"),
-                (Type::Spaces, " "),
-                (Type::MacroId, "!@"),
-                (Type::Spaces, " "),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "##"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "#"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "#12345"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "#.#"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "f@#_.#6"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "GhIjK"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, "."),
-                (Type::Identifier, "x"),
-                (Type::Spaces, " "),
-                (Type::Number, "1"),
-                (Type::Identifier, "y"),
-                (Type::Spaces, " "),
-                (Type::Punct, "_"),
-                (Type::Identifier, "z"),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-            ],
-        );
-    }
-
-    #[test]
-    fn test_identifiers_ending_in_dot() {
-        check_segmentation(
-            r#"abcd. abcd.
-ABCD. ABCD.
-aBcD. aBcD. 
-$y. $z. あいうえお.
-#c. #d..
-@@. @@....
-#.#.
-#abcd.
-.
-. 
-LMNOP. 
-QRSTUV./* end of line comment */
-qrstuv. /* end of line comment */
-QrStUv./* end of line comment */ 
-wxyz./* unterminated end of line comment
-WXYZ. /* unterminated end of line comment
-WxYz./* unterminated end of line comment 
-"#,
-            Mode::Auto,
-            &[
-                (Type::Identifier, "abcd."),
-                (Type::Spaces, " "),
-                (Type::Identifier, "abcd"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "ABCD."),
-                (Type::Spaces, " "),
-                (Type::Identifier, "ABCD"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "aBcD."),
-                (Type::Spaces, " "),
-                (Type::Identifier, "aBcD"),
-                (Type::EndCommand, "."),
-                (Type::Spaces, " "),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "$y."),
-                (Type::Spaces, " "),
-                (Type::Identifier, "$z."),
-                (Type::Spaces, " "),
-                (Type::Identifier, "あいうえお"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "#c."),
-                (Type::Spaces, " "),
-                (Type::Identifier, "#d."),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "@@."),
-                (Type::Spaces, " "),
-                (Type::Identifier, "@@..."),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "#.#"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "#abcd"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, "."),
-                (Type::Spaces, " "),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "LMNOP"),
-                (Type::EndCommand, "."),
-                (Type::Spaces, " "),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "QRSTUV"),
-                (Type::EndCommand, "."),
-                (Type::Comment, "/* end of line comment */"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "qrstuv"),
-                (Type::EndCommand, "."),
-                (Type::Spaces, " "),
-                (Type::Comment, "/* end of line comment */"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "QrStUv"),
-                (Type::EndCommand, "."),
-                (Type::Comment, "/* end of line comment */"),
-                (Type::Spaces, " "),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "wxyz"),
-                (Type::EndCommand, "."),
-                (Type::Comment, "/* unterminated end of line comment"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "WXYZ"),
-                (Type::EndCommand, "."),
-                (Type::Spaces, " "),
-                (Type::Comment, "/* unterminated end of line comment"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "WxYz"),
-                (Type::EndCommand, "."),
-                (Type::Comment, "/* unterminated end of line comment "),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-            ],
-        );
-    }
-
-    #[test]
-    fn test_reserved_words() {
-        check_segmentation(
-            r#"and or not eq ge gt le lt ne all by to with
-AND OR NOT EQ GE GT LE LT NE ALL BY TO WITH
-andx orx notx eqx gex gtx lex ltx nex allx byx tox withx
-and. with.
-"#,
-            Mode::Auto,
-            &[
-                (Type::ReservedWord, "and"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "or"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "not"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "eq"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "ge"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "gt"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "le"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "lt"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "ne"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "all"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "by"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "to"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "with"),
-                (Type::Newline, "\n"),
-                (Type::ReservedWord, "AND"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "OR"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "NOT"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "EQ"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "GE"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "GT"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "LE"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "LT"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "NE"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "ALL"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "BY"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "TO"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "WITH"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "andx"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "orx"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "notx"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "eqx"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "gex"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "gtx"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "lex"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "ltx"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "nex"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "allx"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "byx"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "tox"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "withx"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "and."),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "with"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::First,
-            ],
-        );
-    }
-
-    /// Checks segmentation of punctuation in `Mode::Auto`.
-    ///
-    /// The input has three lines: the punctuators separated by single spaces,
-    /// the same punctuators run together with no separators, and a line of
-    /// other symbols.  Every symbol is expected as its own `Type::Punct`
-    /// segment (two-character operators such as `>=`, `<=`, `~=`, `<>`, and
-    /// `**` stay whole), except `!*`, which is expected as `Type::MacroId`.
-    /// The prompt list has one entry per input line.
-    #[test]
-    fn test_punctuation() {
-        check_segmentation(
-            r#"~ & | = >= > <= < ~= <> ( ) , - + * / [ ] **
-~&|=>=><=<~=<>(),-+*/[]**!*
-% : ; ? _ ` { } ~ !*
-"#,
-            Mode::Auto,
-            &[
-                // Line 1: punctuators separated by single spaces.
-                (Type::Punct, "~"),
-                (Type::Spaces, " "),
-                (Type::Punct, "&"),
-                (Type::Spaces, " "),
-                (Type::Punct, "|"),
-                (Type::Spaces, " "),
-                (Type::Punct, "="),
-                (Type::Spaces, " "),
-                (Type::Punct, ">="),
-                (Type::Spaces, " "),
-                (Type::Punct, ">"),
-                (Type::Spaces, " "),
-                (Type::Punct, "<="),
-                (Type::Spaces, " "),
-                (Type::Punct, "<"),
-                (Type::Spaces, " "),
-                (Type::Punct, "~="),
-                (Type::Spaces, " "),
-                (Type::Punct, "<>"),
-                (Type::Spaces, " "),
-                (Type::Punct, "("),
-                (Type::Spaces, " "),
-                (Type::Punct, ")"),
-                (Type::Spaces, " "),
-                (Type::Punct, ","),
-                (Type::Spaces, " "),
-                (Type::Punct, "-"),
-                (Type::Spaces, " "),
-                (Type::Punct, "+"),
-                (Type::Spaces, " "),
-                (Type::Punct, "*"),
-                (Type::Spaces, " "),
-                (Type::Punct, "/"),
-                (Type::Spaces, " "),
-                (Type::Punct, "["),
-                (Type::Spaces, " "),
-                (Type::Punct, "]"),
-                (Type::Spaces, " "),
-                (Type::Punct, "**"),
-                (Type::Newline, "\n"),
-                // Line 2: the same punctuators with nothing between them,
-                // followed by `!*`.
-                (Type::Punct, "~"),
-                (Type::Punct, "&"),
-                (Type::Punct, "|"),
-                (Type::Punct, "="),
-                (Type::Punct, ">="),
-                (Type::Punct, ">"),
-                (Type::Punct, "<="),
-                (Type::Punct, "<"),
-                (Type::Punct, "~="),
-                (Type::Punct, "<>"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Punct, ","),
-                (Type::Punct, "-"),
-                (Type::Punct, "+"),
-                (Type::Punct, "*"),
-                (Type::Punct, "/"),
-                (Type::Punct, "["),
-                (Type::Punct, "]"),
-                (Type::Punct, "**"),
-                (Type::MacroId, "!*"),
-                (Type::Newline, "\n"),
-                // Line 3: other symbols, each expected as a lone punctuator.
-                (Type::Punct, "%"),
-                (Type::Spaces, " "),
-                (Type::Punct, ":"),
-                (Type::Spaces, " "),
-                (Type::Punct, ";"),
-                (Type::Spaces, " "),
-                (Type::Punct, "?"),
-                (Type::Spaces, " "),
-                (Type::Punct, "_"),
-                (Type::Spaces, " "),
-                (Type::Punct, "`"),
-                (Type::Spaces, " "),
-                (Type::Punct, "{"),
-                (Type::Spaces, " "),
-                (Type::Punct, "}"),
-                (Type::Spaces, " "),
-                (Type::Punct, "~"),
-                (Type::Spaces, " "),
-                (Type::MacroId, "!*"),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[PromptStyle::Later, PromptStyle::Later, PromptStyle::Later],
-        );
-    }
-
-    /// Checks segmentation of unsigned numeric literals in `Mode::Auto`.
-    ///
-    /// Covers integers, decimals, and exponent forms.  The expected output
-    /// pins several edge cases:
-    ///
-    /// - `001.` in the middle of a line is one `Type::Number` including the
-    ///   trailing dot, whereas `1.` at the end of a line (or `123.` followed
-    ///   only by spaces and comments) is a `Type::Number` followed by
-    ///   `Type::EndCommand`.
-    /// - A `.` at the start of a line is `Type::StartCommand`, so `.1` and
-    ///   `.3E1` there are split into the dot and a number.
-    /// - `1e`, `1e+`, and `1e-` are `Type::ExpectedExponent`; `e1` is an
-    ///   identifier.
-    #[test]
-    fn test_positive_numbers() {
-        check_segmentation(
-            r#"0 1 01 001. 1.
-123. /* comment 1 */ /* comment 2 */
-.1 0.1 00.1 00.10
-5e1 6E-1 7e+1 6E+01 6e-03
-.3E1 .4e-1 .5E+1 .6e+01 .7E-03
-1.23e1 45.6E-1 78.9e+1 99.9E+01 11.2e-03
-. 1e e1 1e+ 1e- 1.
-"#,
-            Mode::Auto,
-            &[
-                (Type::Number, "0"),
-                (Type::Spaces, " "),
-                (Type::Number, "1"),
-                (Type::Spaces, " "),
-                (Type::Number, "01"),
-                (Type::Spaces, " "),
-                (Type::Number, "001."),
-                (Type::Spaces, " "),
-                // The final `1.` on the line splits into number + end-command.
-                (Type::Number, "1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Number, "123"),
-                (Type::EndCommand, "."),
-                (Type::Spaces, " "),
-                (Type::Comment, "/* comment 1 */"),
-                (Type::Spaces, " "),
-                (Type::Comment, "/* comment 2 */"),
-                (Type::Newline, "\n"),
-                // Leading `.` on a line is a start-command marker, not part
-                // of the number.
-                (Type::StartCommand, "."),
-                (Type::Number, "1"),
-                (Type::Spaces, " "),
-                (Type::Number, "0.1"),
-                (Type::Spaces, " "),
-                (Type::Number, "00.1"),
-                (Type::Spaces, " "),
-                (Type::Number, "00.10"),
-                (Type::Newline, "\n"),
-                (Type::Number, "5e1"),
-                (Type::Spaces, " "),
-                (Type::Number, "6E-1"),
-                (Type::Spaces, " "),
-                (Type::Number, "7e+1"),
-                (Type::Spaces, " "),
-                (Type::Number, "6E+01"),
-                (Type::Spaces, " "),
-                (Type::Number, "6e-03"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, "."),
-                (Type::Number, "3E1"),
-                (Type::Spaces, " "),
-                (Type::Number, ".4e-1"),
-                (Type::Spaces, " "),
-                (Type::Number, ".5E+1"),
-                (Type::Spaces, " "),
-                (Type::Number, ".6e+01"),
-                (Type::Spaces, " "),
-                (Type::Number, ".7E-03"),
-                (Type::Newline, "\n"),
-                (Type::Number, "1.23e1"),
-                (Type::Spaces, " "),
-                (Type::Number, "45.6E-1"),
-                (Type::Spaces, " "),
-                (Type::Number, "78.9e+1"),
-                (Type::Spaces, " "),
-                (Type::Number, "99.9E+01"),
-                (Type::Spaces, " "),
-                (Type::Number, "11.2e-03"),
-                (Type::Newline, "\n"),
-                // Last line: malformed exponents and look-alikes.
-                (Type::StartCommand, "."),
-                (Type::Spaces, " "),
-                (Type::ExpectedExponent, "1e"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "e1"),
-                (Type::Spaces, " "),
-                (Type::ExpectedExponent, "1e+"),
-                (Type::Spaces, " "),
-                (Type::ExpectedExponent, "1e-"),
-                (Type::Spaces, " "),
-                (Type::Number, "1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::First,
-            ],
-        );
-    }
-
-    /// Checks segmentation of negative numeric literals in `Mode::Auto`.
-    ///
-    /// Every input line starts with a space.  A `-` immediately followed by a
-    /// number is expected to be part of the `Type::Number` segment (including
-    /// forms such as `-.1` and `-001.`).  The expected output also pins the
-    /// cases where the `-` stays a separate `Type::Punct`: before a comment
-    /// (`-/**/1`), before a lone `.` (`-.`), and before an identifier
-    /// (`-e1`).  Incomplete exponents (`-1e`, `-1e+`, `-1e-`) are
-    /// `Type::ExpectedExponent` with the sign included.
-    #[test]
-    fn test_negative_numbers() {
-        check_segmentation(
-            r#" -0 -1 -01 -001. -1.
- -123. /* comment 1 */ /* comment 2 */
- -.1 -0.1 -00.1 -00.10
- -5e1 -6E-1 -7e+1 -6E+01 -6e-03
- -.3E1 -.4e-1 -.5E+1 -.6e+01 -.7E-03
- -1.23e1 -45.6E-1 -78.9e+1 -99.9E+01 -11.2e-03
- -/**/1
- -. -1e -e1 -1e+ -1e- -1.
-"#,
-            Mode::Auto,
-            &[
-                (Type::Spaces, " "),
-                (Type::Number, "-0"),
-                (Type::Spaces, " "),
-                (Type::Number, "-1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-01"),
-                (Type::Spaces, " "),
-                (Type::Number, "-001."),
-                (Type::Spaces, " "),
-                (Type::Number, "-1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Spaces, " "),
-                (Type::Number, "-123"),
-                (Type::EndCommand, "."),
-                (Type::Spaces, " "),
-                (Type::Comment, "/* comment 1 */"),
-                (Type::Spaces, " "),
-                (Type::Comment, "/* comment 2 */"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, " "),
-                (Type::Number, "-.1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-0.1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-00.1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-00.10"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, " "),
-                (Type::Number, "-5e1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-6E-1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-7e+1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-6E+01"),
-                (Type::Spaces, " "),
-                (Type::Number, "-6e-03"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, " "),
-                (Type::Number, "-.3E1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-.4e-1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-.5E+1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-.6e+01"),
-                (Type::Spaces, " "),
-                (Type::Number, "-.7E-03"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, " "),
-                (Type::Number, "-1.23e1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-45.6E-1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-78.9e+1"),
-                (Type::Spaces, " "),
-                (Type::Number, "-99.9E+01"),
-                (Type::Spaces, " "),
-                (Type::Number, "-11.2e-03"),
-                (Type::Newline, "\n"),
-                // `-/**/1`: the comment keeps the sign from joining the number.
-                (Type::Spaces, " "),
-                (Type::Punct, "-"),
-                (Type::Comment, "/**/"),
-                (Type::Number, "1"),
-                (Type::Newline, "\n"),
-                // Last line: signs that do not form a valid number.
-                (Type::Spaces, " "),
-                (Type::Punct, "-"),
-                (Type::Punct, "."),
-                (Type::Spaces, " "),
-                (Type::ExpectedExponent, "-1e"),
-                (Type::Spaces, " "),
-                (Type::Punct, "-"),
-                (Type::Identifier, "e1"),
-                (Type::Spaces, " "),
-                (Type::ExpectedExponent, "-1e+"),
-                (Type::Spaces, " "),
-                (Type::ExpectedExponent, "-1e-"),
-                (Type::Spaces, " "),
-                (Type::Number, "-1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::First,
-            ],
-        );
-    }
-
-    /// Checks segmentation of string literals in `Mode::Auto`.
-    ///
-    /// Covers single- and double-quoted strings (including doubled quote
-    /// characters inside them and empty strings), strings missing their
-    /// closing quote (`Type::ExpectedQuote`, ending at the end of the line),
-    /// hex strings with an `x`/`X` prefix (`Type::HexString`), and Unicode
-    /// strings with a `u`/`U` prefix (`Type::UnicodeString`).
-    ///
-    /// The last four input lines pin how a leading `+` or `-` is classified:
-    /// the expected output has `Type::StartCommand` for `+ new command` and
-    /// `- 'new command'`, but `Type::Punct` for the two `+` lines that are
-    /// followed by a comment.
-    #[test]
-    fn test_strings() {
-        check_segmentation(
-            r#"'x' "y" 'abc'
-'Don''t' "Can't" 'Won''t'
-"""quoted""" '"quoted"'
-'' ""
-'missing end quote
-"missing double quote
-x"4142" X'5152'
-u'fffd' U"041"
-+ new command
-+ /* comment */ 'string continuation'
-+ /* also a punctuator on blank line
-- 'new command'
-"#,
-            Mode::Auto,
-            &[
-                (Type::QuotedString, "'x'"),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "\"y\""),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "'abc'"),
-                (Type::Newline, "\n"),
-                // Doubled quote characters stay inside the string segment.
-                (Type::QuotedString, "'Don''t'"),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "\"Can't\""),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "'Won''t'"),
-                (Type::Newline, "\n"),
-                (Type::QuotedString, "\"\"\"quoted\"\"\""),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "'\"quoted\"'"),
-                (Type::Newline, "\n"),
-                (Type::QuotedString, "''"),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "\"\""),
-                (Type::Newline, "\n"),
-                // Unterminated strings run to the end of their line.
-                (Type::ExpectedQuote, "'missing end quote"),
-                (Type::Newline, "\n"),
-                (Type::ExpectedQuote, "\"missing double quote"),
-                (Type::Newline, "\n"),
-                (Type::HexString, "x\"4142\""),
-                (Type::Spaces, " "),
-                (Type::HexString, "X'5152'"),
-                (Type::Newline, "\n"),
-                (Type::UnicodeString, "u'fffd'"),
-                (Type::Spaces, " "),
-                (Type::UnicodeString, "U\"041\""),
-                (Type::Newline, "\n"),
-                // Leading `+`/`-`: start-command marker vs. plain punctuator.
-                (Type::StartCommand, "+"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "new"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::Newline, "\n"),
-                (Type::Punct, "+"),
-                (Type::Spaces, " "),
-                (Type::Comment, "/* comment */"),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "'string continuation'"),
-                (Type::Newline, "\n"),
-                (Type::Punct, "+"),
-                (Type::Spaces, " "),
-                (Type::Comment, "/* also a punctuator on blank line"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, "-"),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "'new command'"),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-            ],
-        );
-    }
-
-    /// Checks `#!` handling in `Mode::Interactive`.
-    ///
-    /// The same text, `#! /usr/bin/pspp`, appears on the first and third
-    /// input lines.  On the first line it is expected as a single
-    /// `Type::Shbang` segment; on the third line it is expected to be
-    /// segmented as ordinary syntax (`#` identifier, `!` macro identifier,
-    /// `/` punctuators, and identifiers).
-    #[test]
-    fn test_shbang() {
-        check_segmentation(
-            r#"#! /usr/bin/pspp
-title my title.
-#! /usr/bin/pspp
-"#,
-            Mode::Interactive,
-            &[
-                (Type::Shbang, "#! /usr/bin/pspp"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "title"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "my"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "title"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                // Not on the first line, so no longer a shbang.
-                (Type::Identifier, "#"),
-                (Type::MacroId, "!"),
-                (Type::Spaces, " "),
-                (Type::Punct, "/"),
-                (Type::Identifier, "usr"),
-                (Type::Punct, "/"),
-                (Type::Identifier, "bin"),
-                (Type::Punct, "/"),
-                (Type::Identifier, "pspp"),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[PromptStyle::First, PromptStyle::First, PromptStyle::Later],
-        );
-    }
-
-    /// Checks segmentation of comment commands in `Mode::Interactive`.
-    ///
-    /// Lines belonging to a command that starts with `*`, `comment`, or the
-    /// abbreviation `COMM` are expected as `Type::CommentCommand` segments
-    /// holding the raw line text (minus a terminating `.`), even when that
-    /// text would not be valid tokens.  `com` is expected to be segmented as
-    /// an ordinary identifier (the input text itself notes it is ambiguous
-    /// with COMPUTE).  Blank lines are expected to produce a zero-width
-    /// `Type::SeparateCommands` segment, which also ends a comment command
-    /// that has no terminating `.`.
-    #[test]
-    fn test_comment_command() {
-        check_segmentation(
-            r#"* Comment commands "don't
-have to contain valid tokens.
-
-** Check ambiguity with ** token.
-****************.
-
-comment keyword works too.
-COMM also.
-com is ambiguous with COMPUTE.
-
-   * Comment need not start at left margin.
-
-* Comment ends with blank line
-
-next command.
-
-"#,
-            Mode::Interactive,
-            &[
-                // A comment command may span lines until its terminating `.`.
-                (Type::CommentCommand, "* Comment commands \"don't"),
-                (Type::Newline, "\n"),
-                (Type::CommentCommand, "have to contain valid tokens"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::CommentCommand, "** Check ambiguity with ** token"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::CommentCommand, "****************"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::CommentCommand, "comment keyword works too"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::CommentCommand, "COMM also"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                // `com` does not select the comment command.
-                (Type::Identifier, "com"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "is"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "ambiguous"),
-                (Type::Spaces, " "),
-                (Type::ReservedWord, "with"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "COMPUTE"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "   "),
-                (
-                    Type::CommentCommand,
-                    "* Comment need not start at left margin",
-                ),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                // No terminating `.` here: the following blank line ends it.
-                (Type::CommentCommand, "* Comment ends with blank line"),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "next"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::Comment,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::Comment,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-            ],
-        );
-    }
-
-    /// Checks segmentation of the DOCUMENT command in `Mode::Interactive`.
-    ///
-    /// The command is introduced by `DOCUMENT` or an abbreviation (`DOC`,
-    /// `docu`).  Each command is expected to begin with a zero-width
-    /// `Type::StartDocument`, followed by one `Type::Document` segment per
-    /// line holding the entire raw line (including the command name, leading
-    /// indentation, and the final `.`).  A line ending in `.` is expected to
-    /// finish the command with zero-width `Type::EndCommand` and
-    /// `Type::SeparateCommands` segments; a `.` in mid-line
-    /// (`first.paragraph`) and a blank line do not end it.
-    #[test]
-    fn test_document_command() {
-        check_segmentation(
-            r#"DOCUMENT one line.
-DOC more
-    than
-        one
-            line.
-docu
-first.paragraph
-isn't parsed as tokens
-
-second paragraph.
-"#,
-            Mode::Interactive,
-            &[
-                (Type::StartDocument, ""),
-                (Type::Document, "DOCUMENT one line."),
-                (Type::EndCommand, ""),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::StartDocument, ""),
-                (Type::Document, "DOC more"),
-                (Type::Newline, "\n"),
-                (Type::Document, "    than"),
-                (Type::Newline, "\n"),
-                (Type::Document, "        one"),
-                (Type::Newline, "\n"),
-                (Type::Document, "            line."),
-                (Type::EndCommand, ""),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::StartDocument, ""),
-                (Type::Document, "docu"),
-                (Type::Newline, "\n"),
-                (Type::Document, "first.paragraph"),
-                (Type::Newline, "\n"),
-                (Type::Document, "isn't parsed as tokens"),
-                (Type::Newline, "\n"),
-                // The blank line is an empty document line, not a separator.
-                (Type::Document, ""),
-                (Type::Newline, "\n"),
-                (Type::Document, "second paragraph."),
-                (Type::EndCommand, ""),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::First,
-                PromptStyle::Document,
-                PromptStyle::Document,
-                PromptStyle::Document,
-                PromptStyle::First,
-                PromptStyle::Document,
-                PromptStyle::Document,
-                PromptStyle::Document,
-                PromptStyle::Document,
-                PromptStyle::First,
-            ],
-        );
-    }
-
-    /// Checks segmentation of the FILE LABEL command in `Mode::Interactive`.
-    ///
-    /// The command name may be abbreviated (`FIL label`, `FILE lab`) and may
-    /// be split across lines or interrupted by comments.  Text after the
-    /// command name is expected as a single `Type::UnquotedString` running to
-    /// the terminating `.` or the end of the line, unless it is already a
-    /// quoted string, in which case it is expected as `Type::QuotedString`.
-    #[test]
-    fn test_file_label_command() {
-        check_segmentation(
-            r#"FIL label isn't quoted.
-FILE
-  lab 'is quoted'.
-FILE /*
-/**/  lab not quoted here either
-
-"#,
-            Mode::Interactive,
-            &[
-                (Type::Identifier, "FIL"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "label"),
-                (Type::Spaces, " "),
-                (Type::UnquotedString, "isn't quoted"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                // Command name split across two lines; label is quoted.
-                (Type::Identifier, "FILE"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "  "),
-                (Type::Identifier, "lab"),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "'is quoted'"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                // Comments between the two words of the command name.
-                (Type::Identifier, "FILE"),
-                (Type::Spaces, " "),
-                (Type::Comment, "/*"),
-                (Type::Newline, "\n"),
-                (Type::Comment, "/**/"),
-                (Type::Spaces, "  "),
-                (Type::Identifier, "lab"),
-                (Type::Spaces, " "),
-                (Type::UnquotedString, "not quoted here either"),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::First,
-                PromptStyle::Later,
-                PromptStyle::First,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::First,
-            ],
-        );
-    }
-
-    /// Checks segmentation of BEGIN DATA...END DATA in `Mode::Interactive`.
-    ///
-    /// After a `begin data` command (possibly abbreviated, split across
-    /// lines, or followed by a comment), each following line is expected as a
-    /// raw `Type::InlineData` segment until an `end data` line, which is
-    /// expected to be segmented as ordinary identifiers again.  The expected
-    /// output pins several details:
-    ///
-    /// - An empty data block yields no `Type::InlineData` segments.
-    /// - Blank lines and text such as `5 6 7 /* x` inside the block are data.
-    /// - `end  data` (two spaces) is data, not the terminator.
-    /// - `begin data "xxx".` and `begin data 123.` do not start a data block,
-    ///   so the line after them (`not data`) is segmented normally.
-    #[test]
-    fn test_begin_data() {
-        check_segmentation(
-            r#"begin data.
-end data.
-
-begin data. /*
-123
-xxx
-end data.
-
-BEG /**/ DAT /*
-5 6 7 /* x
-
-end  data
-end data
-.
-
-begin
- data.
-data
-end data.
-
-begin data "xxx".
-begin data 123.
-not data
-"#,
-            Mode::Interactive,
-            &[
-                // Empty data block.
-                (Type::Identifier, "begin"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "end"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                // `begin data.` followed by a comment, then two data lines.
-                (Type::Identifier, "begin"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::EndCommand, "."),
-                (Type::Spaces, " "),
-                (Type::Comment, "/*"),
-                (Type::Newline, "\n"),
-                (Type::InlineData, "123"),
-                (Type::Newline, "\n"),
-                (Type::InlineData, "xxx"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "end"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                // Abbreviated command name with comments and no `.`.
-                (Type::Identifier, "BEG"),
-                (Type::Spaces, " "),
-                (Type::Comment, "/**/"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "DAT"),
-                (Type::Spaces, " "),
-                (Type::Comment, "/*"),
-                (Type::Newline, "\n"),
-                (Type::InlineData, "5 6 7 /* x"),
-                (Type::Newline, "\n"),
-                (Type::InlineData, ""),
-                (Type::Newline, "\n"),
-                (Type::InlineData, "end  data"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "end"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                // Command name split across two lines.
-                (Type::Identifier, "begin"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::InlineData, "data"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "end"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                // Extra tokens after `begin data`: no data block follows.
-                (Type::Identifier, "begin"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::Spaces, " "),
-                (Type::QuotedString, "\"xxx\""),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "begin"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::Spaces, " "),
-                (Type::Number, "123"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::ReservedWord, "not"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "data"),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::Data,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::Data,
-                PromptStyle::Data,
-                PromptStyle::Data,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::Data,
-                PromptStyle::Data,
-                PromptStyle::Data,
-                PromptStyle::Data,
-                PromptStyle::Later,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::Later,
-                PromptStyle::Data,
-                PromptStyle::Data,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::Later,
-            ],
-        );
-    }
-
-    /// Checks segmentation of DO REPEAT...END REPEAT in `Mode::Interactive`.
-    ///
-    /// The `do repeat` command itself (which may span lines) is expected to
-    /// be segmented into ordinary tokens.  Each line after it is expected as
-    /// a whole-line `Type::DoRepeatCommand` segment, including an inner
-    /// `do repeat` line and the `end ... repeat print.` line that follows it,
-    /// until an `end repeat` (here split across two lines) that is segmented
-    /// into ordinary tokens again.
-    // NOTE(review): presumably the `end /* x */ /* y */ repeat print.` line
-    // closes the inner `do repeat` and so stays part of the body; confirm
-    // against the segmenter implementation.
-    #[test]
-    fn test_do_repeat() {
-        check_segmentation(
-            r#"do repeat x=a b c
-          y=d e f.
-  do repeat a=1 thru 5.
-another command.
-second command
-+ third command.
-end /* x */ /* y */ repeat print.
-end
- repeat.
-do
-  repeat #a=1.
-  inner command.
-end repeat.
-"#,
-            Mode::Interactive,
-            &[
-                // The opening command, continued on a second line.
-                (Type::Identifier, "do"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "repeat"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "x"),
-                (Type::Punct, "="),
-                (Type::Identifier, "a"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "b"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "c"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "          "),
-                (Type::Identifier, "y"),
-                (Type::Punct, "="),
-                (Type::Identifier, "d"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "e"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "f"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                // Body lines are kept whole, leading whitespace included.
-                (Type::DoRepeatCommand, "  do repeat a=1 thru 5."),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "another command."),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "second command"),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "+ third command."),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print."),
-                (Type::Newline, "\n"),
-                // The outer `end repeat`, split across two lines.
-                (Type::Identifier, "end"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "repeat"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                // A second block whose `do repeat` is split across lines.
-                (Type::Identifier, "do"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "  "),
-                (Type::Identifier, "repeat"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "#a"),
-                (Type::Punct, "="),
-                (Type::Number, "1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "  inner command."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "end"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "repeat"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::Later,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::Later,
-                PromptStyle::First,
-                PromptStyle::Later,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::First,
-            ],
-        );
-    }
-
-    #[test]
-    fn test_do_repeat_overflow() {
-        const N: usize = 257;
-        let do_repeat: Vec<String> = (0..N)
-            .map(|i| format!("do repeat v{i}={i} thru {}.\n", i + 5))
-            .collect();
-        let end_repeat: Vec<String> = (0..N)
-            .rev()
-            .map(|i| format!("end repeat. /* {i}\n"))
-            .collect();
-
-        let s: String = do_repeat
-            .iter()
-            .chain(end_repeat.iter())
-            .map(|s| s.as_str())
-            .collect();
-        let mut expect_output = vec![
-            (Type::Identifier, "do"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "repeat"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "v0"),
-            (Type::Punct, "="),
-            (Type::Number, "0"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "thru"),
-            (Type::Spaces, " "),
-            (Type::Number, "5"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-        ];
-        for i in 1..N {
-            expect_output.push((Type::DoRepeatCommand, &do_repeat[i].trim_end()));
-            if i >= 255 {
-                expect_output.push((Type::DoRepeatOverflow, ""));
-            }
-            expect_output.push((Type::Newline, "\n"));
-        }
-        for i in 0..254 {
-            expect_output.push((Type::DoRepeatCommand, &end_repeat[i].trim_end()));
-            expect_output.push((Type::Newline, "\n"));
-        }
-        let comments: Vec<String> = (0..(N - 254)).rev().map(|i| format!("/* {i}")).collect();
-        for comment in &comments {
-            expect_output.extend([
-                (Type::Identifier, "end"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "repeat"),
-                (Type::EndCommand, "."),
-                (Type::Spaces, " "),
-                (Type::Comment, comment),
-                (Type::Newline, "\n"),
-            ]);
-        }
-        expect_output.push((Type::End, ""));
-
-        let expect_prompts: Vec<_> = (0..N * 2 - 3)
-            .map(|_| PromptStyle::DoRepeat)
-            .chain([PromptStyle::First, PromptStyle::First, PromptStyle::First])
-            .collect();
-        check_segmentation(&s, Mode::Interactive, &expect_output, &expect_prompts);
-    }
-
-    #[test]
-    fn test_do_repeat_batch() {
-        check_segmentation(
-            r#"do repeat x=a b c
-          y=d e f
-do repeat a=1 thru 5
-another command
-second command
-+ third command
-end /* x */ /* y */ repeat print
-end
- repeat
-do
-  repeat #a=1
-
-  inner command
-end repeat
-"#,
-            Mode::Batch,
-            &[
-                (Type::Identifier, "do"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "repeat"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "x"),
-                (Type::Punct, "="),
-                (Type::Identifier, "a"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "b"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "c"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "          "),
-                (Type::Identifier, "y"),
-                (Type::Punct, "="),
-                (Type::Identifier, "d"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "e"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "f"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, ""),
-                (Type::DoRepeatCommand, "do repeat a=1 thru 5"),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "another command"),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "second command"),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "+ third command"),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "end"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "repeat"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, ""),
-                (Type::Identifier, "do"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "  "),
-                (Type::Identifier, "repeat"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "#a"),
-                (Type::Punct, "="),
-                (Type::Number, "1"),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::DoRepeatCommand, "  inner command"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "end"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "repeat"),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::DoRepeat,
-                PromptStyle::DoRepeat,
-                PromptStyle::Later,
-            ],
-        );
-    }
-
-    mod define {
-        use crate::{
-            lex::segment::{Mode, Type},
-            prompt::PromptStyle,
-        };
-
-        use super::check_segmentation;
-
-        #[test]
-        fn test_simple() {
-            check_segmentation(
-                r#"define !macro1()
-var1 var2 var3 "!enddefine"
-!enddefine.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroBody, "var1 var2 var3 \"!enddefine\""),
-                    (Type::Newline, "\n"),
-                    (Type::MacroId, "!enddefine"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::Define, PromptStyle::Define, PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_no_newline_after_parentheses() {
-            check_segmentation(
-                r#"define !macro1() var1 var2 var3 /* !enddefine
-!enddefine.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::MacroBody, " var1 var2 var3 /* !enddefine"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroId, "!enddefine"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::Define, PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_no_newline_before_enddefine() {
-            check_segmentation(
-                r#"define !macro1()
-var1 var2 var3!enddefine.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroBody, "var1 var2 var3"),
-                    (Type::MacroId, "!enddefine"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::Define, PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_all_on_one_line() {
-            check_segmentation(
-                r#"define !macro1()var1 var2 var3!enddefine.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::MacroBody, "var1 var2 var3"),
-                    (Type::MacroId, "!enddefine"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_empty() {
-            check_segmentation(
-                r#"define !macro1()
-!enddefine.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroId, "!enddefine"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::Define, PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_blank_lines() {
-            check_segmentation(
-                r#"define !macro1()
-
-
-!enddefine.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroBody, ""),
-                    (Type::Newline, "\n"),
-                    (Type::MacroBody, ""),
-                    (Type::Newline, "\n"),
-                    (Type::MacroId, "!enddefine"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[
-                    PromptStyle::Define,
-                    PromptStyle::Define,
-                    PromptStyle::Define,
-                    PromptStyle::First,
-                ],
-            );
-        }
-
-        #[test]
-        fn test_arguments() {
-            check_segmentation(
-                r#"define !macro1(a(), b(), c())
-!enddefine.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Identifier, "a"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Punct, ","),
-                    (Type::Spaces, " "),
-                    (Type::Identifier, "b"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Punct, ","),
-                    (Type::Spaces, " "),
-                    (Type::Identifier, "c"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroId, "!enddefine"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::Define, PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_multiline_arguments() {
-            check_segmentation(
-                r#"define !macro1(
-  a(), b(
-  ),
-  c()
-)
-!enddefine.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Newline, "\n"),
-                    (Type::Spaces, "  "),
-                    (Type::Identifier, "a"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Punct, ","),
-                    (Type::Spaces, " "),
-                    (Type::Identifier, "b"),
-                    (Type::Punct, "("),
-                    (Type::Newline, "\n"),
-                    (Type::Spaces, "  "),
-                    (Type::Punct, ")"),
-                    (Type::Punct, ","),
-                    (Type::Newline, "\n"),
-                    (Type::Spaces, "  "),
-                    (Type::Identifier, "c"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroId, "!enddefine"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[
-                    PromptStyle::Later,
-                    PromptStyle::Later,
-                    PromptStyle::Later,
-                    PromptStyle::Later,
-                    PromptStyle::Define,
-                    PromptStyle::First,
-                ],
-            );
-        }
-
-        #[test]
-        fn test_arguments_start_on_second_line() {
-            check_segmentation(
-                r#"define !macro1
-(x,y,z
-)
-content 1
-content 2
-!enddefine.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Newline, "\n"),
-                    (Type::Punct, "("),
-                    (Type::Identifier, "x"),
-                    (Type::Punct, ","),
-                    (Type::Identifier, "y"),
-                    (Type::Punct, ","),
-                    (Type::Identifier, "z"),
-                    (Type::Newline, "\n"),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroBody, "content 1"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroBody, "content 2"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroId, "!enddefine"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[
-                    PromptStyle::Later,
-                    PromptStyle::Later,
-                    PromptStyle::Define,
-                    PromptStyle::Define,
-                    PromptStyle::Define,
-                    PromptStyle::First,
-                ],
-            );
-        }
-
-        #[test]
-        fn test_early_end_of_command_1() {
-            check_segmentation(
-                r#"define !macro1.
-data list /x 1.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::Identifier, "data"),
-                    (Type::Spaces, " "),
-                    (Type::Identifier, "list"),
-                    (Type::Spaces, " "),
-                    (Type::Punct, "/"),
-                    (Type::Identifier, "x"),
-                    (Type::Spaces, " "),
-                    (Type::Number, "1"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::First, PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_early_end_of_command_2() {
-            check_segmentation(
-                r#"define !macro1
-x.
-data list /x 1.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Newline, "\n"),
-                    (Type::Identifier, "x"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::Identifier, "data"),
-                    (Type::Spaces, " "),
-                    (Type::Identifier, "list"),
-                    (Type::Spaces, " "),
-                    (Type::Punct, "/"),
-                    (Type::Identifier, "x"),
-                    (Type::Spaces, " "),
-                    (Type::Number, "1"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::Later, PromptStyle::First, PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_early_end_of_command_3() {
-            check_segmentation(
-                r#"define !macro1(.
-x.
-data list /x 1.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::Identifier, "x"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::Identifier, "data"),
-                    (Type::Spaces, " "),
-                    (Type::Identifier, "list"),
-                    (Type::Spaces, " "),
-                    (Type::Punct, "/"),
-                    (Type::Identifier, "x"),
-                    (Type::Spaces, " "),
-                    (Type::Number, "1"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::First, PromptStyle::First, PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_early_end_of_command_4() {
-            // Notice the command terminator at the end of the `DEFINE` command,
-            // which should not be there and ends it early.
-            check_segmentation(
-                r#"define !macro1.
-data list /x 1.
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::Identifier, "data"),
-                    (Type::Spaces, " "),
-                    (Type::Identifier, "list"),
-                    (Type::Spaces, " "),
-                    (Type::Punct, "/"),
-                    (Type::Identifier, "x"),
-                    (Type::Spaces, " "),
-                    (Type::Number, "1"),
-                    (Type::EndCommand, "."),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::First, PromptStyle::First],
-            );
-        }
-
-        #[test]
-        fn test_missing_enddefine() {
-            check_segmentation(
-                r#"define !macro1()
-content line 1
-content line 2
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroBody, "content line 1"),
-                    (Type::Newline, "\n"),
-                    (Type::MacroBody, "content line 2"),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[
-                    PromptStyle::Define,
-                    PromptStyle::Define,
-                    PromptStyle::Define,
-                ],
-            );
-        }
-
-        #[test]
-        fn test_missing_enddefine_2() {
-            check_segmentation(
-                r#"define !macro1()
-"#,
-                Mode::Interactive,
-                &[
-                    (Type::Identifier, "define"),
-                    (Type::Spaces, " "),
-                    (Type::MacroName, "!macro1"),
-                    (Type::Punct, "("),
-                    (Type::Punct, ")"),
-                    (Type::Newline, "\n"),
-                    (Type::End, ""),
-                ],
-                &[PromptStyle::Define],
-            );
-        }
-    }
-
-    #[test]
-    fn test_batch_mode() {
-        check_segmentation(
-            r#"first command
-     another line of first command
-+  second command
-third command
-
-fourth command.
-   fifth command.
-"#,
-            Mode::Batch,
-            &[
-                (Type::Identifier, "first"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "     "),
-                (Type::Identifier, "another"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "line"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "of"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "first"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, "+"),
-                (Type::Spaces, "  "),
-                (Type::Identifier, "second"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, ""),
-                (Type::Identifier, "third"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "fourth"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "   "),
-                (Type::Identifier, "fifth"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-            ],
-        );
-    }
-
-    #[test]
-    fn test_auto_mode() {
-        check_segmentation(
-            r#"command
-     another line of command
-2sls
-+  another command
-another line of second command
-data list /x 1
-aggregate.
-print eject.
-twostep cluster
-
-
-fourth command.
-   fifth command.
-"#,
-            Mode::Auto,
-            &[
-                (Type::Identifier, "command"),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "     "),
-                (Type::Identifier, "another"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "line"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "of"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, ""),
-                (Type::Number, "2"),
-                (Type::Identifier, "sls"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, "+"),
-                (Type::Spaces, "  "),
-                (Type::Identifier, "another"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "another"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "line"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "of"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "second"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, ""),
-                (Type::Identifier, "data"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "list"),
-                (Type::Spaces, " "),
-                (Type::Punct, "/"),
-                (Type::Identifier, "x"),
-                (Type::Spaces, " "),
-                (Type::Number, "1"),
-                (Type::Newline, "\n"),
-                (Type::StartCommand, ""),
-                (Type::Identifier, "aggregate"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "print"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "eject"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "twostep"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "cluster"),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::SeparateCommands, ""),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "fourth"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "   "),
-                (Type::Identifier, "fifth"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "command"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
-            ],
-            &[
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::Later,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::Later,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-                PromptStyle::First,
-            ],
-        );
-    }
-}
diff --git a/rust/src/lex/segment/mod.rs b/rust/src/lex/segment/mod.rs

new file mode 100644 (file)

index 0000000..6bf30ba
--- /dev/null
+++ b/rust/src/lex/segment/mod.rs
@@ -0,0 +1,1322 @@
+//! Syntax segmentation.
+//!
+//! PSPP divides traditional "lexical analysis" or "tokenization" into two
+//! phases: a lower-level phase called "segmentation" and a higher-level phase
+//! called "scanning".  This module implements the segmentation phase.
+//! [`super::scan`] contains declarations for the scanning phase.
+//!
+//! Segmentation accepts a stream of UTF-8 bytes as input.  It outputs a label
+//! (a segment type) for each byte or contiguous sequence of bytes in the input.
+//! It also, in a few corner cases, outputs zero-width segments that label the
+//! boundary between a pair of bytes in the input.
+//!
+//! Some segment types correspond directly to tokens; for example, an
+//! "identifier" segment (SEG_IDENTIFIER) becomes an identifier token (T_ID)
+//! later in lexical analysis.  Other segments contribute to tokens but do not
+//! correspond directly; for example, multiple quoted string segments
+//! (SEG_QUOTED_STRING) separated by spaces (SEG_SPACES) and "+" punctuators
+//! (SEG_PUNCT) may be combined to form a single string token (T_STRING).  Still
+//! other segments are ignored (e.g. SEG_SPACES) or trigger special behavior
+//! such as error messages later in tokenization (e.g. SEG_EXPECTED_QUOTE).
+
+use crate::{
+    identifier::{id_match, id_match_n, is_reserved_word, IdentifierChar},
+    prompt::PromptStyle,
+};
+use bitflags::bitflags;
+
+use super::command_name::{command_match, COMMAND_NAMES};
+
+/// Segmentation mode.
+///
+/// PSPP syntax is written in one of two modes which are broadly defined as
+/// follows:
+///
+/// - In interactive mode, commands end with a period at the end of the line
+///   or with a blank line.
+///
+/// - In batch mode, the second and subsequent lines of a command are indented
+///   from the left margin.
+///
+/// The segmenter can also try to automatically detect the mode in use, using a
+/// heuristic that is usually correct.  Automatic detection ([`Mode::Auto`]) is
+/// the `Default` value of this type.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
+pub enum Mode {
+    /// Try to interpret input correctly regardless of whether it is written
+    /// for interactive or batch mode.
+    #[default]
+    Auto,
+
+    /// Interactive syntax mode.
+    Interactive,
+
+    /// Batch syntax mode.
+    Batch,
+}
+
+/// The type of a segment.
+///
+/// [`Segmenter::push`] labels each piece of input with one of these types.
+/// As documented on `push`, segments of types `End`, `SeparateCommands`,
+/// `StartDocument`, `InlineData`, and `Spaces` can have zero length.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Type {
+    Number,
+    QuotedString,
+    HexString,
+    UnicodeString,
+    UnquotedString,
+    ReservedWord,
+    Identifier,
+    Punct,
+    Shbang,
+    Spaces,
+    Comment,
+    Newline,
+    CommentCommand,
+    DoRepeatCommand,
+    DoRepeatOverflow,
+    InlineData,
+    MacroId,
+    MacroName,
+    MacroBody,
+    StartDocument,
+    Document,
+    StartCommand,
+    SeparateCommands,
+    EndCommand,
+    End,
+    ExpectedQuote,
+    ExpectedExponent,
+    UnexpectedChar,
+}
+
+bitflags! {
+    /// Flags that refine the segmenter's major [`State`].
+    #[derive(Copy, Clone, Debug)]
+    pub struct Substate: u8 {
+        /// The next input to be segmented begins a line.
+        const START_OF_LINE = 1;
+        /// The next input to be segmented begins a command.
+        const START_OF_COMMAND = 2;
+    }
+}
+
+/// Incremental segmenter for PSPP syntax.
+///
+/// Create one with [`Segmenter::new`] and feed it input with
+/// [`Segmenter::push`].  The type is `Copy`, so its state can be snapshotted
+/// cheaply; the segmenter does this itself when it needs to look ahead.
+#[derive(Copy, Clone)]
+pub struct Segmenter {
+    /// Major state plus the start-of-line/start-of-command flags.
+    state: (State, Substate),
+    /// NOTE(review): initialized to 0 by `new`; presumably a nesting depth --
+    /// confirm against the code that updates it.
+    nest: u8,
+    /// Syntax mode chosen at construction time.
+    mode: Mode,
+}
+
+/// Error returned when the segment type of the supplied input cannot be
+/// determined without seeing more input.
+#[derive(Copy, Clone, Debug)]
+pub struct Incomplete;
+
+impl Segmenter {
+    /// Returns a segmenter with the given syntax `mode`.
+    ///
+    /// If `is_snippet` is false, then the segmenter will parse as if it's being
+    /// given a whole file.  This means, for example, that it will interpret `-`
+    /// or `+` at the beginning of the syntax as a separator between commands
+    /// (since `-` or `+` at the beginning of a line has this meaning).
+    ///
+    /// If `is_snippet` is true, then the segmenter will parse as if it's being
+    /// given an isolated piece of syntax.  This means, for example, that it
+    /// will interpret `-` or `+` at the beginning of the syntax as an operator
+    /// token or (if followed by a digit) as part of a number.
+    pub fn new(mode: Mode, is_snippet: bool) -> Self {
+        Self {
+            // A whole file starts in the `Shbang` state; a snippet starts in
+            // the general state with no start-of-line/command flags set.
+            state: if is_snippet {
+                (State::General, Substate::empty())
+            } else {
+                (State::Shbang, Substate::empty())
+            },
+            mode,
+            nest: 0,
+        }
+    }
+
+    /// Returns the syntax mode that this segmenter was created with.
+    pub fn mode(&self) -> Mode {
+        self.mode
+    }
+
+    /// Returns true if the `START_OF_LINE` substate flag is set.
+    fn start_of_line(&self) -> bool {
+        self.state.1.contains(Substate::START_OF_LINE)
+    }
+
+    /// Returns true if the `START_OF_COMMAND` substate flag is set.
+    fn start_of_command(&self) -> bool {
+        self.state.1.contains(Substate::START_OF_COMMAND)
+    }
+
+    /// Returns the style of command prompt to display to an interactive user
+    /// for input in the current state.  The return value is most accurate in
+    /// mode `Mode::Interactive` and at the beginning of a line (that is, if
+    /// [`Segmenter::push`] consumed as much as possible of the input up to a
+    /// new-line).
+    pub fn prompt(&self) -> PromptStyle {
+        match self.state.0 {
+            // In these states the prompt depends only on whether a new
+            // command is about to begin.
+            State::General
+            | State::DoRepeat1
+            | State::DoRepeat2
+            | State::Define1
+            | State::Define2
+            | State::Define3 => {
+                if self.start_of_command() {
+                    PromptStyle::First
+                } else {
+                    PromptStyle::Later
+                }
+            }
+
+            // The remaining states each map to one fixed prompt style.
+            State::Shbang
+            | State::Document3
+            | State::FileLabel2
+            | State::FileLabel3
+            | State::BeginData1 => PromptStyle::First,
+            State::FileLabel1 | State::BeginData2 => PromptStyle::Later,
+            State::Comment1 | State::Comment2 => PromptStyle::Comment,
+            State::Document1 | State::Document2 => PromptStyle::Document,
+            State::DoRepeat3 | State::DoRepeat4 => PromptStyle::DoRepeat,
+            State::Define4 | State::Define5 | State::Define6 => PromptStyle::Define,
+            State::BeginData3 | State::BeginData4 => PromptStyle::Data,
+        }
+    }
+
+    /// Attempts to label a prefix of the remaining input with a segment type.
+    /// The caller supplies a prefix of the remaining input as `input`.  If
+    /// `eof` is true, then `input` is the entire (remainder) of the input; if
+    /// `eof` is false, then further input is potentially available.
+    ///
+    /// The input may contain '\n' or '\r\n' line ends in any combination.
+    ///
+    /// If successful, returns `Ok((rest, type))`, where `rest` is the part of
+    /// `input` that follows the segment (so that the segment itself is the
+    /// first `input.len() - rest.len()` bytes of `input`) and `type` is the
+    /// type of that segment.  The next call should supply input that starts at
+    /// `rest`, because the bytes before it have (figuratively) been consumed
+    /// by the segmenter.
+    ///
+    /// Segments can have zero length, including segment types `Type::End`,
+    /// `Type::SeparateCommands`, `Type::StartDocument`, `Type::InlineData`, and
+    /// `Type::Spaces`.
+    ///
+    /// Failure occurs only if the segment type of the bytes in `input` cannot
+    /// yet be determined.  In this case, this function returns `Err(Incomplete)`.  If
+    /// more input is available, the caller should obtain some more, then call
+    /// again with a longer `input`.  If this is not enough, the process might
+    /// need to repeat again and again.  If input is exhausted, then the caller
+    /// may call again setting `eof` to true.  This function will never return
+    /// `Err(Incomplete)` when `eof` is true.
+    ///
+    /// The caller must not, in a sequence of calls, supply contradictory input.
+    /// That is, bytes provided as part of `input` in one call, but not
+    /// consumed, must not be provided with *different* values on subsequent
+    /// calls.  This is because the function must often make decisions based on
+    /// looking ahead beyond the bytes that it consumes.
+    pub fn push<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> {
+        // Empty input is either the end of the input or a request for more.
+        if input.is_empty() {
+            if eof {
+                return Ok((input, Type::End));
+            } else {
+                return Err(Incomplete);
+            };
+        }
+
+        // Dispatch on the major state; each handler consumes one segment and
+        // updates `self.state` for the next call.
+        match self.state.0 {
+            State::Shbang => return self.parse_shbang(input, eof),
+            State::General => {
+                if self.start_of_line() {
+                    self.parse_start_of_line(input, eof)
+                } else {
+                    self.parse_mid_line(input, eof)
+                }
+            }
+            State::Comment1 => self.parse_comment_1(input, eof),
+            State::Comment2 => self.parse_comment_2(input, eof),
+            State::Document1 => self.parse_document_1(input, eof),
+            State::Document2 => self.parse_document_2(input, eof),
+            State::Document3 => self.parse_document_3(input, eof),
+            State::FileLabel1 => self.parse_file_label_1(input, eof),
+            State::FileLabel2 => self.parse_file_label_2(input, eof),
+            State::FileLabel3 => self.parse_file_label_3(input, eof),
+            State::DoRepeat1 => self.parse_do_repeat_1(input, eof),
+            State::DoRepeat2 => self.parse_do_repeat_2(input, eof),
+            State::DoRepeat3 => self.parse_do_repeat_3(input, eof),
+            State::DoRepeat4 => self.parse_do_repeat_4(input),
+            State::Define1 => self.parse_define_1_2(input, eof),
+            State::Define2 => self.parse_define_1_2(input, eof),
+            State::Define3 => self.parse_define_3(input, eof),
+            State::Define4 => self.parse_define_4_5(input, eof),
+            State::Define5 => self.parse_define_4_5(input, eof),
+            State::Define6 => self.parse_define_6(input, eof),
+            State::BeginData1 => self.parse_begin_data_1(input, eof),
+            State::BeginData2 => self.parse_begin_data_2(input, eof),
+            State::BeginData3 => self.parse_begin_data_3(input, eof),
+            State::BeginData4 => self.parse_begin_data_4(input, eof),
+        }
+    }
+}
+
+/// Major state of a [`Segmenter`].
+///
+/// [`Segmenter::push`] dispatches on this value to pick the parsing routine
+/// for the next segment.  Variants that share a name and differ only in a
+/// numeric suffix are handled by correspondingly numbered `parse_*` methods.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum State {
+    Shbang,
+    General,
+    Comment1,
+    Comment2,
+    Document1,
+    Document2,
+    Document3,
+    FileLabel1,
+    FileLabel2,
+    FileLabel3,
+    DoRepeat1,
+    DoRepeat2,
+    DoRepeat3,
+    DoRepeat4,
+    Define1,
+    Define2,
+    Define3,
+    Define4,
+    Define5,
+    Define6,
+    BeginData1,
+    BeginData2,
+    BeginData3,
+    BeginData4,
+}
+
+/// Splits the first character off `input`.
+///
+/// Returns the character (or `None` if `input` is empty) together with the
+/// remainder of `input`.  An empty `input` is only acceptable at end of
+/// input: if `eof` is false, it yields `Err(Incomplete)` instead.
+fn take(input: &str, eof: bool) -> Result<(Option<char>, &str), Incomplete> {
+    let mut chars = input.chars();
+    let head = chars.next();
+    if head.is_none() && !eof {
+        return Err(Incomplete);
+    }
+    Ok((head, chars.as_str()))
+}
+
+/// Skips the body of a `/* ... */` comment whose opening `/*` has already
+/// been consumed by the caller.
+///
+/// Returns the input just past the closing `*/`.  If the end of the line or
+/// the end of the input comes first, returns the input at that point (the
+/// line end itself is not consumed).  Returns `Err(Incomplete)` if more input
+/// is needed to decide.
+fn skip_comment(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
+    loop {
+        let (Some(c), rest) = take(input, eof)? else {
+            return Ok(input);
+        };
+        match c {
+            '\n' | '\r' if is_end_of_line(input, eof)? => return Ok(input),
+            '*' => {
+                // Only `*/` ends the comment; a lone `*` is ordinary text.
+                if let (Some('/'), rest) = take(rest, eof)? {
+                    return Ok(rest);
+                }
+            }
+            _ => (),
+        };
+        input = rest;
+    }
+}
+
+/// Skips every leading character of `input` for which `f` returns true and
+/// returns what is left.
+///
+/// If nothing is left and `eof` is false, more matching characters might
+/// still arrive, so the result is `Err(Incomplete)`.
+fn skip_matching<F>(f: F, input: &str, eof: bool) -> Result<&str, Incomplete>
+where
+    F: Fn(char) -> bool,
+{
+    let remainder = input.trim_start_matches(f);
+    match (remainder.is_empty(), eof) {
+        (true, false) => Err(Incomplete),
+        _ => Ok(remainder),
+    }
+}
+
+/// Tests the first character of `input` against `f`.
+///
+/// Returns `Ok(Some(rest))`, with `rest` being the input after that
+/// character, if the character exists and `f` accepts it; otherwise returns
+/// `Ok(None)`.  Propagates `Err(Incomplete)` from [`take`].
+fn match_char<F>(f: F, input: &str, eof: bool) -> Result<Option<&str>, Incomplete>
+where
+    F: Fn(char) -> bool,
+{
+    match take(input, eof)? {
+        (Some(c), rest) if f(c) => Ok(Some(rest)),
+        _ => Ok(None),
+    }
+}
+
+/// Skips leading white space in `input`, stopping at (and not consuming) the
+/// end of the line.
+///
+/// Returns the input starting at the first character that is not white space,
+/// at the line end, or at the end of input, whichever comes first.
+fn skip_spaces(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
+    loop {
+        let (Some(c), rest) = take(input, eof)? else {
+            return Ok(input);
+        };
+        match c {
+            // This arm must come first: '\n' and '\r' are white space too, but
+            // a line end must not be skipped.
+            '\r' | '\n' if is_end_of_line(input, eof)? => return Ok(input),
+            c if c.is_whitespace() => (),
+            _ => return Ok(input),
+        }
+        input = rest;
+    }
+}
+
+/// Skips leading ASCII digits in `input` and returns the remainder (see
+/// [`skip_matching`] for when this returns `Err(Incomplete)`).
+fn skip_digits(input: &str, eof: bool) -> Result<&str, Incomplete> {
+    skip_matching(|c| c.is_ascii_digit(), input, eof)
+}
+
+/// Skips leading white space and `/* ... */` comments in `input`, stopping at
+/// (and not consuming) the end of the line.
+///
+/// Returns the input starting at the first character that is neither white
+/// space nor part of a comment, at the line end, or at the end of input,
+/// whichever comes first.  A `/` that does not begin a comment is ordinary
+/// syntax and is left in place.
+fn skip_spaces_and_comments(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
+    loop {
+        let (Some(c), rest) = take(input, eof)? else {
+            return Ok(input);
+        };
+        match c {
+            '/' => {
+                let (c, rest2) = take(rest, eof)?;
+                match c {
+                    Some('*') => input = skip_comment(rest2, eof)?,
+                    // A lone `/` is not a comment: stop *at* it rather than
+                    // after it, so that callers still see the slash.
+                    Some(_) | None => return Ok(input),
+                }
+            }
+            '\r' | '\n' if is_end_of_line(input, eof)? => return Ok(input),
+            c if c.is_whitespace() => input = rest,
+            _ => return Ok(input),
+        };
+    }
+}
+
+/// Returns true if `input` begins with something that can start a string:
+///
+/// - a single or double quote,
+/// - `x`, `X`, `u`, or `U` immediately followed by a quote, or
+/// - the end of the line.
+///
+/// Returns false for anything else, including empty input at end of input.
+fn is_start_of_string(input: &str, eof: bool) -> Result<bool, Incomplete> {
+    let (Some(c), rest) = take(input, eof)? else {
+        return Ok(false);
+    };
+    match c {
+        'x' | 'X' | 'u' | 'U' => {
+            // Inspect the character *after* the prefix letter; re-reading
+            // `input` would only yield the prefix letter again.
+            let (next, _) = take(rest, eof)?;
+            Ok(matches!(next, Some('\'' | '"')))
+        }
+        '\'' | '"' => Ok(true),
+        '\n' | '\r' if is_end_of_line(input, eof)? => Ok(true),
+        _ => Ok(false),
+    }
+}
+
+/// Returns true if `input` is positioned at a line end: it is empty (at end
+/// of input), or it starts with `\n` or with `\r\n`.
+///
+/// A `\r` that is not followed by `\n` does not count as a line end.
+fn is_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
+    match take(input, eof)? {
+        (None, _) | (Some('\n'), _) => Ok(true),
+        (Some('\r'), after_cr) => Ok(take(after_cr, eof)?.0 == Some('\n')),
+        _ => Ok(false),
+    }
+}
+
+/// Returns true if only white space and comments separate the start of
+/// `input` from the end of the line (or the end of input).
+fn at_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
+    is_end_of_line(skip_spaces_and_comments(input, eof)?, eof)
+}
+
+/// Returns the first character of `s`.
+///
+/// # Panics
+///
+/// Panics if `s` is empty.
+fn first(s: &str) -> char {
+    s.chars().next().unwrap()
+}
+/// Returns the sub-slice of `COMMAND_NAMES` whose entries begin with the same
+/// character as `target`, after converting `target`'s first character to
+/// ASCII upper case.  Returns an empty slice if `target` is empty.
+///
+/// NOTE(review): the binary searches assume that `COMMAND_NAMES` is sorted by
+/// first character and that its entries are non-empty and start with an
+/// upper-case letter -- confirm against its definition.
+fn get_command_name_candidates(target: &str) -> &[&'static str] {
+    if target.is_empty() {
+        return &[];
+    }
+    let target_first = first(target).to_ascii_uppercase();
+    let low = COMMAND_NAMES.partition_point(|s| first(s) < target_first);
+    let high = COMMAND_NAMES.partition_point(|s| first(s) <= target_first);
+    &COMMAND_NAMES[low..high]
+}
+
+/// Returns true if `input` begins with text that `command_match` accepts as
+/// one of the `COMMAND_NAMES` with no words missing.
+///
+/// Returns `Err(Incomplete)` if `eof` is false and the candidate name extends
+/// to the very end of `input`, since more of the name might follow.
+fn detect_command_name(input: &str, eof: bool) -> Result<bool, Incomplete> {
+    // The candidate is the longest prefix made up of white space other than
+    // new-line, identifier characters other than '.', and '-'.
+    let command_name = input
+        .split(|c: char| {
+            !((c.is_whitespace() && c != '\n') || (c.may_continue_id() && c != '.') || c == '-')
+        })
+        .next()
+        .unwrap();
+    if !eof && command_name.len() == input.len() {
+        return Err(Incomplete);
+    }
+    let command_name = command_name.trim_end_matches(|c: char| c.is_whitespace() || c == '.');
+    // Only command names that share the candidate's first letter can match.
+    for command in get_command_name_candidates(command_name) {
+        if let Some(m) = command_match(command, command_name) {
+            if m.missing_words <= 0 {
+                return Ok(true);
+            }
+        }
+    }
+    Ok(false)
+}
+
+impl Segmenter {
+    /// Handles `State::Shbang`, the initial state for whole-file input.
+    ///
+    /// If `input` starts with `#!`, hands the input after `#!` to
+    /// `parse_full_line` and labels the result `Type::Shbang`.  Otherwise,
+    /// switches to the general state at the start of a line and of a command
+    /// and segments `input` from there.
+    fn parse_shbang<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        if let (Some('#'), rest) = take(input, eof)? {
+            if let (Some('!'), rest) = take(rest, eof)? {
+                let rest = self.parse_full_line(rest, eof)?;
+                self.state = (State::General, Substate::START_OF_COMMAND);
+                return Ok((rest, Type::Shbang));
+            }
+        }
+
+        self.state = (
+            State::General,
+            Substate::START_OF_COMMAND | Substate::START_OF_LINE,
+        );
+        self.push(input, eof)
+    }
+    /// Reports whether `input`, taken from the beginning of a line, should be
+    /// treated as the start of a new command in the segmenter's mode: never in
+    /// interactive mode, always in batch mode, and in auto mode only if
+    /// `input` begins with a recognized command name.
+    fn at_command_start(&self, input: &str, eof: bool) -> Result<bool, Incomplete> {
+        match self.mode {
+            Mode::Auto => detect_command_name(input, eof),
+            Mode::Interactive => Ok(false),
+            Mode::Batch => Ok(true),
+        }
+    }
+    /// Segments `input` in the general state at the start of a line.
+    ///
+    /// Decides whether the line starts a new command (emitting
+    /// `Type::StartCommand` or `Type::SeparateCommands`) or whether a leading
+    /// `+` is string-concatenation punctuation; otherwise falls through to
+    /// [`Self::parse_mid_line`].  `input` must not be empty.
+    fn parse_start_of_line<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        debug_assert_eq!(self.state.0, State::General);
+        debug_assert!(self.start_of_line());
+        debug_assert!(!input.is_empty());
+
+        // `input` is nonempty, so `take` can neither fail nor return `None`.
+        let (Some(c), rest) = take(input, eof).unwrap() else {
+            unreachable!()
+        };
+        match c {
+            '+' if is_start_of_string(skip_spaces_and_comments(rest, eof)?, eof)? => {
+                // This `+` is punctuation that may separate pieces of a string.
+                self.state = (State::General, Substate::empty());
+                return Ok((rest, Type::Punct));
+            }
+            '+' | '-' | '.' => {
+                // A leading `+`, `-`, or `.` marks the start of a command.
+                self.state = (State::General, Substate::START_OF_COMMAND);
+                return Ok((rest, Type::StartCommand));
+            }
+            _ if c.is_whitespace() => {
+                // A blank line separates commands; the segment is zero-length.
+                if at_end_of_line(input, eof)? {
+                    self.state = (State::General, Substate::START_OF_COMMAND);
+                    return Ok((input, Type::SeparateCommands));
+                }
+            }
+            _ => {
+                // Emit a zero-length `StartCommand` only if we are not already
+                // at the start of a command.
+                if self.at_command_start(input, eof)?
+                    && !self.state.1.contains(Substate::START_OF_COMMAND)
+                {
+                    self.state = (State::General, Substate::START_OF_COMMAND);
+                    return Ok((input, Type::StartCommand));
+                }
+            }
+        }
+        // Clear `START_OF_LINE` and continue as for the middle of a line.
+        self.state.1 = Substate::START_OF_COMMAND;
+        self.parse_mid_line(input, eof)
+    }
+    /// Segments `input` in the general state when not at the start of a line.
+    ///
+    /// Dispatches on the first character of `input` to recognize new-lines,
+    /// comments, punctuation, numbers, strings, identifiers, and white space.
+    /// Most arms clear the substate flags; the end-of-command `.` arm sets
+    /// `START_OF_COMMAND` and the new-line arm adds `START_OF_LINE`.
+    fn parse_mid_line<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        debug_assert!(self.state.0 == State::General);
+        debug_assert!(!self.state.1.contains(Substate::START_OF_LINE));
+        let (Some(c), rest) = take(input, eof)? else {
+            unreachable!()
+        };
+        match c {
+            '\r' | '\n' if is_end_of_line(input, eof)? => {
+                self.state.1 |= Substate::START_OF_LINE;
+                Ok((
+                    self.parse_newline(input, eof).unwrap().unwrap(),
+                    Type::Newline,
+                ))
+            }
+            '/' => {
+                // `/*` starts a comment; any other `/` is punctuation.
+                if let (Some('*'), rest) = take(rest, eof)? {
+                    let rest = skip_comment(rest, eof)?;
+                    return Ok((rest, Type::Comment));
+                } else {
+                    self.state.1 = Substate::empty();
+                    return Ok((rest, Type::Punct));
+                }
+            }
+            '-' => {
+                // `-` followed (after optional spaces) by a digit, or by `.`
+                // and a digit, is the sign of a number.
+                let (c, rest2) = take(skip_spaces(rest, eof)?, eof)?;
+                match c {
+                    Some(c) if c.is_ascii_digit() => {
+                        return self.parse_number(rest, eof);
+                    }
+                    Some('.') => {
+                        if let (Some(c), _rest) = take(rest2, eof)? {
+                            if c.is_ascii_digit() {
+                                return self.parse_number(rest, eof);
+                            }
+                        }
+                    }
+                    None | Some(_) => (),
+                }
+                self.state.1 = Substate::empty();
+                return Ok((rest, Type::Punct));
+            }
+            '(' | ')' | '[' | ']' | '{' | '}' | ',' | '=' | ';' | ':' | '&' | '|' | '+' => {
+                self.state.1 = Substate::empty();
+                return Ok((rest, Type::Punct));
+            }
+            '*' => {
+                // `*` at the start of a command begins a comment command;
+                // elsewhere it is `*` or `**`.
+                if self.state.1.contains(Substate::START_OF_COMMAND) {
+                    self.state.0 = State::Comment1;
+                    self.parse_comment_1(input, eof)
+                } else {
+                    self.parse_digraph(&['*'], rest, eof)
+                }
+            }
+            '<' => self.parse_digraph(&['=', '>'], rest, eof),
+            '>' => self.parse_digraph(&['='], rest, eof),
+            '~' => self.parse_digraph(&['='], rest, eof),
+            '.' if at_end_of_line(rest, eof)? => {
+                self.state.1 = Substate::START_OF_COMMAND;
+                Ok((rest, Type::EndCommand))
+            }
+            '.' => match take(rest, eof)? {
+                (Some(c), _) if c.is_ascii_digit() => self.parse_number(input, eof),
+                _ => Ok((rest, Type::Punct)),
+            },
+            '0'..='9' => self.parse_number(input, eof),
+            'u' | 'U' => self.maybe_parse_string(Type::UnicodeString, (input, rest), eof),
+            'x' | 'X' => self.maybe_parse_string(Type::HexString, (input, rest), eof),
+            '\'' | '"' => self.parse_string(Type::QuotedString, c, rest, eof),
+            '!' => {
+                let (c, rest2) = take(rest, eof)?;
+                match c {
+                    Some('*') => Ok((rest2, Type::MacroId)),
+                    Some(_) => self.parse_id(input, eof),
+                    None => Ok((rest, Type::Punct)),
+                }
+            }
+            c if c.is_whitespace() => Ok((skip_spaces(rest, eof)?, Type::Spaces)),
+            c if c.may_start_id() => self.parse_id(input, eof),
+            '!'..='~' if c != '\\' && c != '^' => {
+                self.state.1 = Substate::empty();
+                Ok((rest, Type::Punct))
+            }
+            _ => {
+                self.state.1 = Substate::empty();
+                Ok((rest, Type::UnexpectedChar))
+            }
+        }
+    }
+    /// Segments the rest of a quoted string.
+    ///
+    /// `input` starts just after the opening `quote` character.  A doubled
+    /// `quote` inside the string is an escaped quote and does not end it.
+    ///
+    /// Returns the input after the closing quote, labeled `type_`.  If the
+    /// line or the input ends before a closing quote is found, returns the
+    /// input at that point labeled `Type::ExpectedQuote`.  Either way the
+    /// substate flags are cleared.
+    fn parse_string<'a>(
+        &mut self,
+        type_: Type,
+        quote: char,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        while let (Some(c), rest) = take(input, eof)? {
+            match c {
+                _ if c == quote => {
+                    // Look one character ahead to tell a closing quote from a
+                    // doubled (escaped) one.
+                    let (c, rest2) = take(rest, eof)?;
+                    if c != Some(quote) {
+                        self.state.1 = Substate::empty();
+                        return Ok((rest, type_));
+                    }
+                    input = rest2;
+                }
+                '\r' | '\n' if is_end_of_line(input, eof)? => break,
+                _ => input = rest,
+            }
+        }
+        self.state.1 = Substate::empty();
+        Ok((input, Type::ExpectedQuote))
+    }
+    /// Segments input that starts with a string-prefix letter.
+    ///
+    /// `input.0` is the input starting at the prefix letter and `input.1` is
+    /// the input just after it.  If a quote follows the letter, the segment is
+    /// a string of type `type_`; otherwise it is parsed as an identifier
+    /// starting at the letter.
+    fn maybe_parse_string<'a>(
+        &mut self,
+        type_: Type,
+        input: (&'a str, &'a str),
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let (at_prefix, after_prefix) = input;
+        if let (Some(quote @ ('\'' | '"')), body) = take(after_prefix, eof)? {
+            self.parse_string(type_, quote, body, eof)
+        } else {
+            self.parse_id(at_prefix, eof)
+        }
+    }
+    /// Looks ahead in `input` for the next identifier, skipping shbang, space,
+    /// comment, and new-line segments.
+    ///
+    /// Returns `(id, rest)`.  If the first other segment is an identifier,
+    /// `id` is its text and `rest` is the input after it; for any other
+    /// segment type, `id` is the empty string and `rest` is the input after
+    /// that segment.  The look-ahead runs on a fresh snippet-mode segmenter,
+    /// so `self` is not modified.
+    fn next_id_in_command<'a>(
+        &self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, &'a str), Incomplete> {
+        let mut sub = Segmenter::new(self.mode, true);
+        loop {
+            let (rest, type_) = sub.push(input, eof)?;
+            match type_ {
+                Type::Shbang | Type::Spaces | Type::Comment | Type::Newline => (),
+
+                Type::Identifier => return Ok((&input[..input.len() - rest.len()], rest)),
+
+                // Every other type is listed explicitly (no `_` arm) so that
+                // adding a `Type` variant forces a decision here.
+                Type::Number
+                | Type::QuotedString
+                | Type::HexString
+                | Type::UnicodeString
+                | Type::UnquotedString
+                | Type::ReservedWord
+                | Type::Punct
+                | Type::CommentCommand
+                | Type::DoRepeatCommand
+                | Type::DoRepeatOverflow
+                | Type::InlineData
+                | Type::MacroId
+                | Type::MacroName
+                | Type::MacroBody
+                | Type::StartDocument
+                | Type::Document
+                | Type::StartCommand
+                | Type::SeparateCommands
+                | Type::EndCommand
+                | Type::End
+                | Type::ExpectedQuote
+                | Type::ExpectedExponent
+                | Type::UnexpectedChar => return Ok(("", rest)),
+            }
+            input = rest;
+        }
+    }
+    /// Segments an identifier, reserved word, or macro identifier at the start
+    /// of `input`, which must not be empty.
+    ///
+    /// At the start of a command, this also recognizes the commands that need
+    /// special segmentation (`COMMENT`, `DOCUMENT`, `DEFINE`, `FILE LABEL`,
+    /// `DO REPEAT`, and `BEGIN DATA`) and switches to the matching state.
+    fn parse_id<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> {
+        // The caller guarantees nonempty input, so `take` cannot fail here.
+        let (Some(_), mut end) = take(input, eof).unwrap() else {
+            unreachable!()
+        };
+        while let (Some(c), rest) = take(end, eof)? {
+            if !c.may_continue_id() {
+                break;
+            };
+            end = rest;
+        }
+        let identifier = &input[..input.len() - end.len()];
+        // A trailing '.' belongs to the identifier unless it ends the line, in
+        // which case it is left to be segmented separately.
+        let identifier = match identifier.strip_suffix('.') {
+            Some(without_dot) if at_end_of_line(end, eof)? => without_dot,
+            _ => identifier,
+        };
+        let rest = &input[identifier.len()..];
+
+        if self.state.1.contains(Substate::START_OF_COMMAND) {
+            if id_match_n("COMMENT", identifier, 4) {
+                self.state.0 = State::Comment1;
+                return self.parse_comment_1(input, eof);
+            } else if id_match("DOCUMENT", identifier) {
+                // Zero-length segment: the whole line, including the command
+                // name, is left for the document states.
+                self.state.0 = State::Document1;
+                return Ok((input, Type::StartDocument));
+            } else if id_match_n("DEFINE", identifier, 6) {
+                // Falls through: the name itself is returned as an ordinary
+                // identifier below.
+                self.state.0 = State::Define1;
+            } else if id_match("FILE", identifier) {
+                if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) {
+                    self.state = (State::FileLabel1, Substate::empty());
+                    return Ok((rest, Type::Identifier));
+                }
+            } else if id_match("DO", identifier) {
+                if id_match("REPEAT", self.next_id_in_command(rest, eof)?.0) {
+                    self.state = (State::DoRepeat1, Substate::empty());
+                    return Ok((rest, Type::Identifier));
+                }
+            } else if id_match("BEGIN", identifier) {
+                let (next_id, rest2) = self.next_id_in_command(rest, eof)?;
+                if id_match("DATA", next_id) {
+                    // Only `BEGIN DATA`, optionally followed by '.', with
+                    // nothing else before the end of the line, starts inline
+                    // data.
+                    let rest2 = skip_spaces_and_comments(rest2, eof)?;
+                    let rest2 = if let Some(s) = rest2.strip_prefix('.') {
+                        skip_spaces_and_comments(s, eof)?
+                    } else {
+                        rest2
+                    };
+                    if is_end_of_line(rest2, eof)? {
+                        let s = &input[..input.len() - rest2.len()];
+                        // The initial state depends on whether `BEGIN DATA`
+                        // itself spans more than one line.
+                        self.state = (
+                            if s.contains('\n') {
+                                State::BeginData1
+                            } else {
+                                State::BeginData2
+                            },
+                            Substate::empty(),
+                        );
+                        return Ok((rest, Type::Identifier));
+                    }
+                }
+            }
+        }
+
+        self.state.1 = Substate::empty();
+        let type_ = if is_reserved_word(identifier) {
+            Type::ReservedWord
+        } else if identifier.starts_with('!') {
+            Type::MacroId
+        } else {
+            Type::Identifier
+        };
+        Ok((rest, type_))
+    }
+    /// Segments a one- or two-character punctuator whose first character the
+    /// caller has already consumed.
+    ///
+    /// If `input` begins with one of the characters in `seconds`, that
+    /// character is consumed as well.  The segment is always `Type::Punct`,
+    /// and the substate flags are cleared.
+    fn parse_digraph<'a>(
+        &mut self,
+        seconds: &[char],
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let (next, after_next) = take(input, eof)?;
+        self.state.1 = Substate::empty();
+        let is_digraph = matches!(next, Some(c) if seconds.contains(&c));
+        let rest = if is_digraph { after_next } else { input };
+        Ok((rest, Type::Punct))
+    }
+    /// Segments a number: digits, an optional `.` with optional fraction
+    /// digits, and an optional exponent (`e` or `E`, an optional sign, and
+    /// digits).
+    ///
+    /// Returns `Type::ExpectedExponent` if the exponent has no digits, and
+    /// `Type::Number` otherwise.  Either way the substate flags are cleared.
+    fn parse_number<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let mut input = skip_digits(input, eof)?;
+        if let Some(rest) = match_char(|c| c == '.', input, eof)? {
+            let rest2 = skip_digits(rest, eof)?;
+            // A '.' with no digits after it that ends the line is not part of
+            // the number; it is left to be segmented separately.
+            if rest2.len() < rest.len() || !at_end_of_line(rest2, eof)? {
+                input = rest2;
+            }
+        };
+        if let Some(rest) = match_char(|c| c == 'e' || c == 'E', input, eof)? {
+            let rest = match_char(|c| c == '+' || c == '-', rest, eof)?.unwrap_or(rest);
+            let rest2 = skip_digits(rest, eof)?;
+            if rest2.len() == rest.len() {
+                self.state.1 = Substate::empty();
+                return Ok((rest, Type::ExpectedExponent));
+            }
+            input = rest2;
+        }
+        self.state.1 = Substate::empty();
+        Ok((input, Type::Number))
+    }
+    /// Segments one line of a comment command (`State::Comment1`).
+    ///
+    /// Scans to the end of the line, tracking whether the line is blank and
+    /// whether its last character other than white space is a period, then
+    /// decides whether the comment ends here or continues on the next line.
+    fn parse_comment_1<'a>(
+        &mut self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        /// What has been seen so far on the current line.
+        enum CommentState<'a> {
+            /// Nothing but white space.
+            Blank,
+            /// Something other than white space, most recently not a period.
+            NotBlank,
+            /// The most recent character other than white space was a period;
+            /// holds the input starting at that period.
+            Period(&'a str),
+        }
+        let mut state = CommentState::Blank;
+        loop {
+            let (Some(c), rest) = take(input, eof)? else {
+                // End of file.
+                self.state = (State::General, Substate::START_OF_COMMAND);
+                return Ok((input, Type::SeparateCommands));
+            };
+            match c {
+                '.' => state = CommentState::Period(input),
+                '\n' | '\r' if is_end_of_line(input, eof)? => {
+                    match state {
+                        CommentState::Blank => {
+                            // Blank line ends comment command.
+                            self.state = (State::General, Substate::START_OF_COMMAND);
+                            return Ok((input, Type::SeparateCommands));
+                        }
+                        CommentState::Period(period) => {
+                            // '.' at end of line ends comment command.
+                            self.state = (State::General, Substate::empty());
+                            return Ok((period, Type::CommentCommand));
+                        }
+                        CommentState::NotBlank => {
+                            // Comment continues onto next line.
+                            self.state = (State::Comment2, Substate::empty());
+                            return Ok((input, Type::CommentCommand));
+                        }
+                    }
+                }
+                c if c.is_whitespace() => (),
+                _ => state = CommentState::NotBlank,
+            }
+            input = rest;
+        }
+    }
+    /// Segments the new-line between two lines of a comment command
+    /// (`State::Comment2`).
+    ///
+    /// After the new-line, looks at the start of the next line to decide
+    /// whether it begins a new command, in which case the segmenter returns to
+    /// the general state, or continues the comment (`State::Comment1`).
+    fn parse_comment_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let rest = self.parse_newline(input, eof)?.unwrap();
+
+        let new_command = match take(rest, eof)?.0 {
+            Some('+') | Some('-') | Some('.') => true,
+            Some(c) if !c.is_whitespace() => self.at_command_start(rest, eof)?,
+            None | Some(_) => false,
+        };
+        if new_command {
+            self.state = (
+                State::General,
+                Substate::START_OF_LINE | Substate::START_OF_COMMAND,
+            );
+        } else {
+            self.state.0 = State::Comment1;
+        }
+        Ok((rest, Type::Newline))
+    }
+    /// Segments one line of a `DOCUMENT` command (`State::Document1`) as a
+    /// `Type::Document` segment that extends to the end of the line.
+    ///
+    /// The next state is `Document3` if the line's last character other than
+    /// white space is a period or if the input ends, and `Document2`
+    /// otherwise.
+    fn parse_document_1<'a>(
+        &mut self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        // True while the most recent character other than white space is '.'.
+        let mut end_cmd = false;
+        loop {
+            let (Some(c), rest) = take(input, eof)? else {
+                self.state.0 = State::Document3;
+                return Ok((input, Type::Document));
+            };
+            match c {
+                '.' => end_cmd = true,
+                '\n' | '\r' if is_end_of_line(input, eof)? => {
+                    self.state.0 = if end_cmd {
+                        State::Document3
+                    } else {
+                        State::Document2
+                    };
+                    return Ok((input, Type::Document));
+                }
+                c if !c.is_whitespace() => end_cmd = false,
+                _ => (),
+            }
+            input = rest;
+        }
+    }
+    fn parse_document_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let rest = self.parse_newline(input, eof)?.unwrap();
+        self.state.0 = State::Document1;
+        Ok((rest, Type::Newline))
+    }
+    fn parse_document_3<'a>(
+        &mut self,
+        input: &'a str,
+        _eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        self.state = (
+            State::General,
+            Substate::START_OF_COMMAND | Substate::START_OF_LINE,
+        );
+        Ok((input, Type::EndCommand))
+    }
+    fn quoted_file_label(input: &str, eof: bool) -> Result<bool, Incomplete> {
+        let input = skip_spaces_and_comments(input, eof)?;
+        match take(input, eof)?.0 {
+            Some('\'') | Some('"') | Some('\n') => Ok(true),
+            _ => Ok(false),
+        }
+    }
+    fn parse_file_label_1<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let mut sub = Segmenter {
+            state: (State::General, self.state.1),
+            ..*self
+        };
+        let (rest, type_) = sub.push(input, eof)?;
+        if type_ == Type::Identifier {
+            let id = &input[..input.len() - rest.len()];
+            debug_assert!(id_match("LABEL", id), "{id} should be LABEL");
+            if Self::quoted_file_label(rest, eof)? {
+                *self = sub;
+            } else {
+                self.state.0 = State::FileLabel2;
+            }
+        } else {
+            self.state.1 = sub.state.1;
+        }
+        Ok((rest, type_))
+    }
+    fn parse_file_label_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let input = skip_spaces(input, eof)?;
+        self.state.0 = State::FileLabel3;
+        Ok((input, Type::Spaces))
+    }
+    fn parse_file_label_3<'a>(
+        &mut self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let mut end_cmd = None;
+        loop {
+            let (c, rest) = take(input, eof)?;
+            match c {
+                None | Some('\n') | Some('\r') if is_end_of_line(input, eof)? => {
+                    self.state = (State::General, Substate::empty());
+                    return Ok((end_cmd.unwrap_or(input), Type::UnquotedString));
+                }
+                None => unreachable!(),
+                Some('.') => end_cmd = Some(input),
+                Some(c) if !c.is_whitespace() => end_cmd = None,
+                Some(_) => (),
+            }
+            input = rest;
+        }
+    }
+    fn subparse<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> {
+        let mut sub = Segmenter {
+            mode: self.mode,
+            state: (State::General, self.state.1),
+            nest: 0,
+        };
+        let result = sub.push(input, eof)?;
+        self.state.1 = sub.state.1;
+        Ok(result)
+    }
+    /// We are segmenting a `DO REPEAT` command, currently reading the syntax
+    /// that defines the stand-in variables (the head) before the lines of
+    /// syntax to be repeated (the body).
+    fn parse_do_repeat_1<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let (rest, type_) = self.subparse(input, eof)?;
+        if type_ == Type::SeparateCommands {
+            // We reached a blank line that separates the head from the body.
+            self.state.0 = State::DoRepeat2;
+        } else if type_ == Type::EndCommand || type_ == Type::StartCommand {
+            // We reached the body.
+            self.state.0 = State::DoRepeat3;
+            self.nest = 1;
+        }
+        Ok((rest, type_))
+    }
+    /// We are segmenting a `DO REPEAT` command, currently reading a blank line
+    /// that separates the head from the body.
+    fn parse_do_repeat_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let (rest, type_) = self.subparse(input, eof)?;
+        if type_ == Type::Newline {
+            // We reached the body.
+            self.state.0 = State::DoRepeat3;
+            self.nest = 1;
+        }
+        Ok((rest, type_))
+    }
+    fn parse_newline<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<&'a str>, Incomplete> {
+        let (Some(c), rest) = take(input, eof)? else {
+            return Ok(None);
+        };
+        match c {
+            '\n' => Ok(Some(rest)),
+            '\r' => {
+                if let (Some('\n'), rest) = take(rest, eof)? {
+                    Ok(Some(rest))
+                } else {
+                    Ok(None)
+                }
+            }
+            _ => Ok(None),
+        }
+    }
+
+    fn parse_full_line<'a>(
+        &mut self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<&'a str, Incomplete> {
+        loop {
+            if is_end_of_line(input, eof)? {
+                return Ok(input);
+            }
+            input = take(input, eof).unwrap().1;
+        }
+    }
+    fn check_repeat_command<'a>(&mut self, input: &'a str, eof: bool) -> Result<isize, Incomplete> {
+        let input = input.strip_prefix(&['-', '+']).unwrap_or(input);
+        let (id1, input) = self.next_id_in_command(input, eof)?;
+        if id_match("DO", id1) && id_match("REPEAT", self.next_id_in_command(input, eof)?.0) {
+            Ok(1)
+        } else if id_match("END", id1) && id_match("REPEAT", self.next_id_in_command(input, eof)?.0)
+        {
+            Ok(-1)
+        } else {
+            Ok(0)
+        }
+    }
+    /// We are in the body of `DO REPEAT`, segmenting the lines of syntax that
+    /// are to be repeated.  Report each line of syntax as a single
+    /// [`Type::DoRepeatCommand`].
+    ///
+    /// `DO REPEAT` can be nested, so we look for `DO REPEAT...END REPEAT`
+    /// blocks inside the lines we're segmenting.  `self.nest` counts the
+    /// nesting level, starting at 1.
+    fn parse_do_repeat_3<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        if let Some(rest) = self.parse_newline(input, eof)? {
+            return Ok((rest, Type::Newline));
+        }
+        let rest = self.parse_full_line(input, eof)?;
+        let direction = self.check_repeat_command(input, eof)?;
+        if direction > 0 {
+            if let Some(nest) = self.nest.checked_add(1) {
+                self.nest = nest;
+            } else {
+                self.state.0 = State::DoRepeat4;
+            }
+        } else if direction < 0 {
+            self.nest -= 1;
+            if self.nest == 0 {
+                // Nesting level dropped to 0, so we've finished reading the `DO
+                // REPEAT` body.
+                self.state = (
+                    State::General,
+                    Substate::START_OF_COMMAND | Substate::START_OF_LINE,
+                );
+                return self.push(input, eof);
+            }
+        }
+        return Ok((rest, Type::DoRepeatCommand));
+    }
+    fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<(&'a str, Type), Incomplete> {
+        self.state.0 = State::DoRepeat3;
+        Ok((input, Type::DoRepeatOverflow))
+    }
+    /// We are segmenting a `DEFINE` command, which consists of:
+    ///
+    ///   - The `DEFINE` keyword.
+    ///
+    ///   - An identifier.  We transform this into `Type::MacroName` instead of
+    ///     `Type::Identifier` or `Type::MacroId` because this identifier must
+    ///     never be macro-expanded.
+    ///
+    ///   - Anything but `(`.
+    ///
+    ///   - `(` followed by a sequence of tokens possibly including balanced
+    ///     parentheses up to a final `)`.
+    ///
+    ///   - A sequence of any number of lines, one string per line, ending with
+    ///     `!ENDDEFINE`.  The first line is usually blank (that is, a newline
+    ///     follows the `(`).  The last line usually just has `!ENDDEFINE.` on
+    ///     it, but it can start with other tokens.  The whole
+    ///     DEFINE...!ENDDEFINE can be on a single line, even.
+    fn parse_define_1_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let (rest, type_) = self.subparse(input, eof)?;
+        match type_ {
+            Type::Identifier | Type::MacroId if self.state.0 == State::Define1 => {
+                self.state.0 = State::Define2;
+                return Ok((rest, Type::MacroName));
+            }
+            Type::SeparateCommands | Type::EndCommand | Type::StartCommand => {
+                // The DEFINE command is malformed because we reached its end
+                // without ever hitting a `(` token.  Transition back to general
+                // parsing.
+                self.state.0 = State::General;
+            }
+            Type::Punct if input.starts_with('(') => {
+                self.state.0 = State::Define3;
+                self.nest = 1;
+            }
+            _ => (),
+        }
+        Ok((rest, type_))
+    }
+    fn parse_define_3<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let (rest, type_) = self.subparse(input, eof)?;
+        match type_ {
+            Type::SeparateCommands | Type::EndCommand | Type::StartCommand => {
+                // The DEFINE command is malformed because we reached its end
+                // without ever hitting a `(` token.  Transition back to general
+                // parsing.
+                self.state.0 = State::General;
+            }
+            Type::Punct if input.starts_with('(') => {
+                self.nest += 1;
+            }
+            Type::Punct if input.starts_with(')') => {
+                self.nest -= 1;
+                if self.nest == 0 {
+                    self.state = (State::Define4, Substate::empty());
+                }
+            }
+            _ => (),
+        }
+        Ok((rest, type_))
+    }
+    fn find_enddefine<'a>(mut input: &'a str) -> Option<&'a str> {
+        loop {
+            input = skip_spaces_and_comments(input, true).unwrap();
+            let (Some(c), rest) = take(input, true).unwrap() else {
+                return None;
+            };
+            match c {
+                '!' if strip_prefix_ignore_ascii_case(input, "!ENDDEFINE").is_some() => {
+                    return Some(input)
+                }
+                '\'' | '"' => {
+                    let index = rest.find(c)?;
+                    input = &rest[index + 1..];
+                }
+                _ => input = rest,
+            }
+        }
+    }
+
+    /// We are in the body of a macro definition, looking for additional lines
+    /// of the body or `!ENDDEFINE`.
+    ///
+    /// In `State::Define4`, we're parsing the first line of the macro body (the
+    /// same line as the closing parenthesis in the argument definition).  In
+    /// `State::Define5`, we're on a later line.
+    fn parse_define_4_5<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let rest = self.parse_full_line(input, eof)?;
+        let line = &input[..input.len() - rest.len()];
+        if let Some(end) = Self::find_enddefine(line) {
+            // Macro ends at the !ENDDEFINE on this line.
+            self.state = (State::General, Substate::empty());
+            let (prefix, rest) = input.split_at(line.len() - end.len());
+            if prefix.is_empty() {
+                // Line starts with `!ENDDEFINE`.
+                self.push(input, eof)
+            } else if prefix.trim_start().is_empty() {
+                // Line starts with spaces followed by `!ENDDEFINE`.
+                Ok((rest, Type::Spaces))
+            } else {
+                // Line starts with some content followed by `!ENDDEFINE`.
+                Ok((rest, Type::MacroBody))
+            }
+        } else {
+            // No `!ENDDEFINE`.  We have a full line of macro body.
+            //
+            // If the first line of the macro body is blank, we just report it
+            // as spaces, or not at all if there are no spaces, because it's not
+            // significant.
+            //
+            // However, if it's a later line, we need to report it because blank
+            // lines can have significance.
+            let type_ = if self.state.0 == State::Define4 && line.trim_start().is_empty() {
+                if line.is_empty() {
+                    return self.parse_define_6(input, eof);
+                }
+                Type::Spaces
+            } else {
+                Type::MacroBody
+            };
+            self.state.0 = State::Define6;
+            Ok((rest, type_))
+        }
+    }
+    fn parse_define_6<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let rest = self.parse_newline(input, eof)?.unwrap();
+        self.state.0 = State::Define5;
+        Ok((rest, Type::Newline))
+    }
+    fn parse_begin_data_1<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let (rest, type_) = self.subparse(input, eof)?;
+        if type_ == Type::Newline {
+            self.state.0 = State::BeginData2;
+        }
+        Ok((rest, type_))
+    }
+    fn parse_begin_data_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let (rest, type_) = self.subparse(input, eof)?;
+        if type_ == Type::Newline {
+            self.state.0 = State::BeginData3;
+        }
+        Ok((rest, type_))
+    }
+    fn is_end_data(line: &str) -> bool {
+        let Some(rest) = strip_prefix_ignore_ascii_case(line, "END") else {
+            return false;
+        };
+        let (Some(c), rest) = take(rest, true).unwrap() else {
+            return false;
+        };
+        if !c.is_whitespace() {
+            return false;
+        };
+        let Some(rest) = strip_prefix_ignore_ascii_case(rest, "DATA") else {
+            return false;
+        };
+
+        let mut endcmd = false;
+        for c in rest.chars() {
+            match c {
+                '.' if endcmd => return false,
+                '.' => endcmd = true,
+                c if c.is_whitespace() => (),
+                _ => return false,
+            }
+        }
+        true
+    }
+    fn parse_begin_data_3<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let rest = self.parse_full_line(input, eof)?;
+        let line = &input[..input.len() - rest.len()];
+        if Self::is_end_data(line) {
+            self.state = (
+                State::General,
+                Substate::START_OF_COMMAND | Substate::START_OF_LINE,
+            );
+            self.push(input, eof)
+        } else {
+            self.state.0 = State::BeginData4;
+            Ok((rest, Type::InlineData))
+        }
+    }
+    fn parse_begin_data_4<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Type), Incomplete> {
+        let rest = self.parse_newline(input, eof)?.unwrap();
+        self.state.0 = State::BeginData3;
+        Ok((rest, Type::Newline))
+    }
+}
+
+fn strip_prefix_ignore_ascii_case<'a>(line: &'a str, pattern: &str) -> Option<&'a str> {
+    line.get(..pattern.len())
+        .map(|prefix| {
+            prefix
+                .eq_ignore_ascii_case(pattern)
+                .then(|| &line[pattern.len()..])
+        })
+        .flatten()
+}
+
+#[cfg(test)]
+mod test;
diff --git a/rust/src/lex/segment/test.rs b/rust/src/lex/segment/test.rs

new file mode 100644 (file)

index 0000000..d24523f
--- /dev/null
+++ b/rust/src/lex/segment/test.rs
@@ -0,0 +1,2158 @@
+use crate::prompt::PromptStyle;
+
+use super::{Mode, Segmenter, Type};
+
+fn push_segment<'a>(segmenter: &mut Segmenter, input: &'a str, one_byte: bool) -> (&'a str, Type) {
+    if one_byte {
+        for len in input.char_indices().map(|(pos, _c)| pos) {
+            if let Ok((rest, type_)) = segmenter.push(&input[..len], false) {
+                return (&input[len - rest.len()..], type_);
+            }
+        }
+    }
+    segmenter.push(input, true).unwrap()
+}
+
+fn _check_segmentation(
+    mut input: &str,
+    mode: Mode,
+    expect_segments: &[(Type, &str)],
+    expect_prompts: &[PromptStyle],
+    one_byte: bool,
+) {
+    let mut segments = Vec::with_capacity(expect_segments.len());
+    let mut prompts = Vec::new();
+    let mut segmenter = Segmenter::new(mode, false);
+    loop {
+        let (rest, type_) = push_segment(&mut segmenter, input, one_byte);
+        let len = input.len() - rest.len();
+        let token = &input[..len];
+        segments.push((type_, token));
+        match type_ {
+            Type::End => break,
+            Type::Newline => prompts.push(segmenter.prompt()),
+            _ => (),
+        }
+        input = rest;
+    }
+
+    if &segments != expect_segments {
+        eprintln!("segments differ from expected:");
+        let difference = diff::slice(expect_segments, &segments);
+        for result in difference {
+            match result {
+                diff::Result::Left(left) => eprintln!("-{left:?}"),
+                diff::Result::Both(left, _right) => eprintln!(" {left:?}"),
+                diff::Result::Right(right) => eprintln!("+{right:?}"),
+            }
+        }
+        panic!();
+    }
+
+    if &prompts != expect_prompts {
+        eprintln!("prompts differ from expected:");
+        let difference = diff::slice(expect_prompts, &prompts);
+        for result in difference {
+            match result {
+                diff::Result::Left(left) => eprintln!("-{left:?}"),
+                diff::Result::Both(left, _right) => eprintln!(" {left:?}"),
+                diff::Result::Right(right) => eprintln!("+{right:?}"),
+            }
+        }
+        panic!();
+    }
+}
+
+fn check_segmentation(
+    input: &str,
+    mode: Mode,
+    expect_segments: &[(Type, &str)],
+    expect_prompts: &[PromptStyle],
+) {
+    for (one_byte, one_byte_name) in [(false, "full-string"), (true, "byte-by-byte")] {
+        println!("running {one_byte_name} segmentation test with LF newlines...");
+        _check_segmentation(input, mode, expect_segments, expect_prompts, one_byte);
+
+        println!("running {one_byte_name} segmentation test with CRLF newlines...");
+        _check_segmentation(
+            &input.replace('\n', "\r\n"),
+            mode,
+            &expect_segments
+                .iter()
+                .map(|(type_, s)| match *type_ {
+                    Type::Newline => (Type::Newline, "\r\n"),
+                    _ => (*type_, *s),
+                })
+                .collect::<Vec<_>>(),
+            expect_prompts,
+            one_byte,
+        );
+
+        if let Some(input) = input.strip_suffix('\n') {
+            println!("running {one_byte_name} segmentation test without final newline...");
+            let mut expect_segments: Vec<_> = expect_segments.iter().copied().collect();
+            assert_eq!(expect_segments.pop(), Some((Type::End, "")));
+            assert_eq!(expect_segments.pop(), Some((Type::Newline, "\n")));
+            while let Some((Type::SeparateCommands | Type::EndCommand, "")) = expect_segments.last()
+            {
+                expect_segments.pop();
+            }
+            expect_segments.push((Type::End, ""));
+            _check_segmentation(
+                input,
+                mode,
+                &expect_segments,
+                &expect_prompts[..expect_prompts.len() - 1],
+                one_byte,
+            );
+        }
+    }
+}
+
+fn print_segmentation(mut input: &str) {
+    let mut segmenter = Segmenter::new(Mode::Auto, false);
+    loop {
+        let (rest, type_) = segmenter.push(input, true).unwrap();
+        let len = input.len() - rest.len();
+        let token = &input[..len];
+        print!("{type_:?} {token:?}");
+        match type_ {
+            Type::Newline => print!(" ({:?})", segmenter.prompt()),
+            Type::End => break,
+            _ => (),
+        }
+        println!();
+        input = rest;
+    }
+}
+
+/// Checks segmentation of identifiers in `Mode::Auto`.
+///
+/// The input covers lowercase, uppercase, and mixed-case identifiers,
+/// identifiers that start with or contain `$`, `#`, `@`, `.`, and `_`,
+/// identifiers with non-ASCII letters, and `!`-prefixed names, which are
+/// expected as `Type::MacroId`.  The last line (`.x 1y _z`) is expected to
+/// split into a start-of-command `.`, a number, a `_` punctuator, and
+/// identifiers.
+#[test]
+fn test_identifiers() {
+    check_segmentation(
+        r#"a ab abc abcd !abcd
+A AB ABC ABCD !ABCD
+aB aBC aBcD !aBcD
+$x $y $z !$z
+grève Ângstrom poté
+#a #b #c ## #d !#d
+@efg @ @@. @#@ !@ 
+## # #12345 #.#
+f@#_.#6
+GhIjK
+.x 1y _z
+"#,
+        Mode::Auto,
+        // Expected segments, in input order.
+        &[
+            (Type::Identifier, "a"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "ab"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "abc"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "abcd"),
+            (Type::Spaces, " "),
+            (Type::MacroId, "!abcd"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "A"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "AB"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "ABC"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "ABCD"),
+            (Type::Spaces, " "),
+            (Type::MacroId, "!ABCD"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "aB"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "aBC"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "aBcD"),
+            (Type::Spaces, " "),
+            (Type::MacroId, "!aBcD"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "$x"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "$y"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "$z"),
+            (Type::Spaces, " "),
+            (Type::MacroId, "!$z"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "grève"),
+            (Type::Spaces, "\u{00a0}"),
+            (Type::Identifier, "Ângstrom"),
+            (Type::Spaces, "\u{00a0}"),
+            (Type::Identifier, "poté"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "#a"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "#b"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "#c"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "##"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "#d"),
+            (Type::Spaces, " "),
+            (Type::MacroId, "!#d"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "@efg"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "@"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "@@."),
+            (Type::Spaces, " "),
+            (Type::Identifier, "@#@"),
+            (Type::Spaces, " "),
+            (Type::MacroId, "!@"),
+            (Type::Spaces, " "),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "##"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "#"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "#12345"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "#.#"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "f@#_.#6"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "GhIjK"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "."),
+            (Type::Identifier, "x"),
+            (Type::Spaces, " "),
+            (Type::Number, "1"),
+            (Type::Identifier, "y"),
+            (Type::Spaces, " "),
+            (Type::Punct, "_"),
+            (Type::Identifier, "z"),
+            (Type::Newline, "\n"),
+            (Type::End, ""),
+        ],
+        // Expected prompt style after each of the 11 newlines.
+        &[
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+        ],
+    );
+}
+
+/// Checks how a `.` at the end of an identifier is segmented in
+/// `Mode::Auto`.
+///
+/// In the middle of a line the `.` is expected to stay part of the
+/// identifier.  As the last thing on a line, optionally followed by spaces or
+/// a `/* ... */` comment (terminated or not), it is expected to be a separate
+/// `Type::EndCommand`.  A `.` alone at the start of a line is expected as
+/// `Type::StartCommand`.
+#[test]
+fn test_identifiers_ending_in_dot() {
+    check_segmentation(
+        r#"abcd. abcd.
+ABCD. ABCD.
+aBcD. aBcD. 
+$y. $z. あいうえお.
+#c. #d..
+@@. @@....
+#.#.
+#abcd.
+.
+. 
+LMNOP. 
+QRSTUV./* end of line comment */
+qrstuv. /* end of line comment */
+QrStUv./* end of line comment */ 
+wxyz./* unterminated end of line comment
+WXYZ. /* unterminated end of line comment
+WxYz./* unterminated end of line comment 
+"#,
+        Mode::Auto,
+        // Expected segments, in input order.
+        &[
+            (Type::Identifier, "abcd."),
+            (Type::Spaces, " "),
+            (Type::Identifier, "abcd"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "ABCD."),
+            (Type::Spaces, " "),
+            (Type::Identifier, "ABCD"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "aBcD."),
+            (Type::Spaces, " "),
+            (Type::Identifier, "aBcD"),
+            (Type::EndCommand, "."),
+            (Type::Spaces, " "),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "$y."),
+            (Type::Spaces, " "),
+            (Type::Identifier, "$z."),
+            (Type::Spaces, " "),
+            (Type::Identifier, "あいうえお"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "#c."),
+            (Type::Spaces, " "),
+            (Type::Identifier, "#d."),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "@@."),
+            (Type::Spaces, " "),
+            (Type::Identifier, "@@..."),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "#.#"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "#abcd"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "."),
+            (Type::Spaces, " "),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "LMNOP"),
+            (Type::EndCommand, "."),
+            (Type::Spaces, " "),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "QRSTUV"),
+            (Type::EndCommand, "."),
+            (Type::Comment, "/* end of line comment */"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "qrstuv"),
+            (Type::EndCommand, "."),
+            (Type::Spaces, " "),
+            (Type::Comment, "/* end of line comment */"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "QrStUv"),
+            (Type::EndCommand, "."),
+            (Type::Comment, "/* end of line comment */"),
+            (Type::Spaces, " "),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "wxyz"),
+            (Type::EndCommand, "."),
+            (Type::Comment, "/* unterminated end of line comment"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "WXYZ"),
+            (Type::EndCommand, "."),
+            (Type::Spaces, " "),
+            (Type::Comment, "/* unterminated end of line comment"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "WxYz"),
+            (Type::EndCommand, "."),
+            (Type::Comment, "/* unterminated end of line comment "),
+            (Type::Newline, "\n"),
+            (Type::End, ""),
+        ],
+        // Expected prompt style after each of the 17 newlines.
+        &[
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+        ],
+    );
+}
+
+/// Checks segmentation of reserved words in `Mode::Auto`.
+///
+/// The reserved words are expected as `Type::ReservedWord` in both lowercase
+/// and uppercase.  The same words with an `x` appended are expected as
+/// ordinary identifiers.  On the last line, `and.` in mid-line is expected as
+/// an identifier that includes the `.`, while `with.` at the end of the line
+/// is expected as a reserved word followed by `Type::EndCommand`.
+#[test]
+fn test_reserved_words() {
+    check_segmentation(
+        r#"and or not eq ge gt le lt ne all by to with
+AND OR NOT EQ GE GT LE LT NE ALL BY TO WITH
+andx orx notx eqx gex gtx lex ltx nex allx byx tox withx
+and. with.
+"#,
+        Mode::Auto,
+        // Expected segments, in input order.
+        &[
+            (Type::ReservedWord, "and"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "or"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "not"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "eq"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "ge"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "gt"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "le"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "lt"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "ne"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "all"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "by"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "to"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "with"),
+            (Type::Newline, "\n"),
+            (Type::ReservedWord, "AND"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "OR"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "NOT"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "EQ"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "GE"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "GT"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "LE"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "LT"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "NE"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "ALL"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "BY"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "TO"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "WITH"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "andx"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "orx"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "notx"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "eqx"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "gex"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "gtx"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "lex"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "ltx"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "nex"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "allx"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "byx"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "tox"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "withx"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "and."),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "with"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::End, ""),
+        ],
+        // Expected prompt style after each of the 4 newlines.
+        &[
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::First,
+        ],
+    );
+}
+
+#[test]
+fn test_punctuation() { // Punctuators in Auto mode: space-separated, run together, then a line of other symbols.
+    check_segmentation(
+        r#"~ & | = >= > <= < ~= <> ( ) , - + * / [ ] **
+~&|=>=><=<~=<>(),-+*/[]**!*
+% : ; ? _ ` { } ~ !*
+"#,
+        Mode::Auto,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::Punct, "~"), // first input line: every punctuator separated by one space
+            (Type::Spaces, " "),
+            (Type::Punct, "&"),
+            (Type::Spaces, " "),
+            (Type::Punct, "|"),
+            (Type::Spaces, " "),
+            (Type::Punct, "="),
+            (Type::Spaces, " "),
+            (Type::Punct, ">="),
+            (Type::Spaces, " "),
+            (Type::Punct, ">"),
+            (Type::Spaces, " "),
+            (Type::Punct, "<="),
+            (Type::Spaces, " "),
+            (Type::Punct, "<"),
+            (Type::Spaces, " "),
+            (Type::Punct, "~="),
+            (Type::Spaces, " "),
+            (Type::Punct, "<>"),
+            (Type::Spaces, " "),
+            (Type::Punct, "("),
+            (Type::Spaces, " "),
+            (Type::Punct, ")"),
+            (Type::Spaces, " "),
+            (Type::Punct, ","),
+            (Type::Spaces, " "),
+            (Type::Punct, "-"),
+            (Type::Spaces, " "),
+            (Type::Punct, "+"),
+            (Type::Spaces, " "),
+            (Type::Punct, "*"),
+            (Type::Spaces, " "),
+            (Type::Punct, "/"),
+            (Type::Spaces, " "),
+            (Type::Punct, "["),
+            (Type::Spaces, " "),
+            (Type::Punct, "]"),
+            (Type::Spaces, " "),
+            (Type::Punct, "**"),
+            (Type::Newline, "\n"),
+            (Type::Punct, "~"), // second input line: the same punctuators with no spaces between them
+            (Type::Punct, "&"),
+            (Type::Punct, "|"),
+            (Type::Punct, "="),
+            (Type::Punct, ">="), // two-character operators are expected to stay whole
+            (Type::Punct, ">"),
+            (Type::Punct, "<="),
+            (Type::Punct, "<"),
+            (Type::Punct, "~="),
+            (Type::Punct, "<>"),
+            (Type::Punct, "("),
+            (Type::Punct, ")"),
+            (Type::Punct, ","),
+            (Type::Punct, "-"),
+            (Type::Punct, "+"),
+            (Type::Punct, "*"),
+            (Type::Punct, "/"),
+            (Type::Punct, "["),
+            (Type::Punct, "]"),
+            (Type::Punct, "**"),
+            (Type::MacroId, "!*"), // "!*" is expected as a single macro identifier, not punctuation
+            (Type::Newline, "\n"),
+            (Type::Punct, "%"), // third input line: remaining single-character symbols
+            (Type::Spaces, " "),
+            (Type::Punct, ":"),
+            (Type::Spaces, " "),
+            (Type::Punct, ";"),
+            (Type::Spaces, " "),
+            (Type::Punct, "?"),
+            (Type::Spaces, " "),
+            (Type::Punct, "_"),
+            (Type::Spaces, " "),
+            (Type::Punct, "`"),
+            (Type::Spaces, " "),
+            (Type::Punct, "{"),
+            (Type::Spaces, " "),
+            (Type::Punct, "}"),
+            (Type::Spaces, " "),
+            (Type::Punct, "~"),
+            (Type::Spaces, " "),
+            (Type::MacroId, "!*"),
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[PromptStyle::Later, PromptStyle::Later, PromptStyle::Later], // expected prompt styles; three entries for three input lines
+    );
+}
+
+#[test]
+fn test_positive_numbers() { // Unsigned numeric literals in Auto mode, including "." handling and malformed exponents.
+    check_segmentation(
+        r#"0 1 01 001. 1.
+123. /* comment 1 */ /* comment 2 */
+.1 0.1 00.1 00.10
+5e1 6E-1 7e+1 6E+01 6e-03
+.3E1 .4e-1 .5E+1 .6e+01 .7E-03
+1.23e1 45.6E-1 78.9e+1 99.9E+01 11.2e-03
+. 1e e1 1e+ 1e- 1.
+"#,
+        Mode::Auto,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::Number, "0"),
+            (Type::Spaces, " "),
+            (Type::Number, "1"),
+            (Type::Spaces, " "),
+            (Type::Number, "01"),
+            (Type::Spaces, " "),
+            (Type::Number, "001."), // a "." followed by more tokens on the line stays part of the number
+            (Type::Spaces, " "),
+            (Type::Number, "1"),
+            (Type::EndCommand, "."), // the "." that ends the line is split off as EndCommand
+            (Type::Newline, "\n"),
+            (Type::Number, "123"),
+            (Type::EndCommand, "."), // still EndCommand: only spaces and comments follow it on this line
+            (Type::Spaces, " "),
+            (Type::Comment, "/* comment 1 */"),
+            (Type::Spaces, " "),
+            (Type::Comment, "/* comment 2 */"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "."), // line-initial "." is expected as StartCommand, so ".1" splits in two
+            (Type::Number, "1"),
+            (Type::Spaces, " "),
+            (Type::Number, "0.1"),
+            (Type::Spaces, " "),
+            (Type::Number, "00.1"),
+            (Type::Spaces, " "),
+            (Type::Number, "00.10"),
+            (Type::Newline, "\n"),
+            (Type::Number, "5e1"),
+            (Type::Spaces, " "),
+            (Type::Number, "6E-1"),
+            (Type::Spaces, " "),
+            (Type::Number, "7e+1"),
+            (Type::Spaces, " "),
+            (Type::Number, "6E+01"),
+            (Type::Spaces, " "),
+            (Type::Number, "6e-03"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "."), // same split for the line-initial ".3E1"
+            (Type::Number, "3E1"),
+            (Type::Spaces, " "),
+            (Type::Number, ".4e-1"), // mid-line, a leading "." belongs to the number
+            (Type::Spaces, " "),
+            (Type::Number, ".5E+1"),
+            (Type::Spaces, " "),
+            (Type::Number, ".6e+01"),
+            (Type::Spaces, " "),
+            (Type::Number, ".7E-03"),
+            (Type::Newline, "\n"),
+            (Type::Number, "1.23e1"),
+            (Type::Spaces, " "),
+            (Type::Number, "45.6E-1"),
+            (Type::Spaces, " "),
+            (Type::Number, "78.9e+1"),
+            (Type::Spaces, " "),
+            (Type::Number, "99.9E+01"),
+            (Type::Spaces, " "),
+            (Type::Number, "11.2e-03"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "."),
+            (Type::Spaces, " "),
+            (Type::ExpectedExponent, "1e"), // exponent marker with no digits after it
+            (Type::Spaces, " "),
+            (Type::Identifier, "e1"), // no leading digit, so this is expected as an identifier
+            (Type::Spaces, " "),
+            (Type::ExpectedExponent, "1e+"),
+            (Type::Spaces, " "),
+            (Type::ExpectedExponent, "1e-"),
+            (Type::Spaces, " "),
+            (Type::Number, "1"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[ // expected prompt styles; seven entries for seven input lines
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::First,
+        ],
+    );
+}
+
+#[test]
+fn test_negative_numbers() { // Negative numeric literals in Auto mode; every input line begins with a space.
+    check_segmentation(
+        r#" -0 -1 -01 -001. -1.
+ -123. /* comment 1 */ /* comment 2 */
+ -.1 -0.1 -00.1 -00.10
+ -5e1 -6E-1 -7e+1 -6E+01 -6e-03
+ -.3E1 -.4e-1 -.5E+1 -.6e+01 -.7E-03
+ -1.23e1 -45.6E-1 -78.9e+1 -99.9E+01 -11.2e-03
+ -/**/1
+ -. -1e -e1 -1e+ -1e- -1.
+"#,
+        Mode::Auto,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::Spaces, " "),
+            (Type::Number, "-0"), // the sign is expected to be part of the Number segment
+            (Type::Spaces, " "),
+            (Type::Number, "-1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-01"),
+            (Type::Spaces, " "),
+            (Type::Number, "-001."), // a "." followed by more tokens on the line stays part of the number
+            (Type::Spaces, " "),
+            (Type::Number, "-1"),
+            (Type::EndCommand, "."), // the "." that ends the line is split off as EndCommand
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Number, "-123"),
+            (Type::EndCommand, "."), // still EndCommand: only spaces and comments follow it on this line
+            (Type::Spaces, " "),
+            (Type::Comment, "/* comment 1 */"),
+            (Type::Spaces, " "),
+            (Type::Comment, "/* comment 2 */"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Number, "-.1"), // the leading "." is part of the number here
+            (Type::Spaces, " "),
+            (Type::Number, "-0.1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-00.1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-00.10"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Number, "-5e1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-6E-1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-7e+1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-6E+01"),
+            (Type::Spaces, " "),
+            (Type::Number, "-6e-03"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Number, "-.3E1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-.4e-1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-.5E+1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-.6e+01"),
+            (Type::Spaces, " "),
+            (Type::Number, "-.7E-03"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Number, "-1.23e1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-45.6E-1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-78.9e+1"),
+            (Type::Spaces, " "),
+            (Type::Number, "-99.9E+01"),
+            (Type::Spaces, " "),
+            (Type::Number, "-11.2e-03"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Punct, "-"), // a comment between "-" and the digit leaves "-" a separate punctuator
+            (Type::Comment, "/**/"),
+            (Type::Number, "1"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Punct, "-"), // "-." with no digit: two punctuators
+            (Type::Punct, "."),
+            (Type::Spaces, " "),
+            (Type::ExpectedExponent, "-1e"), // exponent marker with no digits; the sign stays attached
+            (Type::Spaces, " "),
+            (Type::Punct, "-"), // "-e1": punctuator followed by an identifier
+            (Type::Identifier, "e1"),
+            (Type::Spaces, " "),
+            (Type::ExpectedExponent, "-1e+"),
+            (Type::Spaces, " "),
+            (Type::ExpectedExponent, "-1e-"),
+            (Type::Spaces, " "),
+            (Type::Number, "-1"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[ // expected prompt styles; eight entries for eight input lines
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::First,
+        ],
+    );
+}
+
+#[test]
+fn test_strings() { // Quoted, hex and Unicode strings, unterminated strings, and line-initial "+"/"-" in Auto mode.
+    check_segmentation(
+        r#"'x' "y" 'abc'
+'Don''t' "Can't" 'Won''t'
+"""quoted""" '"quoted"'
+'' ""
+'missing end quote
+"missing double quote
+x"4142" X'5152'
+u'fffd' U"041"
++ new command
++ /* comment */ 'string continuation'
++ /* also a punctuator on blank line
+- 'new command'
+"#,
+        Mode::Auto,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::QuotedString, "'x'"), // segment text includes the surrounding quotes
+            (Type::Spaces, " "),
+            (Type::QuotedString, "\"y\""),
+            (Type::Spaces, " "),
+            (Type::QuotedString, "'abc'"),
+            (Type::Newline, "\n"),
+            (Type::QuotedString, "'Don''t'"), // a doubled quote inside the string does not end it
+            (Type::Spaces, " "),
+            (Type::QuotedString, "\"Can't\""),
+            (Type::Spaces, " "),
+            (Type::QuotedString, "'Won''t'"),
+            (Type::Newline, "\n"),
+            (Type::QuotedString, "\"\"\"quoted\"\"\""),
+            (Type::Spaces, " "),
+            (Type::QuotedString, "'\"quoted\"'"),
+            (Type::Newline, "\n"),
+            (Type::QuotedString, "''"), // empty strings
+            (Type::Spaces, " "),
+            (Type::QuotedString, "\"\""),
+            (Type::Newline, "\n"),
+            (Type::ExpectedQuote, "'missing end quote"), // unterminated string: segment runs to end of line
+            (Type::Newline, "\n"),
+            (Type::ExpectedQuote, "\"missing double quote"),
+            (Type::Newline, "\n"),
+            (Type::HexString, "x\"4142\""), // x/X prefix, either quote character
+            (Type::Spaces, " "),
+            (Type::HexString, "X'5152'"),
+            (Type::Newline, "\n"),
+            (Type::UnicodeString, "u'fffd'"), // u/U prefix, either quote character
+            (Type::Spaces, " "),
+            (Type::UnicodeString, "U\"041\""),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "+"), // line-initial "+" before "new command" is expected as StartCommand
+            (Type::Spaces, " "),
+            (Type::Identifier, "new"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::Newline, "\n"),
+            (Type::Punct, "+"), // here the line-initial "+" is expected as an ordinary punctuator
+            (Type::Spaces, " "),
+            (Type::Comment, "/* comment */"),
+            (Type::Spaces, " "),
+            (Type::QuotedString, "'string continuation'"),
+            (Type::Newline, "\n"),
+            (Type::Punct, "+"),
+            (Type::Spaces, " "),
+            (Type::Comment, "/* also a punctuator on blank line"), // unterminated comment: segment runs to end of line
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "-"), // line-initial "-" is expected as StartCommand
+            (Type::Spaces, " "),
+            (Type::QuotedString, "'new command'"),
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[ // expected prompt styles; twelve entries for twelve input lines
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+        ],
+    );
+}
+
+#[test]
+fn test_shbang() { // "#!" is a Shbang segment only on the first input line; the same text later is ordinary tokens.
+    check_segmentation(
+        r#"#! /usr/bin/pspp
+title my title.
+#! /usr/bin/pspp
+"#,
+        Mode::Interactive,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::Shbang, "#! /usr/bin/pspp"), // first line: the whole line (minus newline) is one segment
+            (Type::Newline, "\n"),
+            (Type::Identifier, "title"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "my"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "title"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "#"), // third line: identical text, now split into individual segments
+            (Type::MacroId, "!"),
+            (Type::Spaces, " "),
+            (Type::Punct, "/"),
+            (Type::Identifier, "usr"),
+            (Type::Punct, "/"),
+            (Type::Identifier, "bin"),
+            (Type::Punct, "/"),
+            (Type::Identifier, "pspp"),
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[PromptStyle::First, PromptStyle::First, PromptStyle::Later], // expected prompt styles; three entries for three input lines
+    );
+}
+
+#[test]
+fn test_comment_command() { // COMMENT commands ("*" or the keyword) in Interactive mode: body text is not tokenized.
+    check_segmentation(
+        r#"* Comment commands "don't
+have to contain valid tokens.
+
+** Check ambiguity with ** token.
+****************.
+
+comment keyword works too.
+COMM also.
+com is ambiguous with COMPUTE.
+
+   * Comment need not start at left margin.
+
+* Comment ends with blank line
+
+next command.
+
+"#,
+        Mode::Interactive,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::CommentCommand, "* Comment commands \"don't"), // unbalanced quotes are accepted inside the comment
+            (Type::Newline, "\n"),
+            (Type::CommentCommand, "have to contain valid tokens"), // continuation line, up to the terminating "."
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""), // zero-width segment expected for the blank input line
+            (Type::Newline, "\n"),
+            (Type::CommentCommand, "** Check ambiguity with ** token"), // leading "**" starts a comment, not the "**" operator
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::CommentCommand, "****************"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::CommentCommand, "comment keyword works too"), // the spelled-out keyword is part of the segment text
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::CommentCommand, "COMM also"), // four-letter abbreviation is accepted
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "com"), // "com" is not treated as COMMENT, so the line is tokenized normally
+            (Type::Spaces, " "),
+            (Type::Identifier, "is"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "ambiguous"),
+            (Type::Spaces, " "),
+            (Type::ReservedWord, "with"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "COMPUTE"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::Spaces, "   "), // leading indentation is its own Spaces segment before the comment
+            (
+                Type::CommentCommand,
+                "* Comment need not start at left margin",
+            ),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::CommentCommand, "* Comment ends with blank line"), // no "." here; the blank line that follows ends it
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "next"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[ // expected prompt styles; sixteen entries for sixteen input lines
+            PromptStyle::Comment,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::Comment,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+        ],
+    );
+}
+
+#[test]
+fn test_document_command() { // DOCUMENT (and abbreviations) in Interactive mode: whole lines become Document segments.
+    check_segmentation(
+        r#"DOCUMENT one line.
+DOC more
+    than
+        one
+            line.
+docu
+first.paragraph
+isn't parsed as tokens
+
+second paragraph.
+"#,
+        Mode::Interactive,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::StartDocument, ""), // zero-width marker expected before the first Document line
+            (Type::Document, "DOCUMENT one line."), // the keyword and the trailing "." are part of the segment text
+            (Type::EndCommand, ""), // zero-width here, since the "." already belongs to the Document segment
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::StartDocument, ""),
+            (Type::Document, "DOC more"), // three-letter abbreviation is accepted
+            (Type::Newline, "\n"),
+            (Type::Document, "    than"), // leading indentation is kept in the segment text
+            (Type::Newline, "\n"),
+            (Type::Document, "        one"),
+            (Type::Newline, "\n"),
+            (Type::Document, "            line."),
+            (Type::EndCommand, ""),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::StartDocument, ""),
+            (Type::Document, "docu"),
+            (Type::Newline, "\n"),
+            (Type::Document, "first.paragraph"), // a "." in the middle of a line does not end the command
+            (Type::Newline, "\n"),
+            (Type::Document, "isn't parsed as tokens"),
+            (Type::Newline, "\n"),
+            (Type::Document, ""), // a blank line inside the document is an empty Document segment
+            (Type::Newline, "\n"),
+            (Type::Document, "second paragraph."),
+            (Type::EndCommand, ""),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[ // expected prompt styles; ten entries for ten input lines
+            PromptStyle::First,
+            PromptStyle::Document,
+            PromptStyle::Document,
+            PromptStyle::Document,
+            PromptStyle::First,
+            PromptStyle::Document,
+            PromptStyle::Document,
+            PromptStyle::Document,
+            PromptStyle::Document,
+            PromptStyle::First,
+        ],
+    );
+}
+
+#[test]
+fn test_file_label_command() { // FILE LABEL in Interactive mode: an unquoted label is one UnquotedString segment.
+    check_segmentation(
+        r#"FIL label isn't quoted.
+FILE
+  lab 'is quoted'.
+FILE /*
+/**/  lab not quoted here either
+
+"#,
+        Mode::Interactive,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::Identifier, "FIL"), // abbreviated command words are still Identifier segments
+            (Type::Spaces, " "),
+            (Type::Identifier, "label"),
+            (Type::Spaces, " "),
+            (Type::UnquotedString, "isn't quoted"), // rest of the command up to the terminating "."
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "FILE"), // command name split across two lines
+            (Type::Newline, "\n"),
+            (Type::Spaces, "  "),
+            (Type::Identifier, "lab"),
+            (Type::Spaces, " "),
+            (Type::QuotedString, "'is quoted'"), // a quoted label is an ordinary QuotedString
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "FILE"), // command name split by comments and a newline
+            (Type::Spaces, " "),
+            (Type::Comment, "/*"),
+            (Type::Newline, "\n"),
+            (Type::Comment, "/**/"),
+            (Type::Spaces, "  "),
+            (Type::Identifier, "lab"),
+            (Type::Spaces, " "),
+            (Type::UnquotedString, "not quoted here either"), // no "." on this line; the segment runs to end of line
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""), // zero-width segment expected for the blank input line
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[ // expected prompt styles; six entries for six input lines
+            PromptStyle::First,
+            PromptStyle::Later,
+            PromptStyle::First,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::First,
+        ],
+    );
+}
+
+#[test]
+fn test_begin_data() { // BEGIN DATA ... END DATA in Interactive mode: lines in between become InlineData segments.
+    check_segmentation(
+        r#"begin data.
+end data.
+
+begin data. /*
+123
+xxx
+end data.
+
+BEG /**/ DAT /*
+5 6 7 /* x
+
+end  data
+end data
+.
+
+begin
+ data.
+data
+end data.
+
+begin data "xxx".
+begin data 123.
+not data
+"#,
+        Mode::Interactive,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::Identifier, "begin"), // first case: BEGIN DATA immediately followed by END DATA, no data lines
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "end"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "begin"), // second case: a trailing comment after "begin data." is still allowed
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::EndCommand, "."),
+            (Type::Spaces, " "),
+            (Type::Comment, "/*"),
+            (Type::Newline, "\n"),
+            (Type::InlineData, "123"), // each data line is one InlineData segment
+            (Type::Newline, "\n"),
+            (Type::InlineData, "xxx"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "end"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "BEG"), // third case: abbreviated keywords with comments, and no "." terminator
+            (Type::Spaces, " "),
+            (Type::Comment, "/**/"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "DAT"),
+            (Type::Spaces, " "),
+            (Type::Comment, "/*"),
+            (Type::Newline, "\n"),
+            (Type::InlineData, "5 6 7 /* x"), // comment-like text inside data is just data
+            (Type::Newline, "\n"),
+            (Type::InlineData, ""), // a blank line inside data is an empty InlineData segment
+            (Type::Newline, "\n"),
+            (Type::InlineData, "end  data"), // with two spaces this is expected as data, not END DATA
+            (Type::Newline, "\n"),
+            (Type::Identifier, "end"), // "end data" with a single space ends the data
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "."), // "." alone on the next line is expected as StartCommand
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "begin"), // fourth case: BEGIN and DATA on separate lines
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::InlineData, "data"), // the word "data" on its own line is a data line here
+            (Type::Newline, "\n"),
+            (Type::Identifier, "end"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "begin"), // extra tokens after "begin data": no InlineData is expected afterwards
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::Spaces, " "),
+            (Type::QuotedString, "\"xxx\""),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "begin"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::Spaces, " "),
+            (Type::Number, "123"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::ReservedWord, "not"), // so this following line is tokenized normally
+            (Type::Spaces, " "),
+            (Type::Identifier, "data"),
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[ // expected prompt styles; twenty-three entries for twenty-three input lines
+            PromptStyle::Data,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::Data,
+            PromptStyle::Data,
+            PromptStyle::Data,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::Data,
+            PromptStyle::Data,
+            PromptStyle::Data,
+            PromptStyle::Data,
+            PromptStyle::Later,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::Later,
+            PromptStyle::Data,
+            PromptStyle::Data,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::Later,
+        ],
+    );
+}
+
+#[test]
+fn test_do_repeat() { // DO REPEAT in Interactive mode: body lines become whole-line DoRepeatCommand segments.
+    check_segmentation(
+        r#"do repeat x=a b c
+          y=d e f.
+  do repeat a=1 thru 5.
+another command.
+second command
++ third command.
+end /* x */ /* y */ repeat print.
+end
+ repeat.
+do
+  repeat #a=1.
+  inner command.
+end repeat.
+"#,
+        Mode::Interactive,
+        &[ // expected (segment type, text) pairs, in input order
+            (Type::Identifier, "do"), // the outer DO REPEAT command itself is tokenized normally
+            (Type::Spaces, " "),
+            (Type::Identifier, "repeat"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "x"),
+            (Type::Punct, "="),
+            (Type::Identifier, "a"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "b"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "c"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, "          "),
+            (Type::Identifier, "y"),
+            (Type::Punct, "="),
+            (Type::Identifier, "d"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "e"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "f"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "  do repeat a=1 thru 5."), // a nested DO REPEAT line is body text, kept whole with its indentation
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "another command."),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "second command"),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "+ third command."),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print."), // this END REPEAT line is still expected as body text
+            (Type::Newline, "\n"),
+            (Type::Identifier, "end"), // the next END REPEAT, split across two lines, is tokenized normally
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "repeat"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "do"), // second case: DO and REPEAT on separate lines
+            (Type::Newline, "\n"),
+            (Type::Spaces, "  "),
+            (Type::Identifier, "repeat"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "#a"),
+            (Type::Punct, "="),
+            (Type::Number, "1"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "  inner command."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "end"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "repeat"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::End, ""), // zero-width end-of-input marker
+        ],
+        &[ // expected prompt styles; thirteen entries for thirteen input lines
+            PromptStyle::Later,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::Later,
+            PromptStyle::First,
+            PromptStyle::Later,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::First,
+        ],
+    );
+}
+
+#[test]
+fn test_do_repeat_overflow() {
+    const N: usize = 257;
+    let do_repeat: Vec<String> = (0..N)
+        .map(|i| format!("do repeat v{i}={i} thru {}.\n", i + 5))
+        .collect();
+    let end_repeat: Vec<String> = (0..N)
+        .rev()
+        .map(|i| format!("end repeat. /* {i}\n"))
+        .collect();
+
+    let s: String = do_repeat
+        .iter()
+        .chain(end_repeat.iter())
+        .map(|s| s.as_str())
+        .collect();
+    let mut expect_output = vec![
+        (Type::Identifier, "do"),
+        (Type::Spaces, " "),
+        (Type::Identifier, "repeat"),
+        (Type::Spaces, " "),
+        (Type::Identifier, "v0"),
+        (Type::Punct, "="),
+        (Type::Number, "0"),
+        (Type::Spaces, " "),
+        (Type::Identifier, "thru"),
+        (Type::Spaces, " "),
+        (Type::Number, "5"),
+        (Type::EndCommand, "."),
+        (Type::Newline, "\n"),
+    ];
+    for i in 1..N {
+        expect_output.push((Type::DoRepeatCommand, &do_repeat[i].trim_end()));
+        if i >= 255 {
+            expect_output.push((Type::DoRepeatOverflow, ""));
+        }
+        expect_output.push((Type::Newline, "\n"));
+    }
+    for i in 0..254 {
+        expect_output.push((Type::DoRepeatCommand, &end_repeat[i].trim_end()));
+        expect_output.push((Type::Newline, "\n"));
+    }
+    let comments: Vec<String> = (0..(N - 254)).rev().map(|i| format!("/* {i}")).collect();
+    for comment in &comments {
+        expect_output.extend([
+            (Type::Identifier, "end"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "repeat"),
+            (Type::EndCommand, "."),
+            (Type::Spaces, " "),
+            (Type::Comment, comment),
+            (Type::Newline, "\n"),
+        ]);
+    }
+    expect_output.push((Type::End, ""));
+
+    let expect_prompts: Vec<_> = (0..N * 2 - 3)
+        .map(|_| PromptStyle::DoRepeat)
+        .chain([PromptStyle::First, PromptStyle::First, PromptStyle::First])
+        .collect();
+    check_segmentation(&s, Mode::Interactive, &expect_output, &expect_prompts);
+}
+
+#[test]
+fn test_do_repeat_batch() {
+    check_segmentation(
+        r#"do repeat x=a b c
+          y=d e f
+do repeat a=1 thru 5
+another command
+second command
++ third command
+end /* x */ /* y */ repeat print
+end
+ repeat
+do
+  repeat #a=1
+
+  inner command
+end repeat
+"#,
+        Mode::Batch,
+        &[
+            (Type::Identifier, "do"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "repeat"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "x"),
+            (Type::Punct, "="),
+            (Type::Identifier, "a"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "b"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "c"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, "          "),
+            (Type::Identifier, "y"),
+            (Type::Punct, "="),
+            (Type::Identifier, "d"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "e"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "f"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, ""),
+            (Type::DoRepeatCommand, "do repeat a=1 thru 5"),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "another command"),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "second command"),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "+ third command"),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "end"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "repeat"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, ""),
+            (Type::Identifier, "do"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, "  "),
+            (Type::Identifier, "repeat"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "#a"),
+            (Type::Punct, "="),
+            (Type::Number, "1"),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::DoRepeatCommand, "  inner command"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "end"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "repeat"),
+            (Type::Newline, "\n"),
+            (Type::End, ""),
+        ],
+        &[
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::DoRepeat,
+            PromptStyle::DoRepeat,
+            PromptStyle::Later,
+        ],
+    );
+}
+
+mod define {
+    use crate::{
+        lex::segment::{Mode, Type},
+        prompt::PromptStyle,
+    };
+
+    use super::check_segmentation;
+
+    #[test]
+    fn test_simple() {
+        check_segmentation(
+            r#"define !macro1()
+var1 var2 var3 "!enddefine"
+!enddefine.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::MacroBody, "var1 var2 var3 \"!enddefine\""),
+                (Type::Newline, "\n"),
+                (Type::MacroId, "!enddefine"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::Define, PromptStyle::Define, PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_no_newline_after_parentheses() {
+        check_segmentation(
+            r#"define !macro1() var1 var2 var3 /* !enddefine
+!enddefine.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::MacroBody, " var1 var2 var3 /* !enddefine"),
+                (Type::Newline, "\n"),
+                (Type::MacroId, "!enddefine"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::Define, PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_no_newline_before_enddefine() {
+        check_segmentation(
+            r#"define !macro1()
+var1 var2 var3!enddefine.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::MacroBody, "var1 var2 var3"),
+                (Type::MacroId, "!enddefine"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::Define, PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_all_on_one_line() {
+        check_segmentation(
+            r#"define !macro1()var1 var2 var3!enddefine.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::MacroBody, "var1 var2 var3"),
+                (Type::MacroId, "!enddefine"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_empty() {
+        check_segmentation(
+            r#"define !macro1()
+!enddefine.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::MacroId, "!enddefine"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::Define, PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_blank_lines() {
+        check_segmentation(
+            r#"define !macro1()
+
+
+!enddefine.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::MacroBody, ""),
+                (Type::Newline, "\n"),
+                (Type::MacroBody, ""),
+                (Type::Newline, "\n"),
+                (Type::MacroId, "!enddefine"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Define,
+                PromptStyle::Define,
+                PromptStyle::Define,
+                PromptStyle::First,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_arguments() {
+        check_segmentation(
+            r#"define !macro1(a(), b(), c())
+!enddefine.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Identifier, "a"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Punct, ","),
+                (Type::Spaces, " "),
+                (Type::Identifier, "b"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Punct, ","),
+                (Type::Spaces, " "),
+                (Type::Identifier, "c"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::MacroId, "!enddefine"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::Define, PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_multiline_arguments() {
+        check_segmentation(
+            r#"define !macro1(
+  a(), b(
+  ),
+  c()
+)
+!enddefine.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "  "),
+                (Type::Identifier, "a"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Punct, ","),
+                (Type::Spaces, " "),
+                (Type::Identifier, "b"),
+                (Type::Punct, "("),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "  "),
+                (Type::Punct, ")"),
+                (Type::Punct, ","),
+                (Type::Newline, "\n"),
+                (Type::Spaces, "  "),
+                (Type::Identifier, "c"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::MacroId, "!enddefine"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Define,
+                PromptStyle::First,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_arguments_start_on_second_line() {
+        check_segmentation(
+            r#"define !macro1
+(x,y,z
+)
+content 1
+content 2
+!enddefine.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Newline, "\n"),
+                (Type::Punct, "("),
+                (Type::Identifier, "x"),
+                (Type::Punct, ","),
+                (Type::Identifier, "y"),
+                (Type::Punct, ","),
+                (Type::Identifier, "z"),
+                (Type::Newline, "\n"),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::MacroBody, "content 1"),
+                (Type::Newline, "\n"),
+                (Type::MacroBody, "content 2"),
+                (Type::Newline, "\n"),
+                (Type::MacroId, "!enddefine"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Later,
+                PromptStyle::Later,
+                PromptStyle::Define,
+                PromptStyle::Define,
+                PromptStyle::Define,
+                PromptStyle::First,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_early_end_of_command_1() {
+        check_segmentation(
+            r#"define !macro1.
+data list /x 1.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "data"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "list"),
+                (Type::Spaces, " "),
+                (Type::Punct, "/"),
+                (Type::Identifier, "x"),
+                (Type::Spaces, " "),
+                (Type::Number, "1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::First, PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_early_end_of_command_2() {
+        check_segmentation(
+            r#"define !macro1
+x.
+data list /x 1.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "x"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "data"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "list"),
+                (Type::Spaces, " "),
+                (Type::Punct, "/"),
+                (Type::Identifier, "x"),
+                (Type::Spaces, " "),
+                (Type::Number, "1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::Later, PromptStyle::First, PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_early_end_of_command_3() {
+        check_segmentation(
+            r#"define !macro1(.
+x.
+data list /x 1.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "x"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "data"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "list"),
+                (Type::Spaces, " "),
+                (Type::Punct, "/"),
+                (Type::Identifier, "x"),
+                (Type::Spaces, " "),
+                (Type::Number, "1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::First, PromptStyle::First, PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_early_end_of_command_4() {
+        // Notice the command terminator at the end of the `DEFINE` command,
+        // which should not be there and ends it early.
+        check_segmentation(
+            r#"define !macro1.
+data list /x 1.
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::Identifier, "data"),
+                (Type::Spaces, " "),
+                (Type::Identifier, "list"),
+                (Type::Spaces, " "),
+                (Type::Punct, "/"),
+                (Type::Identifier, "x"),
+                (Type::Spaces, " "),
+                (Type::Number, "1"),
+                (Type::EndCommand, "."),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::First, PromptStyle::First],
+        );
+    }
+
+    #[test]
+    fn test_missing_enddefine() {
+        check_segmentation(
+            r#"define !macro1()
+content line 1
+content line 2
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::MacroBody, "content line 1"),
+                (Type::Newline, "\n"),
+                (Type::MacroBody, "content line 2"),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[
+                PromptStyle::Define,
+                PromptStyle::Define,
+                PromptStyle::Define,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_missing_enddefine_2() {
+        check_segmentation(
+            r#"define !macro1()
+"#,
+            Mode::Interactive,
+            &[
+                (Type::Identifier, "define"),
+                (Type::Spaces, " "),
+                (Type::MacroName, "!macro1"),
+                (Type::Punct, "("),
+                (Type::Punct, ")"),
+                (Type::Newline, "\n"),
+                (Type::End, ""),
+            ],
+            &[PromptStyle::Define],
+        );
+    }
+}
+
+#[test]
+fn test_batch_mode() {
+    check_segmentation(
+        r#"first command
+     another line of first command
++  second command
+third command
+
+fourth command.
+   fifth command.
+"#,
+        Mode::Batch,
+        &[
+            (Type::Identifier, "first"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, "     "),
+            (Type::Identifier, "another"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "line"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "of"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "first"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "+"),
+            (Type::Spaces, "  "),
+            (Type::Identifier, "second"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, ""),
+            (Type::Identifier, "third"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "fourth"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Spaces, "   "),
+            (Type::Identifier, "fifth"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::End, ""),
+        ],
+        &[
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+        ],
+    );
+}
+
+#[test]
+fn test_auto_mode() {
+    check_segmentation(
+        r#"command
+     another line of command
+2sls
++  another command
+another line of second command
+data list /x 1
+aggregate.
+print eject.
+twostep cluster
+
+
+fourth command.
+   fifth command.
+"#,
+        Mode::Auto,
+        &[
+            (Type::Identifier, "command"),
+            (Type::Newline, "\n"),
+            (Type::Spaces, "     "),
+            (Type::Identifier, "another"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "line"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "of"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, ""),
+            (Type::Number, "2"),
+            (Type::Identifier, "sls"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, "+"),
+            (Type::Spaces, "  "),
+            (Type::Identifier, "another"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "another"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "line"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "of"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "second"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, ""),
+            (Type::Identifier, "data"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "list"),
+            (Type::Spaces, " "),
+            (Type::Punct, "/"),
+            (Type::Identifier, "x"),
+            (Type::Spaces, " "),
+            (Type::Number, "1"),
+            (Type::Newline, "\n"),
+            (Type::StartCommand, ""),
+            (Type::Identifier, "aggregate"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "print"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "eject"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "twostep"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "cluster"),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::SeparateCommands, ""),
+            (Type::Newline, "\n"),
+            (Type::Identifier, "fourth"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::Spaces, "   "),
+            (Type::Identifier, "fifth"),
+            (Type::Spaces, " "),
+            (Type::Identifier, "command"),
+            (Type::EndCommand, "."),
+            (Type::Newline, "\n"),
+            (Type::End, ""),
+        ],
+        &[
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::Later,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::Later,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+            PromptStyle::First,
+        ],
+    );
+}
author	Ben Pfaff <blp@cs.stanford.edu>
	Thu, 11 Jul 2024 19:01:13 +0000 (12:01 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Thu, 11 Jul 2024 19:01:13 +0000 (12:01 -0700)
rust/src/lex/segment.rs	[deleted file]	patch \| blob \| history
rust/src/lex/segment/mod.rs	[new file with mode: 0644]	patch \| blob
rust/src/lex/segment/test.rs	[new file with mode: 0644]	patch \| blob