work on scan
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 13 Jul 2024 22:18:57 +0000 (15:18 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 13 Jul 2024 22:18:57 +0000 (15:18 -0700)
rust/src/lex/mod.rs
rust/src/lex/scan/mod.rs [new file with mode: 0644]
rust/src/lex/segment/mod.rs
rust/src/lex/segment/test.rs
rust/src/lex/token.rs [new file with mode: 0644]
src/language/lexer/scan.c

index 2047837df405a9dc656362a524983e13fe10fc6d..732cf3a09c6d2fa571b8c247545edf8a50e64339 100644 (file)
@@ -1,2 +1,16 @@
+//! PSPP lexical analysis.
+//!
+//! PSPP divides traditional "lexical analysis" or "tokenization" into two
+//! phases: a lower-level phase called "segmentation" and a higher-level phase
+//! called "scanning".  [segment] implements the segmentation phase and [scan]
+//! the scanning phase.
+//!
+//! Scanning accepts as input a stream of segments, which are UTF-8 strings each
+//! labeled with a segment type.  It outputs a stream of "scan tokens", which
+//! are the same as the tokens used by the PSPP parser with a few additional
+//! types.
+
 pub mod segment;
+pub mod scan;
 pub mod command_name;
+pub mod token;
diff --git a/rust/src/lex/scan/mod.rs b/rust/src/lex/scan/mod.rs
new file mode 100644 (file)
index 0000000..343bde8
--- /dev/null
@@ -0,0 +1,12 @@
+//! PSPP syntax scanning.
+//!
+//! PSPP divides traditional "lexical analysis" or "tokenization" into two
+//! phases: a lower-level phase called "segmentation" and a higher-level phase
+//! called "scanning".  [super::segment] implements the segmentation phase and
+//! this module the scanning phase.
+//!
+//! Scanning accepts as input a stream of segments, which are UTF-8 strings each
+//! labeled with a segment type.  It outputs a stream of "scan tokens", which
+//! are the same as the tokens used by the PSPP parser with a few additional
+//! types.
+
index 6bf30ba2e5afd60b31166c0adb84b8a6bc83fdd0..401d5238253bb90476c84d360cb3292ad3b7c43c 100644 (file)
@@ -56,7 +56,7 @@ pub enum Mode {
 
 /// The type of a segment.
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum Type {
+pub enum Segment {
     Number,
     QuotedString,
     HexString,
@@ -214,10 +214,14 @@ impl Segmenter {
     /// consumed, must not be provided with *different* values on subsequent
     /// calls.  This is because the function must often make decisions based on
     /// looking ahead beyond the bytes that it consumes.
-    pub fn push<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> {
+    pub fn push<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Segment), Incomplete> {
         if input.is_empty() {
             if eof {
-                return Ok((input, Type::End));
+                return Ok((input, Segment::End));
             } else {
                 return Err(Incomplete);
             };
@@ -443,12 +447,12 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         if let (Some('#'), rest) = take(input, eof)? {
             if let (Some('!'), rest) = take(rest, eof)? {
                 let rest = self.parse_full_line(rest, eof)?;
                 self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((rest, Type::Shbang));
+                return Ok((rest, Segment::Shbang));
             }
         }
 
@@ -469,7 +473,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         debug_assert_eq!(self.state.0, State::General);
         debug_assert!(self.start_of_line());
         debug_assert!(!input.is_empty());
@@ -481,16 +485,16 @@ impl Segmenter {
             '+' if is_start_of_string(skip_spaces_and_comments(rest, eof)?, eof)? => {
                 // This `+` is punctuation that may separate pieces of a string.
                 self.state = (State::General, Substate::empty());
-                return Ok((rest, Type::Punct));
+                return Ok((rest, Segment::Punct));
             }
             '+' | '-' | '.' => {
                 self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((rest, Type::StartCommand));
+                return Ok((rest, Segment::StartCommand));
             }
             _ if c.is_whitespace() => {
                 if at_end_of_line(input, eof)? {
                     self.state = (State::General, Substate::START_OF_COMMAND);
-                    return Ok((input, Type::SeparateCommands));
+                    return Ok((input, Segment::SeparateCommands));
                 }
             }
             _ => {
@@ -498,7 +502,7 @@ impl Segmenter {
                     && !self.state.1.contains(Substate::START_OF_COMMAND)
                 {
                     self.state = (State::General, Substate::START_OF_COMMAND);
-                    return Ok((input, Type::StartCommand));
+                    return Ok((input, Segment::StartCommand));
                 }
             }
         }
@@ -509,7 +513,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         debug_assert!(self.state.0 == State::General);
         debug_assert!(!self.state.1.contains(Substate::START_OF_LINE));
         let (Some(c), rest) = take(input, eof)? else {
@@ -520,16 +524,16 @@ impl Segmenter {
                 self.state.1 |= Substate::START_OF_LINE;
                 Ok((
                     self.parse_newline(input, eof).unwrap().unwrap(),
-                    Type::Newline,
+                    Segment::Newline,
                 ))
             }
             '/' => {
                 if let (Some('*'), rest) = take(rest, eof)? {
                     let rest = skip_comment(rest, eof)?;
-                    return Ok((rest, Type::Comment));
+                    return Ok((rest, Segment::Comment));
                 } else {
                     self.state.1 = Substate::empty();
-                    return Ok((rest, Type::Punct));
+                    return Ok((rest, Segment::Punct));
                 }
             }
             '-' => {
@@ -548,11 +552,11 @@ impl Segmenter {
                     None | Some(_) => (),
                 }
                 self.state.1 = Substate::empty();
-                return Ok((rest, Type::Punct));
+                return Ok((rest, Segment::Punct));
             }
             '(' | ')' | '[' | ']' | '{' | '}' | ',' | '=' | ';' | ':' | '&' | '|' | '+' => {
                 self.state.1 = Substate::empty();
-                return Ok((rest, Type::Punct));
+                return Ok((rest, Segment::Punct));
             }
             '*' => {
                 if self.state.1.contains(Substate::START_OF_COMMAND) {
@@ -567,43 +571,43 @@ impl Segmenter {
             '~' => self.parse_digraph(&['='], rest, eof),
             '.' if at_end_of_line(rest, eof)? => {
                 self.state.1 = Substate::START_OF_COMMAND;
-                Ok((rest, Type::EndCommand))
+                Ok((rest, Segment::EndCommand))
             }
             '.' => match take(rest, eof)? {
                 (Some(c), _) if c.is_ascii_digit() => self.parse_number(input, eof),
-                _ => Ok((rest, Type::Punct)),
+                _ => Ok((rest, Segment::Punct)),
             },
             '0'..='9' => self.parse_number(input, eof),
-            'u' | 'U' => self.maybe_parse_string(Type::UnicodeString, (input, rest), eof),
-            'x' | 'X' => self.maybe_parse_string(Type::HexString, (input, rest), eof),
-            '\'' | '"' => self.parse_string(Type::QuotedString, c, rest, eof),
+            'u' | 'U' => self.maybe_parse_string(Segment::UnicodeString, (input, rest), eof),
+            'x' | 'X' => self.maybe_parse_string(Segment::HexString, (input, rest), eof),
+            '\'' | '"' => self.parse_string(Segment::QuotedString, c, rest, eof),
             '!' => {
                 let (c, rest2) = take(rest, eof)?;
                 match c {
-                    Some('*') => Ok((rest2, Type::MacroId)),
+                    Some('*') => Ok((rest2, Segment::MacroId)),
                     Some(_) => self.parse_id(input, eof),
-                    None => Ok((rest, Type::Punct)),
+                    None => Ok((rest, Segment::Punct)),
                 }
             }
-            c if c.is_whitespace() => Ok((skip_spaces(rest, eof)?, Type::Spaces)),
+            c if c.is_whitespace() => Ok((skip_spaces(rest, eof)?, Segment::Spaces)),
             c if c.may_start_id() => self.parse_id(input, eof),
             '!'..='~' if c != '\\' && c != '^' => {
                 self.state.1 = Substate::empty();
-                Ok((rest, Type::Punct))
+                Ok((rest, Segment::Punct))
             }
             _ => {
                 self.state.1 = Substate::empty();
-                Ok((rest, Type::UnexpectedChar))
+                Ok((rest, Segment::UnexpectedChar))
             }
         }
     }
     fn parse_string<'a>(
         &mut self,
-        type_: Type,
+        segment: Segment,
         quote: char,
         mut input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         println!("{quote:?} {input:?}");
         while let (Some(c), rest) = take(input, eof)? {
             match c {
@@ -611,7 +615,7 @@ impl Segmenter {
                     let (c, rest2) = take(rest, eof)?;
                     if c != Some(quote) {
                         self.state.1 = Substate::empty();
-                        return Ok((rest, type_));
+                        return Ok((rest, segment));
                     }
                     input = rest2;
                 }
@@ -620,16 +624,16 @@ impl Segmenter {
             }
         }
         self.state.1 = Substate::empty();
-        Ok((input, Type::ExpectedQuote))
+        Ok((input, Segment::ExpectedQuote))
     }
     fn maybe_parse_string<'a>(
         &mut self,
-        type_: Type,
+        segment: Segment,
         input: (&'a str, &'a str),
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         match take(input.1, eof)? {
-            (Some(c), rest) if c == '\'' || c == '"' => self.parse_string(type_, c, rest, eof),
+            (Some(c), rest) if c == '\'' || c == '"' => self.parse_string(segment, c, rest, eof),
             _ => self.parse_id(input.0, eof),
         }
     }
@@ -640,40 +644,44 @@ impl Segmenter {
     ) -> Result<(&'a str, &'a str), Incomplete> {
         let mut sub = Segmenter::new(self.mode, true);
         loop {
-            let (rest, type_) = sub.push(input, eof)?;
-            match type_ {
-                Type::Shbang | Type::Spaces | Type::Comment | Type::Newline => (),
+            let (rest, segment) = sub.push(input, eof)?;
+            match segment {
+                Segment::Shbang | Segment::Spaces | Segment::Comment | Segment::Newline => (),
 
-                Type::Identifier => return Ok((&input[..input.len() - rest.len()], rest)),
+                Segment::Identifier => return Ok((&input[..input.len() - rest.len()], rest)),
 
-                Type::Number
-                | Type::QuotedString
-                | Type::HexString
-                | Type::UnicodeString
-                | Type::UnquotedString
-                | Type::ReservedWord
-                | Type::Punct
-                | Type::CommentCommand
-                | Type::DoRepeatCommand
-                | Type::DoRepeatOverflow
-                | Type::InlineData
-                | Type::MacroId
-                | Type::MacroName
-                | Type::MacroBody
-                | Type::StartDocument
-                | Type::Document
-                | Type::StartCommand
-                | Type::SeparateCommands
-                | Type::EndCommand
-                | Type::End
-                | Type::ExpectedQuote
-                | Type::ExpectedExponent
-                | Type::UnexpectedChar => return Ok(("", rest)),
+                Segment::Number
+                | Segment::QuotedString
+                | Segment::HexString
+                | Segment::UnicodeString
+                | Segment::UnquotedString
+                | Segment::ReservedWord
+                | Segment::Punct
+                | Segment::CommentCommand
+                | Segment::DoRepeatCommand
+                | Segment::DoRepeatOverflow
+                | Segment::InlineData
+                | Segment::MacroId
+                | Segment::MacroName
+                | Segment::MacroBody
+                | Segment::StartDocument
+                | Segment::Document
+                | Segment::StartCommand
+                | Segment::SeparateCommands
+                | Segment::EndCommand
+                | Segment::End
+                | Segment::ExpectedQuote
+                | Segment::ExpectedExponent
+                | Segment::UnexpectedChar => return Ok(("", rest)),
             }
             input = rest;
         }
     }
-    fn parse_id<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> {
+    fn parse_id<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let (Some(_), mut end) = take(input, eof).unwrap() else {
             unreachable!()
         };
@@ -696,18 +704,18 @@ impl Segmenter {
                 return self.parse_comment_1(input, eof);
             } else if id_match("DOCUMENT", identifier) {
                 self.state.0 = State::Document1;
-                return Ok((input, Type::StartDocument));
+                return Ok((input, Segment::StartDocument));
             } else if id_match_n("DEFINE", identifier, 6) {
                 self.state.0 = State::Define1;
             } else if id_match("FILE", identifier) {
                 if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) {
                     self.state = (State::FileLabel1, Substate::empty());
-                    return Ok((rest, Type::Identifier));
+                    return Ok((rest, Segment::Identifier));
                 }
             } else if id_match("DO", identifier) {
                 if id_match("REPEAT", self.next_id_in_command(rest, eof)?.0) {
                     self.state = (State::DoRepeat1, Substate::empty());
-                    return Ok((rest, Type::Identifier));
+                    return Ok((rest, Segment::Identifier));
                 }
             } else if id_match("BEGIN", identifier) {
                 let (next_id, rest2) = self.next_id_in_command(rest, eof)?;
@@ -728,28 +736,28 @@ impl Segmenter {
                             },
                             Substate::empty(),
                         );
-                        return Ok((rest, Type::Identifier));
+                        return Ok((rest, Segment::Identifier));
                     }
                 }
             }
         }
 
         self.state.1 = Substate::empty();
-        let type_ = if is_reserved_word(identifier) {
-            Type::ReservedWord
+        let segment = if is_reserved_word(identifier) {
+            Segment::ReservedWord
         } else if identifier.starts_with('!') {
-            Type::MacroId
+            Segment::MacroId
         } else {
-            Type::Identifier
+            Segment::Identifier
         };
-        Ok((rest, type_))
+        Ok((rest, segment))
     }
     fn parse_digraph<'a>(
         &mut self,
         seconds: &[char],
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let (c, rest) = take(input, eof)?;
         self.state.1 = Substate::empty();
         Ok((
@@ -757,14 +765,14 @@ impl Segmenter {
                 Some(c) if seconds.contains(&c) => rest,
                 _ => input,
             },
-            Type::Punct,
+            Segment::Punct,
         ))
     }
     fn parse_number<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let mut input = skip_digits(input, eof)?;
         if let Some(rest) = match_char(|c| c == '.', input, eof)? {
             let rest2 = skip_digits(rest, eof)?;
@@ -777,18 +785,18 @@ impl Segmenter {
             let rest2 = skip_digits(rest, eof)?;
             if rest2.len() == rest.len() {
                 self.state.1 = Substate::empty();
-                return Ok((rest, Type::ExpectedExponent));
+                return Ok((rest, Segment::ExpectedExponent));
             }
             input = rest2;
         }
         self.state.1 = Substate::empty();
-        Ok((input, Type::Number))
+        Ok((input, Segment::Number))
     }
     fn parse_comment_1<'a>(
         &mut self,
         mut input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         enum CommentState<'a> {
             Blank,
             NotBlank,
@@ -799,7 +807,7 @@ impl Segmenter {
             let (Some(c), rest) = take(input, eof)? else {
                 // End of file.
                 self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok((input, Type::SeparateCommands));
+                return Ok((input, Segment::SeparateCommands));
             };
             match c {
                 '.' => state = CommentState::Period(input),
@@ -808,17 +816,17 @@ impl Segmenter {
                         CommentState::Blank => {
                             // Blank line ends comment command.
                             self.state = (State::General, Substate::START_OF_COMMAND);
-                            return Ok((input, Type::SeparateCommands));
+                            return Ok((input, Segment::SeparateCommands));
                         }
                         CommentState::Period(period) => {
                             // '.' at end of line ends comment command.
                             self.state = (State::General, Substate::empty());
-                            return Ok((period, Type::CommentCommand));
+                            return Ok((period, Segment::CommentCommand));
                         }
                         CommentState::NotBlank => {
                             // Comment continues onto next line.
                             self.state = (State::Comment2, Substate::empty());
-                            return Ok((input, Type::CommentCommand));
+                            return Ok((input, Segment::CommentCommand));
                         }
                     }
                 }
@@ -832,7 +840,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let rest = self.parse_newline(input, eof)?.unwrap();
 
         let new_command = match take(rest, eof)?.0 {
@@ -848,18 +856,18 @@ impl Segmenter {
         } else {
             self.state.0 = State::Comment1;
         }
-        Ok((rest, Type::Newline))
+        Ok((rest, Segment::Newline))
     }
     fn parse_document_1<'a>(
         &mut self,
         mut input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let mut end_cmd = false;
         loop {
             let (Some(c), rest) = take(input, eof)? else {
                 self.state.0 = State::Document3;
-                return Ok((input, Type::Document));
+                return Ok((input, Segment::Document));
             };
             match c {
                 '.' => end_cmd = true,
@@ -869,7 +877,7 @@ impl Segmenter {
                     } else {
                         State::Document2
                     };
-                    return Ok((input, Type::Document));
+                    return Ok((input, Segment::Document));
                 }
                 c if !c.is_whitespace() => end_cmd = false,
                 _ => (),
@@ -881,21 +889,21 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let rest = self.parse_newline(input, eof)?.unwrap();
         self.state.0 = State::Document1;
-        Ok((rest, Type::Newline))
+        Ok((rest, Segment::Newline))
     }
     fn parse_document_3<'a>(
         &mut self,
         input: &'a str,
         _eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         self.state = (
             State::General,
             Substate::START_OF_COMMAND | Substate::START_OF_LINE,
         );
-        Ok((input, Type::EndCommand))
+        Ok((input, Segment::EndCommand))
     }
     fn quoted_file_label(input: &str, eof: bool) -> Result<bool, Incomplete> {
         let input = skip_spaces_and_comments(input, eof)?;
@@ -908,13 +916,13 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let mut sub = Segmenter {
             state: (State::General, self.state.1),
             ..*self
         };
-        let (rest, type_) = sub.push(input, eof)?;
-        if type_ == Type::Identifier {
+        let (rest, segment) = sub.push(input, eof)?;
+        if segment == Segment::Identifier {
             let id = &input[..input.len() - rest.len()];
             debug_assert!(id_match("LABEL", id), "{id} should be LABEL");
             if Self::quoted_file_label(rest, eof)? {
@@ -925,29 +933,29 @@ impl Segmenter {
         } else {
             self.state.1 = sub.state.1;
         }
-        Ok((rest, type_))
+        Ok((rest, segment))
     }
     fn parse_file_label_2<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let input = skip_spaces(input, eof)?;
         self.state.0 = State::FileLabel3;
-        Ok((input, Type::Spaces))
+        Ok((input, Segment::Spaces))
     }
     fn parse_file_label_3<'a>(
         &mut self,
         mut input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let mut end_cmd = None;
         loop {
             let (c, rest) = take(input, eof)?;
             match c {
                 None | Some('\n') | Some('\r') if is_end_of_line(input, eof)? => {
                     self.state = (State::General, Substate::empty());
-                    return Ok((end_cmd.unwrap_or(input), Type::UnquotedString));
+                    return Ok((end_cmd.unwrap_or(input), Segment::UnquotedString));
                 }
                 None => unreachable!(),
                 Some('.') => end_cmd = Some(input),
@@ -957,7 +965,11 @@ impl Segmenter {
             input = rest;
         }
     }
-    fn subparse<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> {
+    fn subparse<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let mut sub = Segmenter {
             mode: self.mode,
             state: (State::General, self.state.1),
@@ -974,17 +986,17 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        if type_ == Type::SeparateCommands {
+    ) -> Result<(&'a str, Segment), Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?;
+        if segment == Segment::SeparateCommands {
             // We reached a blank line that separates the head from the body.
             self.state.0 = State::DoRepeat2;
-        } else if type_ == Type::EndCommand || type_ == Type::StartCommand {
+        } else if segment == Segment::EndCommand || segment == Segment::StartCommand {
             // We reached the body.
             self.state.0 = State::DoRepeat3;
             self.nest = 1;
         }
-        Ok((rest, type_))
+        Ok((rest, segment))
     }
     /// We are segmenting a `DO REPEAT` command, currently reading a blank line
     /// that separates the head from the body.
@@ -992,14 +1004,14 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        if type_ == Type::Newline {
+    ) -> Result<(&'a str, Segment), Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?;
+        if segment == Segment::Newline {
             // We reached the body.
             self.state.0 = State::DoRepeat3;
             self.nest = 1;
         }
-        Ok((rest, type_))
+        Ok((rest, segment))
     }
     fn parse_newline<'a>(
         &mut self,
@@ -1057,9 +1069,9 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         if let Some(rest) = self.parse_newline(input, eof)? {
-            return Ok((rest, Type::Newline));
+            return Ok((rest, Segment::Newline));
         }
         let rest = self.parse_full_line(input, eof)?;
         let direction = self.check_repeat_command(input, eof)?;
@@ -1081,11 +1093,11 @@ impl Segmenter {
                 return self.push(input, eof);
             }
         }
-        return Ok((rest, Type::DoRepeatCommand));
+        return Ok((rest, Segment::DoRepeatCommand));
     }
-    fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<(&'a str, Type), Incomplete> {
+    fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<(&'a str, Segment), Incomplete> {
         self.state.0 = State::DoRepeat3;
-        Ok((input, Type::DoRepeatOverflow))
+        Ok((input, Segment::DoRepeatOverflow))
     }
     /// We are segmenting a `DEFINE` command, which consists of:
     ///
@@ -1109,44 +1121,44 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        match type_ {
-            Type::Identifier | Type::MacroId if self.state.0 == State::Define1 => {
+    ) -> Result<(&'a str, Segment), Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?;
+        match segment {
+            Segment::Identifier | Segment::MacroId if self.state.0 == State::Define1 => {
                 self.state.0 = State::Define2;
-                return Ok((rest, Type::MacroName));
+                return Ok((rest, Segment::MacroName));
             }
-            Type::SeparateCommands | Type::EndCommand | Type::StartCommand => {
+            Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => {
                 // The DEFINE command is malformed because we reached its end
                 // without ever hitting a `(` token.  Transition back to general
                 // parsing.
                 self.state.0 = State::General;
             }
-            Type::Punct if input.starts_with('(') => {
+            Segment::Punct if input.starts_with('(') => {
                 self.state.0 = State::Define3;
                 self.nest = 1;
             }
             _ => (),
         }
-        Ok((rest, type_))
+        Ok((rest, segment))
     }
     fn parse_define_3<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        match type_ {
-            Type::SeparateCommands | Type::EndCommand | Type::StartCommand => {
+    ) -> Result<(&'a str, Segment), Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?;
+        match segment {
+            Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => {
                 // The DEFINE command is malformed because we reached its end
                 // before the `(` was balanced by a matching `)`.  Transition
                 // back to general parsing.
                 self.state.0 = State::General;
             }
-            Type::Punct if input.starts_with('(') => {
+            Segment::Punct if input.starts_with('(') => {
                 self.nest += 1;
             }
-            Type::Punct if input.starts_with(')') => {
+            Segment::Punct if input.starts_with(')') => {
                 self.nest -= 1;
                 if self.nest == 0 {
                     self.state = (State::Define4, Substate::empty());
@@ -1154,7 +1166,7 @@ impl Segmenter {
             }
             _ => (),
         }
-        Ok((rest, type_))
+        Ok((rest, segment))
     }
     fn find_enddefine<'a>(mut input: &'a str) -> Option<&'a str> {
         loop {
@@ -1185,7 +1197,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let rest = self.parse_full_line(input, eof)?;
         let line = &input[..input.len() - rest.len()];
         if let Some(end) = Self::find_enddefine(line) {
@@ -1197,10 +1209,10 @@ impl Segmenter {
                 self.push(input, eof)
             } else if prefix.trim_start().is_empty() {
                 // Line starts with spaces followed by `!ENDDEFINE`.
-                Ok((rest, Type::Spaces))
+                Ok((rest, Segment::Spaces))
             } else {
                 // Line starts with some content followed by `!ENDDEFINE`.
-                Ok((rest, Type::MacroBody))
+                Ok((rest, Segment::MacroBody))
             }
         } else {
             // No `!ENDDEFINE`.  We have a full line of macro body.
@@ -1211,48 +1223,48 @@ impl Segmenter {
             //
             // However, if it's a later line, we need to report it because blank
             // lines can have significance.
-            let type_ = if self.state.0 == State::Define4 && line.trim_start().is_empty() {
+            let segment = if self.state.0 == State::Define4 && line.trim_start().is_empty() {
                 if line.is_empty() {
                     return self.parse_define_6(input, eof);
                 }
-                Type::Spaces
+                Segment::Spaces
             } else {
-                Type::MacroBody
+                Segment::MacroBody
             };
             self.state.0 = State::Define6;
-            Ok((rest, type_))
+            Ok((rest, segment))
         }
     }
     fn parse_define_6<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let rest = self.parse_newline(input, eof)?.unwrap();
         self.state.0 = State::Define5;
-        Ok((rest, Type::Newline))
+        Ok((rest, Segment::Newline))
     }
     fn parse_begin_data_1<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        if type_ == Type::Newline {
+    ) -> Result<(&'a str, Segment), Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?;
+        if segment == Segment::Newline {
             self.state.0 = State::BeginData2;
         }
-        Ok((rest, type_))
+        Ok((rest, segment))
     }
     fn parse_begin_data_2<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
-        let (rest, type_) = self.subparse(input, eof)?;
-        if type_ == Type::Newline {
+    ) -> Result<(&'a str, Segment), Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?;
+        if segment == Segment::Newline {
             self.state.0 = State::BeginData3;
         }
-        Ok((rest, type_))
+        Ok((rest, segment))
     }
     fn is_end_data(line: &str) -> bool {
         let Some(rest) = strip_prefix_ignore_ascii_case(line, "END") else {
@@ -1283,7 +1295,7 @@ impl Segmenter {
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let rest = self.parse_full_line(input, eof)?;
         let line = &input[..input.len() - rest.len()];
         if Self::is_end_data(line) {
@@ -1294,17 +1306,17 @@ impl Segmenter {
             self.push(input, eof)
         } else {
             self.state.0 = State::BeginData4;
-            Ok((rest, Type::InlineData))
+            Ok((rest, Segment::InlineData))
         }
     }
     fn parse_begin_data_4<'a>(
         &mut self,
         input: &'a str,
         eof: bool,
-    ) -> Result<(&'a str, Type), Incomplete> {
+    ) -> Result<(&'a str, Segment), Incomplete> {
         let rest = self.parse_newline(input, eof)?.unwrap();
         self.state.0 = State::BeginData3;
-        Ok((rest, Type::Newline))
+        Ok((rest, Segment::Newline))
     }
 }
 
index d24523f56ed114ef110e2d9856dce00afa885a3c..d01a80d7796f106455b410cfa980ae1013c30583 100644 (file)
@@ -1,12 +1,12 @@
 use crate::prompt::PromptStyle;
 
-use super::{Mode, Segmenter, Type};
+use super::{Mode, Segmenter, Segment};
 
-fn push_segment<'a>(segmenter: &mut Segmenter, input: &'a str, one_byte: bool) -> (&'a str, Type) {
+fn push_segment<'a>(segmenter: &mut Segmenter, input: &'a str, one_byte: bool) -> (&'a str, Segment) {
     if one_byte {
         for len in input.char_indices().map(|(pos, _c)| pos) {
-            if let Ok((rest, type_)) = segmenter.push(&input[..len], false) {
-                return (&input[len - rest.len()..], type_);
+            if let Ok((rest, segment)) = segmenter.push(&input[..len], false) {
+                return (&input[len - rest.len()..], segment);
             }
         }
     }
@@ -16,7 +16,7 @@ fn push_segment<'a>(segmenter: &mut Segmenter, input: &'a str, one_byte: bool) -
 fn _check_segmentation(
     mut input: &str,
     mode: Mode,
-    expect_segments: &[(Type, &str)],
+    expect_segments: &[(Segment, &str)],
     expect_prompts: &[PromptStyle],
     one_byte: bool,
 ) {
@@ -24,13 +24,13 @@ fn _check_segmentation(
     let mut prompts = Vec::new();
     let mut segmenter = Segmenter::new(mode, false);
     loop {
-        let (rest, type_) = push_segment(&mut segmenter, input, one_byte);
+        let (rest, segment) = push_segment(&mut segmenter, input, one_byte);
         let len = input.len() - rest.len();
         let token = &input[..len];
-        segments.push((type_, token));
-        match type_ {
-            Type::End => break,
-            Type::Newline => prompts.push(segmenter.prompt()),
+        segments.push((segment, token));
+        match segment {
+            Segment::End => break,
+            Segment::Newline => prompts.push(segmenter.prompt()),
             _ => (),
         }
         input = rest;
@@ -66,7 +66,7 @@ fn _check_segmentation(
 fn check_segmentation(
     input: &str,
     mode: Mode,
-    expect_segments: &[(Type, &str)],
+    expect_segments: &[(Segment, &str)],
     expect_prompts: &[PromptStyle],
 ) {
     for (one_byte, one_byte_name) in [(false, "full-string"), (true, "byte-by-byte")] {
@@ -79,9 +79,9 @@ fn check_segmentation(
             mode,
             &expect_segments
                 .iter()
-                .map(|(type_, s)| match *type_ {
-                    Type::Newline => (Type::Newline, "\r\n"),
-                    _ => (*type_, *s),
+                .map(|(segment, s)| match *segment {
+                    Segment::Newline => (Segment::Newline, "\r\n"),
+                    _ => (*segment, *s),
                 })
                 .collect::<Vec<_>>(),
             expect_prompts,
@@ -91,13 +91,13 @@ fn check_segmentation(
         if let Some(input) = input.strip_suffix('\n') {
             println!("running {one_byte_name} segmentation test without final newline...");
             let mut expect_segments: Vec<_> = expect_segments.iter().copied().collect();
-            assert_eq!(expect_segments.pop(), Some((Type::End, "")));
-            assert_eq!(expect_segments.pop(), Some((Type::Newline, "\n")));
-            while let Some((Type::SeparateCommands | Type::EndCommand, "")) = expect_segments.last()
+            assert_eq!(expect_segments.pop(), Some((Segment::End, "")));
+            assert_eq!(expect_segments.pop(), Some((Segment::Newline, "\n")));
+            while let Some((Segment::SeparateCommands | Segment::EndCommand, "")) = expect_segments.last()
             {
                 expect_segments.pop();
             }
-            expect_segments.push((Type::End, ""));
+            expect_segments.push((Segment::End, ""));
             _check_segmentation(
                 input,
                 mode,
@@ -109,16 +109,17 @@ fn check_segmentation(
     }
 }
 
+#[allow(dead_code)]
 fn print_segmentation(mut input: &str) {
     let mut segmenter = Segmenter::new(Mode::Auto, false);
     loop {
-        let (rest, type_) = segmenter.push(input, true).unwrap();
+        let (rest, segment) = segmenter.push(input, true).unwrap();
         let len = input.len() - rest.len();
         let token = &input[..len];
-        print!("{type_:?} {token:?}");
-        match type_ {
-            Type::Newline => print!(" ({:?})", segmenter.prompt()),
-            Type::End => break,
+        print!("{segment:?} {token:?}");
+        match segment {
+            Segment::Newline => print!(" ({:?})", segmenter.prompt()),
+            Segment::End => break,
             _ => (),
         }
         println!();
@@ -143,93 +144,93 @@ GhIjK
 "#,
         Mode::Auto,
         &[
-            (Type::Identifier, "a"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "ab"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "abc"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "abcd"),
-            (Type::Spaces, " "),
-            (Type::MacroId, "!abcd"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "A"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "AB"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "ABC"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "ABCD"),
-            (Type::Spaces, " "),
-            (Type::MacroId, "!ABCD"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "aB"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "aBC"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "aBcD"),
-            (Type::Spaces, " "),
-            (Type::MacroId, "!aBcD"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "$x"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "$y"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "$z"),
-            (Type::Spaces, " "),
-            (Type::MacroId, "!$z"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "grève"),
-            (Type::Spaces, "\u{00a0}"),
-            (Type::Identifier, "Ângstrom"),
-            (Type::Spaces, "\u{00a0}"),
-            (Type::Identifier, "poté"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "#a"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "#b"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "#c"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "##"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "#d"),
-            (Type::Spaces, " "),
-            (Type::MacroId, "!#d"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "@efg"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "@"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "@@."),
-            (Type::Spaces, " "),
-            (Type::Identifier, "@#@"),
-            (Type::Spaces, " "),
-            (Type::MacroId, "!@"),
-            (Type::Spaces, " "),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "##"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "#"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "#12345"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "#.#"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "f@#_.#6"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "GhIjK"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "."),
-            (Type::Identifier, "x"),
-            (Type::Spaces, " "),
-            (Type::Number, "1"),
-            (Type::Identifier, "y"),
-            (Type::Spaces, " "),
-            (Type::Punct, "_"),
-            (Type::Identifier, "z"),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Identifier, "a"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "ab"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "abc"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "abcd"),
+            (Segment::Spaces, " "),
+            (Segment::MacroId, "!abcd"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "A"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "AB"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "ABC"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "ABCD"),
+            (Segment::Spaces, " "),
+            (Segment::MacroId, "!ABCD"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "aB"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "aBC"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "aBcD"),
+            (Segment::Spaces, " "),
+            (Segment::MacroId, "!aBcD"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "$x"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "$y"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "$z"),
+            (Segment::Spaces, " "),
+            (Segment::MacroId, "!$z"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "grève"),
+            (Segment::Spaces, "\u{00a0}"),
+            (Segment::Identifier, "Ângstrom"),
+            (Segment::Spaces, "\u{00a0}"),
+            (Segment::Identifier, "poté"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "#a"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "#b"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "#c"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "##"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "#d"),
+            (Segment::Spaces, " "),
+            (Segment::MacroId, "!#d"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "@efg"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "@"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "@@."),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "@#@"),
+            (Segment::Spaces, " "),
+            (Segment::MacroId, "!@"),
+            (Segment::Spaces, " "),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "##"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "#"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "#12345"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "#.#"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "f@#_.#6"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "GhIjK"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "."),
+            (Segment::Identifier, "x"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "1"),
+            (Segment::Identifier, "y"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "_"),
+            (Segment::Identifier, "z"),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -270,82 +271,82 @@ WxYz./* unterminated end of line comment
 "#,
         Mode::Auto,
         &[
-            (Type::Identifier, "abcd."),
-            (Type::Spaces, " "),
-            (Type::Identifier, "abcd"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "ABCD."),
-            (Type::Spaces, " "),
-            (Type::Identifier, "ABCD"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "aBcD."),
-            (Type::Spaces, " "),
-            (Type::Identifier, "aBcD"),
-            (Type::EndCommand, "."),
-            (Type::Spaces, " "),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "$y."),
-            (Type::Spaces, " "),
-            (Type::Identifier, "$z."),
-            (Type::Spaces, " "),
-            (Type::Identifier, "あいうえお"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "#c."),
-            (Type::Spaces, " "),
-            (Type::Identifier, "#d."),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "@@."),
-            (Type::Spaces, " "),
-            (Type::Identifier, "@@..."),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "#.#"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "#abcd"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "."),
-            (Type::Spaces, " "),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "LMNOP"),
-            (Type::EndCommand, "."),
-            (Type::Spaces, " "),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "QRSTUV"),
-            (Type::EndCommand, "."),
-            (Type::Comment, "/* end of line comment */"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "qrstuv"),
-            (Type::EndCommand, "."),
-            (Type::Spaces, " "),
-            (Type::Comment, "/* end of line comment */"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "QrStUv"),
-            (Type::EndCommand, "."),
-            (Type::Comment, "/* end of line comment */"),
-            (Type::Spaces, " "),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "wxyz"),
-            (Type::EndCommand, "."),
-            (Type::Comment, "/* unterminated end of line comment"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "WXYZ"),
-            (Type::EndCommand, "."),
-            (Type::Spaces, " "),
-            (Type::Comment, "/* unterminated end of line comment"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "WxYz"),
-            (Type::EndCommand, "."),
-            (Type::Comment, "/* unterminated end of line comment "),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Identifier, "abcd."),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "abcd"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "ABCD."),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "ABCD"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "aBcD."),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "aBcD"),
+            (Segment::EndCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "$y."),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "$z."),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "あいうえお"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "#c."),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "#d."),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "@@."),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "@@..."),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "#.#"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "#abcd"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "LMNOP"),
+            (Segment::EndCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "QRSTUV"),
+            (Segment::EndCommand, "."),
+            (Segment::Comment, "/* end of line comment */"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "qrstuv"),
+            (Segment::EndCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/* end of line comment */"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "QrStUv"),
+            (Segment::EndCommand, "."),
+            (Segment::Comment, "/* end of line comment */"),
+            (Segment::Spaces, " "),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "wxyz"),
+            (Segment::EndCommand, "."),
+            (Segment::Comment, "/* unterminated end of line comment"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "WXYZ"),
+            (Segment::EndCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/* unterminated end of line comment"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "WxYz"),
+            (Segment::EndCommand, "."),
+            (Segment::Comment, "/* unterminated end of line comment "),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -379,90 +380,90 @@ and. with.
 "#,
         Mode::Auto,
         &[
-            (Type::ReservedWord, "and"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "or"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "not"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "eq"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "ge"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "gt"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "le"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "lt"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "ne"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "all"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "by"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "to"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "with"),
-            (Type::Newline, "\n"),
-            (Type::ReservedWord, "AND"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "OR"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "NOT"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "EQ"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "GE"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "GT"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "LE"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "LT"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "NE"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "ALL"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "BY"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "TO"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "WITH"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "andx"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "orx"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "notx"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "eqx"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "gex"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "gtx"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "lex"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "ltx"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "nex"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "allx"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "byx"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "tox"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "withx"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "and."),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "with"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::ReservedWord, "and"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "or"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "not"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "eq"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "ge"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "gt"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "le"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "lt"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "ne"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "all"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "by"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "to"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "with"),
+            (Segment::Newline, "\n"),
+            (Segment::ReservedWord, "AND"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "OR"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "NOT"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "EQ"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "GE"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "GT"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "LE"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "LT"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "NE"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "ALL"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "BY"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "TO"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "WITH"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "andx"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "orx"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "notx"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "eqx"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "gex"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "gtx"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "lex"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "ltx"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "nex"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "allx"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "byx"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "tox"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "withx"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "and."),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "with"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -482,89 +483,89 @@ fn test_punctuation() {
 "#,
         Mode::Auto,
         &[
-            (Type::Punct, "~"),
-            (Type::Spaces, " "),
-            (Type::Punct, "&"),
-            (Type::Spaces, " "),
-            (Type::Punct, "|"),
-            (Type::Spaces, " "),
-            (Type::Punct, "="),
-            (Type::Spaces, " "),
-            (Type::Punct, ">="),
-            (Type::Spaces, " "),
-            (Type::Punct, ">"),
-            (Type::Spaces, " "),
-            (Type::Punct, "<="),
-            (Type::Spaces, " "),
-            (Type::Punct, "<"),
-            (Type::Spaces, " "),
-            (Type::Punct, "~="),
-            (Type::Spaces, " "),
-            (Type::Punct, "<>"),
-            (Type::Spaces, " "),
-            (Type::Punct, "("),
-            (Type::Spaces, " "),
-            (Type::Punct, ")"),
-            (Type::Spaces, " "),
-            (Type::Punct, ","),
-            (Type::Spaces, " "),
-            (Type::Punct, "-"),
-            (Type::Spaces, " "),
-            (Type::Punct, "+"),
-            (Type::Spaces, " "),
-            (Type::Punct, "*"),
-            (Type::Spaces, " "),
-            (Type::Punct, "/"),
-            (Type::Spaces, " "),
-            (Type::Punct, "["),
-            (Type::Spaces, " "),
-            (Type::Punct, "]"),
-            (Type::Spaces, " "),
-            (Type::Punct, "**"),
-            (Type::Newline, "\n"),
-            (Type::Punct, "~"),
-            (Type::Punct, "&"),
-            (Type::Punct, "|"),
-            (Type::Punct, "="),
-            (Type::Punct, ">="),
-            (Type::Punct, ">"),
-            (Type::Punct, "<="),
-            (Type::Punct, "<"),
-            (Type::Punct, "~="),
-            (Type::Punct, "<>"),
-            (Type::Punct, "("),
-            (Type::Punct, ")"),
-            (Type::Punct, ","),
-            (Type::Punct, "-"),
-            (Type::Punct, "+"),
-            (Type::Punct, "*"),
-            (Type::Punct, "/"),
-            (Type::Punct, "["),
-            (Type::Punct, "]"),
-            (Type::Punct, "**"),
-            (Type::MacroId, "!*"),
-            (Type::Newline, "\n"),
-            (Type::Punct, "%"),
-            (Type::Spaces, " "),
-            (Type::Punct, ":"),
-            (Type::Spaces, " "),
-            (Type::Punct, ";"),
-            (Type::Spaces, " "),
-            (Type::Punct, "?"),
-            (Type::Spaces, " "),
-            (Type::Punct, "_"),
-            (Type::Spaces, " "),
-            (Type::Punct, "`"),
-            (Type::Spaces, " "),
-            (Type::Punct, "{"),
-            (Type::Spaces, " "),
-            (Type::Punct, "}"),
-            (Type::Spaces, " "),
-            (Type::Punct, "~"),
-            (Type::Spaces, " "),
-            (Type::MacroId, "!*"),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Punct, "~"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "&"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "|"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "="),
+            (Segment::Spaces, " "),
+            (Segment::Punct, ">="),
+            (Segment::Spaces, " "),
+            (Segment::Punct, ">"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "<="),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "<"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "~="),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "<>"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "("),
+            (Segment::Spaces, " "),
+            (Segment::Punct, ")"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, ","),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "-"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "+"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "*"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "/"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "["),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "]"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "**"),
+            (Segment::Newline, "\n"),
+            (Segment::Punct, "~"),
+            (Segment::Punct, "&"),
+            (Segment::Punct, "|"),
+            (Segment::Punct, "="),
+            (Segment::Punct, ">="),
+            (Segment::Punct, ">"),
+            (Segment::Punct, "<="),
+            (Segment::Punct, "<"),
+            (Segment::Punct, "~="),
+            (Segment::Punct, "<>"),
+            (Segment::Punct, "("),
+            (Segment::Punct, ")"),
+            (Segment::Punct, ","),
+            (Segment::Punct, "-"),
+            (Segment::Punct, "+"),
+            (Segment::Punct, "*"),
+            (Segment::Punct, "/"),
+            (Segment::Punct, "["),
+            (Segment::Punct, "]"),
+            (Segment::Punct, "**"),
+            (Segment::MacroId, "!*"),
+            (Segment::Newline, "\n"),
+            (Segment::Punct, "%"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, ":"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, ";"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "?"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "_"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "`"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "{"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "}"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "~"),
+            (Segment::Spaces, " "),
+            (Segment::MacroId, "!*"),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[PromptStyle::Later, PromptStyle::Later, PromptStyle::Later],
     );
@@ -583,78 +584,78 @@ fn test_positive_numbers() {
 "#,
         Mode::Auto,
         &[
-            (Type::Number, "0"),
-            (Type::Spaces, " "),
-            (Type::Number, "1"),
-            (Type::Spaces, " "),
-            (Type::Number, "01"),
-            (Type::Spaces, " "),
-            (Type::Number, "001."),
-            (Type::Spaces, " "),
-            (Type::Number, "1"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Number, "123"),
-            (Type::EndCommand, "."),
-            (Type::Spaces, " "),
-            (Type::Comment, "/* comment 1 */"),
-            (Type::Spaces, " "),
-            (Type::Comment, "/* comment 2 */"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "."),
-            (Type::Number, "1"),
-            (Type::Spaces, " "),
-            (Type::Number, "0.1"),
-            (Type::Spaces, " "),
-            (Type::Number, "00.1"),
-            (Type::Spaces, " "),
-            (Type::Number, "00.10"),
-            (Type::Newline, "\n"),
-            (Type::Number, "5e1"),
-            (Type::Spaces, " "),
-            (Type::Number, "6E-1"),
-            (Type::Spaces, " "),
-            (Type::Number, "7e+1"),
-            (Type::Spaces, " "),
-            (Type::Number, "6E+01"),
-            (Type::Spaces, " "),
-            (Type::Number, "6e-03"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "."),
-            (Type::Number, "3E1"),
-            (Type::Spaces, " "),
-            (Type::Number, ".4e-1"),
-            (Type::Spaces, " "),
-            (Type::Number, ".5E+1"),
-            (Type::Spaces, " "),
-            (Type::Number, ".6e+01"),
-            (Type::Spaces, " "),
-            (Type::Number, ".7E-03"),
-            (Type::Newline, "\n"),
-            (Type::Number, "1.23e1"),
-            (Type::Spaces, " "),
-            (Type::Number, "45.6E-1"),
-            (Type::Spaces, " "),
-            (Type::Number, "78.9e+1"),
-            (Type::Spaces, " "),
-            (Type::Number, "99.9E+01"),
-            (Type::Spaces, " "),
-            (Type::Number, "11.2e-03"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "."),
-            (Type::Spaces, " "),
-            (Type::ExpectedExponent, "1e"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "e1"),
-            (Type::Spaces, " "),
-            (Type::ExpectedExponent, "1e+"),
-            (Type::Spaces, " "),
-            (Type::ExpectedExponent, "1e-"),
-            (Type::Spaces, " "),
-            (Type::Number, "1"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Number, "0"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "01"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "001."),
+            (Segment::Spaces, " "),
+            (Segment::Number, "1"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Number, "123"),
+            (Segment::EndCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/* comment 1 */"),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/* comment 2 */"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "."),
+            (Segment::Number, "1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "0.1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "00.1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "00.10"),
+            (Segment::Newline, "\n"),
+            (Segment::Number, "5e1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "6E-1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "7e+1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "6E+01"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "6e-03"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "."),
+            (Segment::Number, "3E1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, ".4e-1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, ".5E+1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, ".6e+01"),
+            (Segment::Spaces, " "),
+            (Segment::Number, ".7E-03"),
+            (Segment::Newline, "\n"),
+            (Segment::Number, "1.23e1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "45.6E-1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "78.9e+1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "99.9E+01"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "11.2e-03"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::ExpectedExponent, "1e"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "e1"),
+            (Segment::Spaces, " "),
+            (Segment::ExpectedExponent, "1e+"),
+            (Segment::Spaces, " "),
+            (Segment::ExpectedExponent, "1e-"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "1"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -682,90 +683,90 @@ fn test_negative_numbers() {
 "#,
         Mode::Auto,
         &[
-            (Type::Spaces, " "),
-            (Type::Number, "-0"),
-            (Type::Spaces, " "),
-            (Type::Number, "-1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-01"),
-            (Type::Spaces, " "),
-            (Type::Number, "-001."),
-            (Type::Spaces, " "),
-            (Type::Number, "-1"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Number, "-123"),
-            (Type::EndCommand, "."),
-            (Type::Spaces, " "),
-            (Type::Comment, "/* comment 1 */"),
-            (Type::Spaces, " "),
-            (Type::Comment, "/* comment 2 */"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Number, "-.1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-0.1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-00.1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-00.10"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Number, "-5e1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-6E-1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-7e+1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-6E+01"),
-            (Type::Spaces, " "),
-            (Type::Number, "-6e-03"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Number, "-.3E1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-.4e-1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-.5E+1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-.6e+01"),
-            (Type::Spaces, " "),
-            (Type::Number, "-.7E-03"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Number, "-1.23e1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-45.6E-1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-78.9e+1"),
-            (Type::Spaces, " "),
-            (Type::Number, "-99.9E+01"),
-            (Type::Spaces, " "),
-            (Type::Number, "-11.2e-03"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Punct, "-"),
-            (Type::Comment, "/**/"),
-            (Type::Number, "1"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Punct, "-"),
-            (Type::Punct, "."),
-            (Type::Spaces, " "),
-            (Type::ExpectedExponent, "-1e"),
-            (Type::Spaces, " "),
-            (Type::Punct, "-"),
-            (Type::Identifier, "e1"),
-            (Type::Spaces, " "),
-            (Type::ExpectedExponent, "-1e+"),
-            (Type::Spaces, " "),
-            (Type::ExpectedExponent, "-1e-"),
-            (Type::Spaces, " "),
-            (Type::Number, "-1"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-0"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-01"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-001."),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-1"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-123"),
+            (Segment::EndCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/* comment 1 */"),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/* comment 2 */"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-.1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-0.1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-00.1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-00.10"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-5e1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-6E-1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-7e+1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-6E+01"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-6e-03"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-.3E1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-.4e-1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-.5E+1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-.6e+01"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-.7E-03"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-1.23e1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-45.6E-1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-78.9e+1"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-99.9E+01"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-11.2e-03"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "-"),
+            (Segment::Comment, "/**/"),
+            (Segment::Number, "1"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "-"),
+            (Segment::Punct, "."),
+            (Segment::Spaces, " "),
+            (Segment::ExpectedExponent, "-1e"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "-"),
+            (Segment::Identifier, "e1"),
+            (Segment::Spaces, " "),
+            (Segment::ExpectedExponent, "-1e+"),
+            (Segment::Spaces, " "),
+            (Segment::ExpectedExponent, "-1e-"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "-1"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -798,59 +799,59 @@ u'fffd' U"041"
 "#,
         Mode::Auto,
         &[
-            (Type::QuotedString, "'x'"),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "\"y\""),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "'abc'"),
-            (Type::Newline, "\n"),
-            (Type::QuotedString, "'Don''t'"),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "\"Can't\""),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "'Won''t'"),
-            (Type::Newline, "\n"),
-            (Type::QuotedString, "\"\"\"quoted\"\"\""),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "'\"quoted\"'"),
-            (Type::Newline, "\n"),
-            (Type::QuotedString, "''"),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "\"\""),
-            (Type::Newline, "\n"),
-            (Type::ExpectedQuote, "'missing end quote"),
-            (Type::Newline, "\n"),
-            (Type::ExpectedQuote, "\"missing double quote"),
-            (Type::Newline, "\n"),
-            (Type::HexString, "x\"4142\""),
-            (Type::Spaces, " "),
-            (Type::HexString, "X'5152'"),
-            (Type::Newline, "\n"),
-            (Type::UnicodeString, "u'fffd'"),
-            (Type::Spaces, " "),
-            (Type::UnicodeString, "U\"041\""),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "+"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "new"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::Newline, "\n"),
-            (Type::Punct, "+"),
-            (Type::Spaces, " "),
-            (Type::Comment, "/* comment */"),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "'string continuation'"),
-            (Type::Newline, "\n"),
-            (Type::Punct, "+"),
-            (Type::Spaces, " "),
-            (Type::Comment, "/* also a punctuator on blank line"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "-"),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "'new command'"),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::QuotedString, "'x'"),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "\"y\""),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "'abc'"),
+            (Segment::Newline, "\n"),
+            (Segment::QuotedString, "'Don''t'"),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "\"Can't\""),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "'Won''t'"),
+            (Segment::Newline, "\n"),
+            (Segment::QuotedString, "\"\"\"quoted\"\"\""),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "'\"quoted\"'"),
+            (Segment::Newline, "\n"),
+            (Segment::QuotedString, "''"),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "\"\""),
+            (Segment::Newline, "\n"),
+            (Segment::ExpectedQuote, "'missing end quote"),
+            (Segment::Newline, "\n"),
+            (Segment::ExpectedQuote, "\"missing double quote"),
+            (Segment::Newline, "\n"),
+            (Segment::HexString, "x\"4142\""),
+            (Segment::Spaces, " "),
+            (Segment::HexString, "X'5152'"),
+            (Segment::Newline, "\n"),
+            (Segment::UnicodeString, "u'fffd'"),
+            (Segment::Spaces, " "),
+            (Segment::UnicodeString, "U\"041\""),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "+"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "new"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::Newline, "\n"),
+            (Segment::Punct, "+"),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/* comment */"),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "'string continuation'"),
+            (Segment::Newline, "\n"),
+            (Segment::Punct, "+"),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/* also a punctuator on blank line"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "-"),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "'new command'"),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -878,26 +879,26 @@ title my title.
 "#,
         Mode::Interactive,
         &[
-            (Type::Shbang, "#! /usr/bin/pspp"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "title"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "my"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "title"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "#"),
-            (Type::MacroId, "!"),
-            (Type::Spaces, " "),
-            (Type::Punct, "/"),
-            (Type::Identifier, "usr"),
-            (Type::Punct, "/"),
-            (Type::Identifier, "bin"),
-            (Type::Punct, "/"),
-            (Type::Identifier, "pspp"),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Shbang, "#! /usr/bin/pspp"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "title"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "my"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "title"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "#"),
+            (Segment::MacroId, "!"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "/"),
+            (Segment::Identifier, "usr"),
+            (Segment::Punct, "/"),
+            (Segment::Identifier, "bin"),
+            (Segment::Punct, "/"),
+            (Segment::Identifier, "pspp"),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[PromptStyle::First, PromptStyle::First, PromptStyle::Later],
     );
@@ -925,61 +926,61 @@ next command.
 "#,
         Mode::Interactive,
         &[
-            (Type::CommentCommand, "* Comment commands \"don't"),
-            (Type::Newline, "\n"),
-            (Type::CommentCommand, "have to contain valid tokens"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::CommentCommand, "** Check ambiguity with ** token"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::CommentCommand, "****************"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::CommentCommand, "comment keyword works too"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::CommentCommand, "COMM also"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "com"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "is"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "ambiguous"),
-            (Type::Spaces, " "),
-            (Type::ReservedWord, "with"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "COMPUTE"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "   "),
+            (Segment::CommentCommand, "* Comment commands \"don't"),
+            (Segment::Newline, "\n"),
+            (Segment::CommentCommand, "have to contain valid tokens"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::CommentCommand, "** Check ambiguity with ** token"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::CommentCommand, "****************"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::CommentCommand, "comment keyword works too"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::CommentCommand, "COMM also"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "com"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "is"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "ambiguous"),
+            (Segment::Spaces, " "),
+            (Segment::ReservedWord, "with"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "COMPUTE"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "   "),
             (
-                Type::CommentCommand,
+                Segment::CommentCommand,
                 "* Comment need not start at left margin",
             ),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::CommentCommand, "* Comment ends with blank line"),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "next"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::CommentCommand, "* Comment ends with blank line"),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "next"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::Comment,
@@ -1018,36 +1019,36 @@ second paragraph.
 "#,
         Mode::Interactive,
         &[
-            (Type::StartDocument, ""),
-            (Type::Document, "DOCUMENT one line."),
-            (Type::EndCommand, ""),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::StartDocument, ""),
-            (Type::Document, "DOC more"),
-            (Type::Newline, "\n"),
-            (Type::Document, "    than"),
-            (Type::Newline, "\n"),
-            (Type::Document, "        one"),
-            (Type::Newline, "\n"),
-            (Type::Document, "            line."),
-            (Type::EndCommand, ""),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::StartDocument, ""),
-            (Type::Document, "docu"),
-            (Type::Newline, "\n"),
-            (Type::Document, "first.paragraph"),
-            (Type::Newline, "\n"),
-            (Type::Document, "isn't parsed as tokens"),
-            (Type::Newline, "\n"),
-            (Type::Document, ""),
-            (Type::Newline, "\n"),
-            (Type::Document, "second paragraph."),
-            (Type::EndCommand, ""),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::StartDocument, ""),
+            (Segment::Document, "DOCUMENT one line."),
+            (Segment::EndCommand, ""),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::StartDocument, ""),
+            (Segment::Document, "DOC more"),
+            (Segment::Newline, "\n"),
+            (Segment::Document, "    than"),
+            (Segment::Newline, "\n"),
+            (Segment::Document, "        one"),
+            (Segment::Newline, "\n"),
+            (Segment::Document, "            line."),
+            (Segment::EndCommand, ""),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::StartDocument, ""),
+            (Segment::Document, "docu"),
+            (Segment::Newline, "\n"),
+            (Segment::Document, "first.paragraph"),
+            (Segment::Newline, "\n"),
+            (Segment::Document, "isn't parsed as tokens"),
+            (Segment::Newline, "\n"),
+            (Segment::Document, ""),
+            (Segment::Newline, "\n"),
+            (Segment::Document, "second paragraph."),
+            (Segment::EndCommand, ""),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -1076,34 +1077,34 @@ FILE /*
 "#,
         Mode::Interactive,
         &[
-            (Type::Identifier, "FIL"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "label"),
-            (Type::Spaces, " "),
-            (Type::UnquotedString, "isn't quoted"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "FILE"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "  "),
-            (Type::Identifier, "lab"),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "'is quoted'"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "FILE"),
-            (Type::Spaces, " "),
-            (Type::Comment, "/*"),
-            (Type::Newline, "\n"),
-            (Type::Comment, "/**/"),
-            (Type::Spaces, "  "),
-            (Type::Identifier, "lab"),
-            (Type::Spaces, " "),
-            (Type::UnquotedString, "not quoted here either"),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Identifier, "FIL"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "label"),
+            (Segment::Spaces, " "),
+            (Segment::UnquotedString, "isn't quoted"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "FILE"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "  "),
+            (Segment::Identifier, "lab"),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "'is quoted'"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "FILE"),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/*"),
+            (Segment::Newline, "\n"),
+            (Segment::Comment, "/**/"),
+            (Segment::Spaces, "  "),
+            (Segment::Identifier, "lab"),
+            (Segment::Spaces, " "),
+            (Segment::UnquotedString, "not quoted here either"),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::First,
@@ -1145,92 +1146,92 @@ not data
 "#,
         Mode::Interactive,
         &[
-            (Type::Identifier, "begin"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "end"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "begin"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::EndCommand, "."),
-            (Type::Spaces, " "),
-            (Type::Comment, "/*"),
-            (Type::Newline, "\n"),
-            (Type::InlineData, "123"),
-            (Type::Newline, "\n"),
-            (Type::InlineData, "xxx"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "end"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "BEG"),
-            (Type::Spaces, " "),
-            (Type::Comment, "/**/"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "DAT"),
-            (Type::Spaces, " "),
-            (Type::Comment, "/*"),
-            (Type::Newline, "\n"),
-            (Type::InlineData, "5 6 7 /* x"),
-            (Type::Newline, "\n"),
-            (Type::InlineData, ""),
-            (Type::Newline, "\n"),
-            (Type::InlineData, "end  data"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "end"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "begin"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::InlineData, "data"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "end"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "begin"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::Spaces, " "),
-            (Type::QuotedString, "\"xxx\""),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "begin"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::Spaces, " "),
-            (Type::Number, "123"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::ReservedWord, "not"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "data"),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Identifier, "begin"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "end"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "begin"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::EndCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/*"),
+            (Segment::Newline, "\n"),
+            (Segment::InlineData, "123"),
+            (Segment::Newline, "\n"),
+            (Segment::InlineData, "xxx"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "end"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "BEG"),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/**/"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "DAT"),
+            (Segment::Spaces, " "),
+            (Segment::Comment, "/*"),
+            (Segment::Newline, "\n"),
+            (Segment::InlineData, "5 6 7 /* x"),
+            (Segment::Newline, "\n"),
+            (Segment::InlineData, ""),
+            (Segment::Newline, "\n"),
+            (Segment::InlineData, "end  data"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "end"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "begin"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::InlineData, "data"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "end"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "begin"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::Spaces, " "),
+            (Segment::QuotedString, "\"xxx\""),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "begin"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "123"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::ReservedWord, "not"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "data"),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::Data,
@@ -1279,62 +1280,62 @@ end repeat.
 "#,
         Mode::Interactive,
         &[
-            (Type::Identifier, "do"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "repeat"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "x"),
-            (Type::Punct, "="),
-            (Type::Identifier, "a"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "b"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "c"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "          "),
-            (Type::Identifier, "y"),
-            (Type::Punct, "="),
-            (Type::Identifier, "d"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "e"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "f"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "  do repeat a=1 thru 5."),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "another command."),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "second command"),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "+ third command."),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "end"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "repeat"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "do"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "  "),
-            (Type::Identifier, "repeat"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "#a"),
-            (Type::Punct, "="),
-            (Type::Number, "1"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "  inner command."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "end"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "repeat"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Identifier, "do"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "repeat"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "x"),
+            (Segment::Punct, "="),
+            (Segment::Identifier, "a"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "b"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "c"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "          "),
+            (Segment::Identifier, "y"),
+            (Segment::Punct, "="),
+            (Segment::Identifier, "d"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "e"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "f"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "  do repeat a=1 thru 5."),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "another command."),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "second command"),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "+ third command."),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "end /* x */ /* y */ repeat print."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "end"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "repeat"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "do"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "  "),
+            (Segment::Identifier, "repeat"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "#a"),
+            (Segment::Punct, "="),
+            (Segment::Number, "1"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "  inner command."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "end"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "repeat"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -1371,44 +1372,44 @@ fn test_do_repeat_overflow() {
         .map(|s| s.as_str())
         .collect();
     let mut expect_output = vec![
-        (Type::Identifier, "do"),
-        (Type::Spaces, " "),
-        (Type::Identifier, "repeat"),
-        (Type::Spaces, " "),
-        (Type::Identifier, "v0"),
-        (Type::Punct, "="),
-        (Type::Number, "0"),
-        (Type::Spaces, " "),
-        (Type::Identifier, "thru"),
-        (Type::Spaces, " "),
-        (Type::Number, "5"),
-        (Type::EndCommand, "."),
-        (Type::Newline, "\n"),
+        (Segment::Identifier, "do"),
+        (Segment::Spaces, " "),
+        (Segment::Identifier, "repeat"),
+        (Segment::Spaces, " "),
+        (Segment::Identifier, "v0"),
+        (Segment::Punct, "="),
+        (Segment::Number, "0"),
+        (Segment::Spaces, " "),
+        (Segment::Identifier, "thru"),
+        (Segment::Spaces, " "),
+        (Segment::Number, "5"),
+        (Segment::EndCommand, "."),
+        (Segment::Newline, "\n"),
     ];
     for i in 1..N {
-        expect_output.push((Type::DoRepeatCommand, &do_repeat[i].trim_end()));
+        expect_output.push((Segment::DoRepeatCommand, &do_repeat[i].trim_end()));
         if i >= 255 {
-            expect_output.push((Type::DoRepeatOverflow, ""));
+            expect_output.push((Segment::DoRepeatOverflow, ""));
         }
-        expect_output.push((Type::Newline, "\n"));
+        expect_output.push((Segment::Newline, "\n"));
     }
     for i in 0..254 {
-        expect_output.push((Type::DoRepeatCommand, &end_repeat[i].trim_end()));
-        expect_output.push((Type::Newline, "\n"));
+        expect_output.push((Segment::DoRepeatCommand, &end_repeat[i].trim_end()));
+        expect_output.push((Segment::Newline, "\n"));
     }
     let comments: Vec<String> = (0..(N - 254)).rev().map(|i| format!("/* {i}")).collect();
     for comment in &comments {
         expect_output.extend([
-            (Type::Identifier, "end"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "repeat"),
-            (Type::EndCommand, "."),
-            (Type::Spaces, " "),
-            (Type::Comment, comment),
-            (Type::Newline, "\n"),
+            (Segment::Identifier, "end"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "repeat"),
+            (Segment::EndCommand, "."),
+            (Segment::Spaces, " "),
+            (Segment::Comment, comment),
+            (Segment::Newline, "\n"),
         ]);
     }
-    expect_output.push((Type::End, ""));
+    expect_output.push((Segment::End, ""));
 
     let expect_prompts: Vec<_> = (0..N * 2 - 3)
         .map(|_| PromptStyle::DoRepeat)
@@ -1437,62 +1438,62 @@ end repeat
 "#,
         Mode::Batch,
         &[
-            (Type::Identifier, "do"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "repeat"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "x"),
-            (Type::Punct, "="),
-            (Type::Identifier, "a"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "b"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "c"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "          "),
-            (Type::Identifier, "y"),
-            (Type::Punct, "="),
-            (Type::Identifier, "d"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "e"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "f"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, ""),
-            (Type::DoRepeatCommand, "do repeat a=1 thru 5"),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "another command"),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "second command"),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "+ third command"),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "end"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "repeat"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, ""),
-            (Type::Identifier, "do"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "  "),
-            (Type::Identifier, "repeat"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "#a"),
-            (Type::Punct, "="),
-            (Type::Number, "1"),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::DoRepeatCommand, "  inner command"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "end"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "repeat"),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Identifier, "do"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "repeat"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "x"),
+            (Segment::Punct, "="),
+            (Segment::Identifier, "a"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "b"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "c"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "          "),
+            (Segment::Identifier, "y"),
+            (Segment::Punct, "="),
+            (Segment::Identifier, "d"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "e"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "f"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, ""),
+            (Segment::DoRepeatCommand, "do repeat a=1 thru 5"),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "another command"),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "second command"),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "+ third command"),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "end /* x */ /* y */ repeat print"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "end"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "repeat"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, ""),
+            (Segment::Identifier, "do"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "  "),
+            (Segment::Identifier, "repeat"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "#a"),
+            (Segment::Punct, "="),
+            (Segment::Number, "1"),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::DoRepeatCommand, "  inner command"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "end"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "repeat"),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -1515,7 +1516,7 @@ end repeat
 
 mod define {
     use crate::{
-        lex::segment::{Mode, Type},
+        lex::segment::{Mode, Segment},
         prompt::PromptStyle,
     };
 
@@ -1530,18 +1531,18 @@ var1 var2 var3 "!enddefine"
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::MacroBody, "var1 var2 var3 \"!enddefine\""),
-                (Type::Newline, "\n"),
-                (Type::MacroId, "!enddefine"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroBody, "var1 var2 var3 \"!enddefine\""),
+                (Segment::Newline, "\n"),
+                (Segment::MacroId, "!enddefine"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::Define, PromptStyle::First],
         );
@@ -1555,17 +1556,17 @@ var1 var2 var3 "!enddefine"
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::MacroBody, " var1 var2 var3 /* !enddefine"),
-                (Type::Newline, "\n"),
-                (Type::MacroId, "!enddefine"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::MacroBody, " var1 var2 var3 /* !enddefine"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroId, "!enddefine"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::First],
         );
@@ -1579,17 +1580,17 @@ var1 var2 var3!enddefine.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::MacroBody, "var1 var2 var3"),
-                (Type::MacroId, "!enddefine"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroBody, "var1 var2 var3"),
+                (Segment::MacroId, "!enddefine"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::First],
         );
@@ -1602,16 +1603,16 @@ var1 var2 var3!enddefine.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::MacroBody, "var1 var2 var3"),
-                (Type::MacroId, "!enddefine"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::MacroBody, "var1 var2 var3"),
+                (Segment::MacroId, "!enddefine"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::First],
         );
@@ -1625,16 +1626,16 @@ var1 var2 var3!enddefine.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::MacroId, "!enddefine"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroId, "!enddefine"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::First],
         );
@@ -1650,20 +1651,20 @@ var1 var2 var3!enddefine.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::MacroBody, ""),
-                (Type::Newline, "\n"),
-                (Type::MacroBody, ""),
-                (Type::Newline, "\n"),
-                (Type::MacroId, "!enddefine"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroBody, ""),
+                (Segment::Newline, "\n"),
+                (Segment::MacroBody, ""),
+                (Segment::Newline, "\n"),
+                (Segment::MacroId, "!enddefine"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[
                 PromptStyle::Define,
@@ -1682,29 +1683,29 @@ var1 var2 var3!enddefine.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Identifier, "a"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Punct, ","),
-                (Type::Spaces, " "),
-                (Type::Identifier, "b"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Punct, ","),
-                (Type::Spaces, " "),
-                (Type::Identifier, "c"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::MacroId, "!enddefine"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Identifier, "a"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Punct, ","),
+                (Segment::Spaces, " "),
+                (Segment::Identifier, "b"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Punct, ","),
+                (Segment::Spaces, " "),
+                (Segment::Identifier, "c"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroId, "!enddefine"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::Define, PromptStyle::First],
         );
@@ -1722,35 +1723,35 @@ var1 var2 var3!enddefine.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "  "),
-                (Type::Identifier, "a"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Punct, ","),
-                (Type::Spaces, " "),
-                (Type::Identifier, "b"),
-                (Type::Punct, "("),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "  "),
-                (Type::Punct, ")"),
-                (Type::Punct, ","),
-                (Type::Newline, "\n"),
-                (Type::Spaces, "  "),
-                (Type::Identifier, "c"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::MacroId, "!enddefine"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Newline, "\n"),
+                (Segment::Spaces, "  "),
+                (Segment::Identifier, "a"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Punct, ","),
+                (Segment::Spaces, " "),
+                (Segment::Identifier, "b"),
+                (Segment::Punct, "("),
+                (Segment::Newline, "\n"),
+                (Segment::Spaces, "  "),
+                (Segment::Punct, ")"),
+                (Segment::Punct, ","),
+                (Segment::Newline, "\n"),
+                (Segment::Spaces, "  "),
+                (Segment::Identifier, "c"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroId, "!enddefine"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[
                 PromptStyle::Later,
@@ -1775,27 +1776,27 @@ content 2
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Newline, "\n"),
-                (Type::Punct, "("),
-                (Type::Identifier, "x"),
-                (Type::Punct, ","),
-                (Type::Identifier, "y"),
-                (Type::Punct, ","),
-                (Type::Identifier, "z"),
-                (Type::Newline, "\n"),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::MacroBody, "content 1"),
-                (Type::Newline, "\n"),
-                (Type::MacroBody, "content 2"),
-                (Type::Newline, "\n"),
-                (Type::MacroId, "!enddefine"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Newline, "\n"),
+                (Segment::Punct, "("),
+                (Segment::Identifier, "x"),
+                (Segment::Punct, ","),
+                (Segment::Identifier, "y"),
+                (Segment::Punct, ","),
+                (Segment::Identifier, "z"),
+                (Segment::Newline, "\n"),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroBody, "content 1"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroBody, "content 2"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroId, "!enddefine"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[
                 PromptStyle::Later,
@@ -1816,22 +1817,22 @@ data list /x 1.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "data"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "list"),
-                (Type::Spaces, " "),
-                (Type::Punct, "/"),
-                (Type::Identifier, "x"),
-                (Type::Spaces, " "),
-                (Type::Number, "1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::Identifier, "data"),
+                (Segment::Spaces, " "),
+                (Segment::Identifier, "list"),
+                (Segment::Spaces, " "),
+                (Segment::Punct, "/"),
+                (Segment::Identifier, "x"),
+                (Segment::Spaces, " "),
+                (Segment::Number, "1"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::First, PromptStyle::First],
         );
@@ -1846,24 +1847,24 @@ data list /x 1.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "x"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "data"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "list"),
-                (Type::Spaces, " "),
-                (Type::Punct, "/"),
-                (Type::Identifier, "x"),
-                (Type::Spaces, " "),
-                (Type::Number, "1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Newline, "\n"),
+                (Segment::Identifier, "x"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::Identifier, "data"),
+                (Segment::Spaces, " "),
+                (Segment::Identifier, "list"),
+                (Segment::Spaces, " "),
+                (Segment::Punct, "/"),
+                (Segment::Identifier, "x"),
+                (Segment::Spaces, " "),
+                (Segment::Number, "1"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::Later, PromptStyle::First, PromptStyle::First],
         );
@@ -1878,26 +1879,26 @@ data list /x 1.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "x"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "data"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "list"),
-                (Type::Spaces, " "),
-                (Type::Punct, "/"),
-                (Type::Identifier, "x"),
-                (Type::Spaces, " "),
-                (Type::Number, "1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::Identifier, "x"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::Identifier, "data"),
+                (Segment::Spaces, " "),
+                (Segment::Identifier, "list"),
+                (Segment::Spaces, " "),
+                (Segment::Punct, "/"),
+                (Segment::Identifier, "x"),
+                (Segment::Spaces, " "),
+                (Segment::Number, "1"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::First, PromptStyle::First, PromptStyle::First],
         );
@@ -1913,22 +1914,22 @@ data list /x 1.
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::Identifier, "data"),
-                (Type::Spaces, " "),
-                (Type::Identifier, "list"),
-                (Type::Spaces, " "),
-                (Type::Punct, "/"),
-                (Type::Identifier, "x"),
-                (Type::Spaces, " "),
-                (Type::Number, "1"),
-                (Type::EndCommand, "."),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::Identifier, "data"),
+                (Segment::Spaces, " "),
+                (Segment::Identifier, "list"),
+                (Segment::Spaces, " "),
+                (Segment::Punct, "/"),
+                (Segment::Identifier, "x"),
+                (Segment::Spaces, " "),
+                (Segment::Number, "1"),
+                (Segment::EndCommand, "."),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::First, PromptStyle::First],
         );
@@ -1943,17 +1944,17 @@ content line 2
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::MacroBody, "content line 1"),
-                (Type::Newline, "\n"),
-                (Type::MacroBody, "content line 2"),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroBody, "content line 1"),
+                (Segment::Newline, "\n"),
+                (Segment::MacroBody, "content line 2"),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[
                 PromptStyle::Define,
@@ -1970,13 +1971,13 @@ content line 2
 "#,
             Mode::Interactive,
             &[
-                (Type::Identifier, "define"),
-                (Type::Spaces, " "),
-                (Type::MacroName, "!macro1"),
-                (Type::Punct, "("),
-                (Type::Punct, ")"),
-                (Type::Newline, "\n"),
-                (Type::End, ""),
+                (Segment::Identifier, "define"),
+                (Segment::Spaces, " "),
+                (Segment::MacroName, "!macro1"),
+                (Segment::Punct, "("),
+                (Segment::Punct, ")"),
+                (Segment::Newline, "\n"),
+                (Segment::End, ""),
             ],
             &[PromptStyle::Define],
         );
@@ -1996,46 +1997,46 @@ fourth command.
 "#,
         Mode::Batch,
         &[
-            (Type::Identifier, "first"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "     "),
-            (Type::Identifier, "another"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "line"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "of"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "first"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "+"),
-            (Type::Spaces, "  "),
-            (Type::Identifier, "second"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, ""),
-            (Type::Identifier, "third"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "fourth"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "   "),
-            (Type::Identifier, "fifth"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Identifier, "first"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "     "),
+            (Segment::Identifier, "another"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "line"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "of"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "first"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "+"),
+            (Segment::Spaces, "  "),
+            (Segment::Identifier, "second"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, ""),
+            (Segment::Identifier, "third"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "fourth"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "   "),
+            (Segment::Identifier, "fifth"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
@@ -2068,76 +2069,76 @@ fourth command.
 "#,
         Mode::Auto,
         &[
-            (Type::Identifier, "command"),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "     "),
-            (Type::Identifier, "another"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "line"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "of"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, ""),
-            (Type::Number, "2"),
-            (Type::Identifier, "sls"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, "+"),
-            (Type::Spaces, "  "),
-            (Type::Identifier, "another"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "another"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "line"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "of"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "second"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, ""),
-            (Type::Identifier, "data"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "list"),
-            (Type::Spaces, " "),
-            (Type::Punct, "/"),
-            (Type::Identifier, "x"),
-            (Type::Spaces, " "),
-            (Type::Number, "1"),
-            (Type::Newline, "\n"),
-            (Type::StartCommand, ""),
-            (Type::Identifier, "aggregate"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "print"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "eject"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "twostep"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "cluster"),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::SeparateCommands, ""),
-            (Type::Newline, "\n"),
-            (Type::Identifier, "fourth"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::Spaces, "   "),
-            (Type::Identifier, "fifth"),
-            (Type::Spaces, " "),
-            (Type::Identifier, "command"),
-            (Type::EndCommand, "."),
-            (Type::Newline, "\n"),
-            (Type::End, ""),
+            (Segment::Identifier, "command"),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "     "),
+            (Segment::Identifier, "another"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "line"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "of"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, ""),
+            (Segment::Number, "2"),
+            (Segment::Identifier, "sls"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, "+"),
+            (Segment::Spaces, "  "),
+            (Segment::Identifier, "another"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "another"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "line"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "of"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "second"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, ""),
+            (Segment::Identifier, "data"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "list"),
+            (Segment::Spaces, " "),
+            (Segment::Punct, "/"),
+            (Segment::Identifier, "x"),
+            (Segment::Spaces, " "),
+            (Segment::Number, "1"),
+            (Segment::Newline, "\n"),
+            (Segment::StartCommand, ""),
+            (Segment::Identifier, "aggregate"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "print"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "eject"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "twostep"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "cluster"),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::SeparateCommands, ""),
+            (Segment::Newline, "\n"),
+            (Segment::Identifier, "fourth"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::Spaces, "   "),
+            (Segment::Identifier, "fifth"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "command"),
+            (Segment::EndCommand, "."),
+            (Segment::Newline, "\n"),
+            (Segment::End, ""),
         ],
         &[
             PromptStyle::Later,
diff --git a/rust/src/lex/token.rs b/rust/src/lex/token.rs
new file mode 100644 (file)
index 0000000..0b2021b
--- /dev/null
@@ -0,0 +1,309 @@
+use thiserror::Error as ThisError;
+
+use super::segment::Segment;
+
+/// A token, as produced from a segment by [`Token::try_from_segment`].
+pub enum Token {
+    /// End of input.
+    End,
+
+    /// Identifier.
+    Id(String),
+
+    /// Number.
+    Number(f64),
+
+    /// String.
+    ///
+    /// [`Token::try_from_segment`] produces this for quoted, hex, and Unicode
+    /// strings, and also, with the segment text copied verbatim, for
+    /// `UnquotedString`, `DoRepeatCommand`, `InlineData`, `Document`,
+    /// `MacroBody`, and `MacroName` segments.
+    String(String),
+
+    /// Command terminator or separator.
+    ///
+    /// Usually this is `.`, but a blank line also separates commands, and in
+    /// batch mode any line that begins with a non-blank starts a new command.
+    EndCommand,
+
+    /// Operators, punctuators, and reserved words.
+    Punct(Punct),
+
+    /// Tokens that only appear in macros.
+    MacroToken(MacroToken),
+}
+
+/// Operators, punctuators, and reserved words, carried by [`Token::Punct`].
+pub enum Punct {
+    /// `+`.
+    Plus,
+
+    /// `-`.
+    Dash,
+
+    /// `*`.
+    Asterisk,
+
+    /// `/`.
+    Slash,
+
+    /// `=`.
+    Equals,
+
+    /// `(`.
+    LParen,
+
+    /// `)`.
+    RParen,
+
+    /// `[`.
+    LSquare,
+
+    /// `]`.
+    RSquare,
+
+    /// `{`.
+    LCurly,
+
+    /// `}`.
+    RCurly,
+
+    /// `,`.
+    Comma,
+
+    /// `;`.
+    Semicolon,
+
+    /// `:`.
+    Colon,
+
+    /// `AND` or `&`.
+    And,
+
+    /// `OR` or `|`.
+    Or,
+
+    /// `NOT` or `~`.
+    Not,
+
+    // NOTE(review): `Token::try_from_segment` maps reserved words beginning
+    // with `E` to this variant and the `=` punctuator to `Punct::Equals`;
+    // confirm whether `=` belongs in this description.
+    /// `EQ` or `=`.
+    Eq,
+
+    /// `GE` or `>=`.
+    Ge,
+
+    /// `GT` or `>`.
+    Gt,
+
+    /// `LE` or `<=`.
+    Le,
+
+    /// `LT` or `<`.
+    Lt,
+
+    /// `NE` or `~=` or `<>`.
+    Ne,
+
+    /// `ALL`.
+    All,
+
+    /// `BY`.
+    By,
+
+    /// `TO`.
+    To,
+
+    /// `WITH`.
+    With,
+
+    /// `**`.
+    Exp,
+}
+
+/// Tokens that only appear in macros.
+pub enum MacroToken {
+    /// Identifier starting with `!`.
+    MacroId(String),
+
+    /// `!`.
+    Bang,
+
+    /// `%`.
+    Percent,
+
+    /// `?`.
+    Question,
+
+    /// `` ` ``.
+    Backtick,
+
+    /// `_`.
+    ///
+    /// Although underscores may appear within identifiers, they can't be the
+    /// first character, so this represents an underscore found on its own.
+    Underscore,
+}
+
+/// An error encountered while converting a segment into a [`Token`].
+#[derive(ThisError, Debug)]
+pub enum TokenError {
+    /// Unterminated string constant.
+    #[error("Unterminated string constant.")]
+    ExpectedQuote,
+
+    /// Missing exponent.
+    ///
+    /// The payload is the text of the offending segment.
+    #[error("Missing exponent following `{0}`")]
+    ExpectedExponent(String),
+
+    /// Odd length hex string.
+    ///
+    /// The payload is the number of hex digits found.
+    #[error("String of hex digits has {0} characters, which is not a multiple of 2.")]
+    OddLengthHexString(usize),
+
+    /// Invalid hex digit.
+    #[error("Invalid hex digit {0:?}.")]
+    BadHexDigit(char),
+
+    /// Invalid length Unicode string.
+    #[error("Unicode string contains {0} bytes, which is not in the valid range of 1 to 8 bytes.")]
+    BadLengthUnicodeString(usize),
+
+    /// Invalid code point.
+    #[error("U+{0:04X} is not a valid Unicode code point.")]
+    BadCodePoint(u32),
+
+    /// Expected hexadecimal Unicode code point.
+    #[error("Expected hexadecimal Unicode code point.")]
+    ExpectedCodePoint,
+
+    /// `DO REPEAT` nested too deeply.
+    #[error("`DO REPEAT` nested too deeply.")]
+    DoRepeatOverflow,
+
+    /// Unexpected character.
+    #[error("Unexpected character {0:?} in input.")]
+    UnexpectedChar(char),
+}
+
+impl Token {
+    pub fn try_from_segment((segment, s): (Segment, &str)) -> Result<Option<Self>, TokenError> {
+        match segment {
+            Segment::Number => Ok(Some(Self::Number(s.parse().unwrap()))),
+            Segment::QuotedString => {
+                // Trim quote mark from front and back.
+                let mut chars = s.chars();
+                let quote = chars.next().unwrap();
+                let s = chars.as_str().strip_suffix(quote).unwrap();
+
+                // Replace doubled quotes by single ones.
+                let (single_quote, double_quote) = match quote {
+                    '\'' => ("'", "''"),
+                    '"' => ("\"", "\"\""),
+                    _ => unreachable!(),
+                };
+                Ok(Some(Self::String(s.replace(double_quote, single_quote))))
+            }
+            Segment::HexString => {
+                // Strip `X"` prefix and `"` suffix (or variations).
+                let s = &s[2..s.len() - 1];
+                for c in s.chars() {
+                    if !c.is_ascii_hexdigit() {
+                        return Err(TokenError::BadHexDigit(c))
+                    }
+                }
+                if s.len() % 2 != 0 {
+                    return Err(TokenError::OddLengthHexString(s.len()))
+                }
+                let mut out = String::with_capacity(s.len());
+                for pair in s.as_bytes().chunks_exact(2) {
+                    let hi = char::from(pair[0]).to_digit(16).unwrap() as u8;
+                    let lo = char::from(pair[1]).to_digit(16).unwrap() as u8;
+                    out.push(char::from(hi * 16 + lo));
+                }
+                Ok(Some(Self::String(out)))
+            }
+            Segment::UnicodeString => {
+                // Strip `U"` prefix and `"` suffix (or variations).
+                let s = &s[2..s.len() - 1];
+                if !(1..=8).contains(&s.len()) {
+                    return Err(TokenError::BadLengthUnicodeString(s.len()));
+                }
+                let Ok(code_point) = u32::from_str_radix(s, 16) else {
+                    return Err(TokenError::ExpectedCodePoint);
+                };
+                let Some(c) = char::from_u32(code_point) else {
+                    return Err(TokenError::BadCodePoint(code_point));
+                };
+                Ok(Some(Self::String(String::from(c))))
+            }
+
+            Segment::UnquotedString
+            | Segment::DoRepeatCommand
+            | Segment::InlineData
+            | Segment::Document
+            | Segment::MacroBody
+            | Segment::MacroName => Ok(Some(Self::String(String::from(s)))),
+
+            Segment::ReservedWord => {
+                let c0 = s.as_bytes()[0].to_ascii_uppercase();
+                let c1 = s.as_bytes()[1].to_ascii_uppercase();
+                match (c0, c1) {
+                    (b'B', _) => Ok(Some(Self::Punct(Punct::By))),
+                    (b'E', _) => Ok(Some(Self::Punct(Punct::Eq))),
+                    (b'G', b'T') => Ok(Some(Self::Punct(Punct::Gt))),
+                    (b'G', _) => Ok(Some(Self::Punct(Punct::Ge))),
+                    (b'L', b'T') => Ok(Some(Self::Punct(Punct::Lt))),
+                    (b'L', _) => Ok(Some(Self::Punct(Punct::Le))),
+                    (b'N', b'E') => Ok(Some(Self::Punct(Punct::Ne))),
+                    (b'N', _) => Ok(Some(Self::Punct(Punct::Not))),
+                    (b'O', _) => Ok(Some(Self::Punct(Punct::Or))),
+                    (b'T', _) => Ok(Some(Self::Punct(Punct::To))),
+                    (b'A', b'L') => Ok(Some(Self::Punct(Punct::All))),
+                    (b'A', _) => Ok(Some(Self::Punct(Punct::And))),
+                    (b'W', _) => Ok(Some(Self::Punct(Punct::With))),
+                    _ => unreachable!(),
+                }
+            }
+            Segment::Identifier => Ok(Some(Self::Id(String::from(s)))),
+            Segment::Punct => match s {
+                "(" => Ok(Some(Self::Punct(Punct::LParen))),
+                ")" => Ok(Some(Self::Punct(Punct::RParen))),
+                "[" => Ok(Some(Self::Punct(Punct::LSquare))),
+                "]" => Ok(Some(Self::Punct(Punct::RSquare))),
+                "{" => Ok(Some(Self::Punct(Punct::LCurly))),
+                "}" => Ok(Some(Self::Punct(Punct::RCurly))),
+                "," => Ok(Some(Self::Punct(Punct::Comma))),
+                "=" => Ok(Some(Self::Punct(Punct::Equals))),
+                "-" => Ok(Some(Self::Punct(Punct::Dash))),
+                "&" => Ok(Some(Self::Punct(Punct::And))),
+                "|" => Ok(Some(Self::Punct(Punct::Or))),
+                "+" => Ok(Some(Self::Punct(Punct::Plus))),
+                "/" => Ok(Some(Self::Punct(Punct::Slash))),
+                "*" => Ok(Some(Self::Punct(Punct::Asterisk))),
+                "<" => Ok(Some(Self::Punct(Punct::Lt))),
+                ">" => Ok(Some(Self::Punct(Punct::Gt))),
+                "~" => Ok(Some(Self::Punct(Punct::Not))),
+                ":" => Ok(Some(Self::Punct(Punct::Colon))),
+                ";" => Ok(Some(Self::Punct(Punct::Semicolon))),
+                "**" => Ok(Some(Self::Punct(Punct::Exp))),
+                "<=" => Ok(Some(Self::Punct(Punct::Le))),
+                "<>" => Ok(Some(Self::Punct(Punct::Ne))),
+                "~=" => Ok(Some(Self::Punct(Punct::Ne))),
+                ">=" => Ok(Some(Self::Punct(Punct::Ge))),
+                "!" => Ok(Some(Self::MacroToken(MacroToken::Bang))),
+                "%" => Ok(Some(Self::MacroToken(MacroToken::Percent))),
+                "?" => Ok(Some(Self::MacroToken(MacroToken::Question))),
+                "`" => Ok(Some(Self::MacroToken(MacroToken::Backtick))),
+                "_" => Ok(Some(Self::MacroToken(MacroToken::Underscore))),
+                _ => unreachable!(),
+            },
+            Segment::Shbang
+            | Segment::Spaces
+            | Segment::Comment
+            | Segment::Newline
+            | Segment::CommentCommand => Ok(None),
+            Segment::DoRepeatOverflow => Err(TokenError::DoRepeatOverflow),
+            Segment::MacroId => Ok(Some(Self::MacroToken(MacroToken::MacroId(String::from(s))))),
+            Segment::StartDocument => Ok(Some(Self::Id(String::from("DOCUMENT")))),
+            Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand => {
+                Ok(Some(Self::EndCommand))
+            }
+            Segment::End => Ok(Some(Self::End)),
+            Segment::ExpectedQuote => Err(TokenError::ExpectedQuote),
+            Segment::ExpectedExponent => Err(TokenError::ExpectedExponent(String::from(s))),
+            Segment::UnexpectedChar => Err(TokenError::UnexpectedChar(s.chars().next().unwrap())),
+        }
+    }
+}
index e4fe405d47c340963666604f3c43a4059fd2a456..e0ab8f872ead36f7826bbf274cd4e1b6695e1d0a 100644 (file)
@@ -222,12 +222,6 @@ scan_punct2__ (char c0, char c1)
 
     case '~':
       return T_NE;
-
-    case '&':
-      return T_AND;
-
-    case '|':
-      return T_OR;
     }
 
   NOT_REACHED ();