From: Ben Pfaff Date: Sat, 13 Jul 2024 22:18:57 +0000 (-0700) Subject: work on scan X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d552c4355f774b1047fccf490dfe3a62d7310331;p=pspp work on scan --- diff --git a/rust/src/lex/mod.rs b/rust/src/lex/mod.rs index 2047837df4..732cf3a09c 100644 --- a/rust/src/lex/mod.rs +++ b/rust/src/lex/mod.rs @@ -1,2 +1,16 @@ +//! PSPP syntax scanning. +//! +//! PSPP divides traditional "lexical analysis" or "tokenization" into two +//! phases: a lower-level phase called "segmentation" and a higher-level phase +//! called "scanning". [super::segment] implements the segmentation phase and +//! this module the scanning phase. +//! +//! Scanning accepts as input a stream of segments, which are UTF-8 strings each +//! labeled with a segment type. It outputs a stream of "scan tokens", which +//! are the same as the tokens used by the PSPP parser with a few additional +//! types. + pub mod segment; +pub mod scan; pub mod command_name; +pub mod token; diff --git a/rust/src/lex/scan/mod.rs b/rust/src/lex/scan/mod.rs new file mode 100644 index 0000000000..343bde8ca2 --- /dev/null +++ b/rust/src/lex/scan/mod.rs @@ -0,0 +1,12 @@ +//! PSPP lexical analysis. +//! +//! PSPP divides traditional "lexical analysis" or "tokenization" into two +//! phases: a lower-level phase called "segmentation" and a higher-level phase +//! called "scanning". [segment] implements the segmentation phase and [scan] +//! the scanning phase. +//! +//! Scanning accepts as input a stream of segments, which are UTF-8 strings each +//! labeled with a segment type. It outputs a stream of "scan tokens", which +//! are the same as the tokens used by the PSPP parser with a few additional +//! types. + diff --git a/rust/src/lex/segment/mod.rs b/rust/src/lex/segment/mod.rs index 6bf30ba2e5..401d523825 100644 --- a/rust/src/lex/segment/mod.rs +++ b/rust/src/lex/segment/mod.rs @@ -56,7 +56,7 @@ pub enum Mode { /// The type of a segment. 
#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum Type { +pub enum Segment { Number, QuotedString, HexString, @@ -214,10 +214,14 @@ impl Segmenter { /// consumed, must not be provided with *different* values on subsequent /// calls. This is because the function must often make decisions based on /// looking ahead beyond the bytes that it consumes. - pub fn push<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> { + pub fn push<'a>( + &mut self, + input: &'a str, + eof: bool, + ) -> Result<(&'a str, Segment), Incomplete> { if input.is_empty() { if eof { - return Ok((input, Type::End)); + return Ok((input, Segment::End)); } else { return Err(Incomplete); }; @@ -443,12 +447,12 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { if let (Some('#'), rest) = take(input, eof)? { if let (Some('!'), rest) = take(rest, eof)? { let rest = self.parse_full_line(rest, eof)?; self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((rest, Type::Shbang)); + return Ok((rest, Segment::Shbang)); } } @@ -469,7 +473,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { debug_assert_eq!(self.state.0, State::General); debug_assert!(self.start_of_line()); debug_assert!(!input.is_empty()); @@ -481,16 +485,16 @@ impl Segmenter { '+' if is_start_of_string(skip_spaces_and_comments(rest, eof)?, eof)? => { // This `+` is punctuation that may separate pieces of a string. self.state = (State::General, Substate::empty()); - return Ok((rest, Type::Punct)); + return Ok((rest, Segment::Punct)); } '+' | '-' | '.' => { self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((rest, Type::StartCommand)); + return Ok((rest, Segment::StartCommand)); } _ if c.is_whitespace() => { if at_end_of_line(input, eof)? 
{ self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((input, Type::SeparateCommands)); + return Ok((input, Segment::SeparateCommands)); } } _ => { @@ -498,7 +502,7 @@ impl Segmenter { && !self.state.1.contains(Substate::START_OF_COMMAND) { self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((input, Type::StartCommand)); + return Ok((input, Segment::StartCommand)); } } } @@ -509,7 +513,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { debug_assert!(self.state.0 == State::General); debug_assert!(!self.state.1.contains(Substate::START_OF_LINE)); let (Some(c), rest) = take(input, eof)? else { @@ -520,16 +524,16 @@ impl Segmenter { self.state.1 |= Substate::START_OF_LINE; Ok(( self.parse_newline(input, eof).unwrap().unwrap(), - Type::Newline, + Segment::Newline, )) } '/' => { if let (Some('*'), rest) = take(rest, eof)? { let rest = skip_comment(rest, eof)?; - return Ok((rest, Type::Comment)); + return Ok((rest, Segment::Comment)); } else { self.state.1 = Substate::empty(); - return Ok((rest, Type::Punct)); + return Ok((rest, Segment::Punct)); } } '-' => { @@ -548,11 +552,11 @@ impl Segmenter { None | Some(_) => (), } self.state.1 = Substate::empty(); - return Ok((rest, Type::Punct)); + return Ok((rest, Segment::Punct)); } '(' | ')' | '[' | ']' | '{' | '}' | ',' | '=' | ';' | ':' | '&' | '|' | '+' => { self.state.1 = Substate::empty(); - return Ok((rest, Type::Punct)); + return Ok((rest, Segment::Punct)); } '*' => { if self.state.1.contains(Substate::START_OF_COMMAND) { @@ -567,43 +571,43 @@ impl Segmenter { '~' => self.parse_digraph(&['='], rest, eof), '.' if at_end_of_line(rest, eof)? => { self.state.1 = Substate::START_OF_COMMAND; - Ok((rest, Type::EndCommand)) + Ok((rest, Segment::EndCommand)) } '.' => match take(rest, eof)? 
{ (Some(c), _) if c.is_ascii_digit() => self.parse_number(input, eof), - _ => Ok((rest, Type::Punct)), + _ => Ok((rest, Segment::Punct)), }, '0'..='9' => self.parse_number(input, eof), - 'u' | 'U' => self.maybe_parse_string(Type::UnicodeString, (input, rest), eof), - 'x' | 'X' => self.maybe_parse_string(Type::HexString, (input, rest), eof), - '\'' | '"' => self.parse_string(Type::QuotedString, c, rest, eof), + 'u' | 'U' => self.maybe_parse_string(Segment::UnicodeString, (input, rest), eof), + 'x' | 'X' => self.maybe_parse_string(Segment::HexString, (input, rest), eof), + '\'' | '"' => self.parse_string(Segment::QuotedString, c, rest, eof), '!' => { let (c, rest2) = take(rest, eof)?; match c { - Some('*') => Ok((rest2, Type::MacroId)), + Some('*') => Ok((rest2, Segment::MacroId)), Some(_) => self.parse_id(input, eof), - None => Ok((rest, Type::Punct)), + None => Ok((rest, Segment::Punct)), } } - c if c.is_whitespace() => Ok((skip_spaces(rest, eof)?, Type::Spaces)), + c if c.is_whitespace() => Ok((skip_spaces(rest, eof)?, Segment::Spaces)), c if c.may_start_id() => self.parse_id(input, eof), '!'..='~' if c != '\\' && c != '^' => { self.state.1 = Substate::empty(); - Ok((rest, Type::Punct)) + Ok((rest, Segment::Punct)) } _ => { self.state.1 = Substate::empty(); - Ok((rest, Type::UnexpectedChar)) + Ok((rest, Segment::UnexpectedChar)) } } } fn parse_string<'a>( &mut self, - type_: Type, + segment: Segment, quote: char, mut input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { println!("{quote:?} {input:?}"); while let (Some(c), rest) = take(input, eof)? 
{ match c { @@ -611,7 +615,7 @@ impl Segmenter { let (c, rest2) = take(rest, eof)?; if c != Some(quote) { self.state.1 = Substate::empty(); - return Ok((rest, type_)); + return Ok((rest, segment)); } input = rest2; } @@ -620,16 +624,16 @@ impl Segmenter { } } self.state.1 = Substate::empty(); - Ok((input, Type::ExpectedQuote)) + Ok((input, Segment::ExpectedQuote)) } fn maybe_parse_string<'a>( &mut self, - type_: Type, + segment: Segment, input: (&'a str, &'a str), eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { match take(input.1, eof)? { - (Some(c), rest) if c == '\'' || c == '"' => self.parse_string(type_, c, rest, eof), + (Some(c), rest) if c == '\'' || c == '"' => self.parse_string(segment, c, rest, eof), _ => self.parse_id(input.0, eof), } } @@ -640,40 +644,44 @@ impl Segmenter { ) -> Result<(&'a str, &'a str), Incomplete> { let mut sub = Segmenter::new(self.mode, true); loop { - let (rest, type_) = sub.push(input, eof)?; - match type_ { - Type::Shbang | Type::Spaces | Type::Comment | Type::Newline => (), + let (rest, segment) = sub.push(input, eof)?; + match segment { + Segment::Shbang | Segment::Spaces | Segment::Comment | Segment::Newline => (), - Type::Identifier => return Ok((&input[..input.len() - rest.len()], rest)), + Segment::Identifier => return Ok((&input[..input.len() - rest.len()], rest)), - Type::Number - | Type::QuotedString - | Type::HexString - | Type::UnicodeString - | Type::UnquotedString - | Type::ReservedWord - | Type::Punct - | Type::CommentCommand - | Type::DoRepeatCommand - | Type::DoRepeatOverflow - | Type::InlineData - | Type::MacroId - | Type::MacroName - | Type::MacroBody - | Type::StartDocument - | Type::Document - | Type::StartCommand - | Type::SeparateCommands - | Type::EndCommand - | Type::End - | Type::ExpectedQuote - | Type::ExpectedExponent - | Type::UnexpectedChar => return Ok(("", rest)), + Segment::Number + | Segment::QuotedString + | Segment::HexString + | 
Segment::UnicodeString + | Segment::UnquotedString + | Segment::ReservedWord + | Segment::Punct + | Segment::CommentCommand + | Segment::DoRepeatCommand + | Segment::DoRepeatOverflow + | Segment::InlineData + | Segment::MacroId + | Segment::MacroName + | Segment::MacroBody + | Segment::StartDocument + | Segment::Document + | Segment::StartCommand + | Segment::SeparateCommands + | Segment::EndCommand + | Segment::End + | Segment::ExpectedQuote + | Segment::ExpectedExponent + | Segment::UnexpectedChar => return Ok(("", rest)), } input = rest; } } - fn parse_id<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> { + fn parse_id<'a>( + &mut self, + input: &'a str, + eof: bool, + ) -> Result<(&'a str, Segment), Incomplete> { let (Some(_), mut end) = take(input, eof).unwrap() else { unreachable!() }; @@ -696,18 +704,18 @@ impl Segmenter { return self.parse_comment_1(input, eof); } else if id_match("DOCUMENT", identifier) { self.state.0 = State::Document1; - return Ok((input, Type::StartDocument)); + return Ok((input, Segment::StartDocument)); } else if id_match_n("DEFINE", identifier, 6) { self.state.0 = State::Define1; } else if id_match("FILE", identifier) { if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) { self.state = (State::FileLabel1, Substate::empty()); - return Ok((rest, Type::Identifier)); + return Ok((rest, Segment::Identifier)); } } else if id_match("DO", identifier) { if id_match("REPEAT", self.next_id_in_command(rest, eof)?.0) { self.state = (State::DoRepeat1, Substate::empty()); - return Ok((rest, Type::Identifier)); + return Ok((rest, Segment::Identifier)); } } else if id_match("BEGIN", identifier) { let (next_id, rest2) = self.next_id_in_command(rest, eof)?; @@ -728,28 +736,28 @@ impl Segmenter { }, Substate::empty(), ); - return Ok((rest, Type::Identifier)); + return Ok((rest, Segment::Identifier)); } } } } self.state.1 = Substate::empty(); - let type_ = if is_reserved_word(identifier) { - Type::ReservedWord + 
let segment = if is_reserved_word(identifier) { + Segment::ReservedWord } else if identifier.starts_with('!') { - Type::MacroId + Segment::MacroId } else { - Type::Identifier + Segment::Identifier }; - Ok((rest, type_)) + Ok((rest, segment)) } fn parse_digraph<'a>( &mut self, seconds: &[char], input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let (c, rest) = take(input, eof)?; self.state.1 = Substate::empty(); Ok(( @@ -757,14 +765,14 @@ impl Segmenter { Some(c) if seconds.contains(&c) => rest, _ => input, }, - Type::Punct, + Segment::Punct, )) } fn parse_number<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let mut input = skip_digits(input, eof)?; if let Some(rest) = match_char(|c| c == '.', input, eof)? { let rest2 = skip_digits(rest, eof)?; @@ -777,18 +785,18 @@ impl Segmenter { let rest2 = skip_digits(rest, eof)?; if rest2.len() == rest.len() { self.state.1 = Substate::empty(); - return Ok((rest, Type::ExpectedExponent)); + return Ok((rest, Segment::ExpectedExponent)); } input = rest2; } self.state.1 = Substate::empty(); - Ok((input, Type::Number)) + Ok((input, Segment::Number)) } fn parse_comment_1<'a>( &mut self, mut input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { enum CommentState<'a> { Blank, NotBlank, @@ -799,7 +807,7 @@ impl Segmenter { let (Some(c), rest) = take(input, eof)? else { // End of file. self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((input, Type::SeparateCommands)); + return Ok((input, Segment::SeparateCommands)); }; match c { '.' => state = CommentState::Period(input), @@ -808,17 +816,17 @@ impl Segmenter { CommentState::Blank => { // Blank line ends comment command. 
self.state = (State::General, Substate::START_OF_COMMAND); - return Ok((input, Type::SeparateCommands)); + return Ok((input, Segment::SeparateCommands)); } CommentState::Period(period) => { // '.' at end of line ends comment command. self.state = (State::General, Substate::empty()); - return Ok((period, Type::CommentCommand)); + return Ok((period, Segment::CommentCommand)); } CommentState::NotBlank => { // Comment continues onto next line. self.state = (State::Comment2, Substate::empty()); - return Ok((input, Type::CommentCommand)); + return Ok((input, Segment::CommentCommand)); } } } @@ -832,7 +840,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let rest = self.parse_newline(input, eof)?.unwrap(); let new_command = match take(rest, eof)?.0 { @@ -848,18 +856,18 @@ impl Segmenter { } else { self.state.0 = State::Comment1; } - Ok((rest, Type::Newline)) + Ok((rest, Segment::Newline)) } fn parse_document_1<'a>( &mut self, mut input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let mut end_cmd = false; loop { let (Some(c), rest) = take(input, eof)? else { self.state.0 = State::Document3; - return Ok((input, Type::Document)); + return Ok((input, Segment::Document)); }; match c { '.' 
=> end_cmd = true, @@ -869,7 +877,7 @@ impl Segmenter { } else { State::Document2 }; - return Ok((input, Type::Document)); + return Ok((input, Segment::Document)); } c if !c.is_whitespace() => end_cmd = false, _ => (), @@ -881,21 +889,21 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let rest = self.parse_newline(input, eof)?.unwrap(); self.state.0 = State::Document1; - Ok((rest, Type::Newline)) + Ok((rest, Segment::Newline)) } fn parse_document_3<'a>( &mut self, input: &'a str, _eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { self.state = ( State::General, Substate::START_OF_COMMAND | Substate::START_OF_LINE, ); - Ok((input, Type::EndCommand)) + Ok((input, Segment::EndCommand)) } fn quoted_file_label(input: &str, eof: bool) -> Result { let input = skip_spaces_and_comments(input, eof)?; @@ -908,13 +916,13 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let mut sub = Segmenter { state: (State::General, self.state.1), ..*self }; - let (rest, type_) = sub.push(input, eof)?; - if type_ == Type::Identifier { + let (rest, segment) = sub.push(input, eof)?; + if segment == Segment::Identifier { let id = &input[..input.len() - rest.len()]; debug_assert!(id_match("LABEL", id), "{id} should be LABEL"); if Self::quoted_file_label(rest, eof)? 
{ @@ -925,29 +933,29 @@ impl Segmenter { } else { self.state.1 = sub.state.1; } - Ok((rest, type_)) + Ok((rest, segment)) } fn parse_file_label_2<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let input = skip_spaces(input, eof)?; self.state.0 = State::FileLabel3; - Ok((input, Type::Spaces)) + Ok((input, Segment::Spaces)) } fn parse_file_label_3<'a>( &mut self, mut input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let mut end_cmd = None; loop { let (c, rest) = take(input, eof)?; match c { None | Some('\n') | Some('\r') if is_end_of_line(input, eof)? => { self.state = (State::General, Substate::empty()); - return Ok((end_cmd.unwrap_or(input), Type::UnquotedString)); + return Ok((end_cmd.unwrap_or(input), Segment::UnquotedString)); } None => unreachable!(), Some('.') => end_cmd = Some(input), @@ -957,7 +965,11 @@ impl Segmenter { input = rest; } } - fn subparse<'a>(&mut self, input: &'a str, eof: bool) -> Result<(&'a str, Type), Incomplete> { + fn subparse<'a>( + &mut self, + input: &'a str, + eof: bool, + ) -> Result<(&'a str, Segment), Incomplete> { let mut sub = Segmenter { mode: self.mode, state: (State::General, self.state.1), @@ -974,17 +986,17 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { - let (rest, type_) = self.subparse(input, eof)?; - if type_ == Type::SeparateCommands { + ) -> Result<(&'a str, Segment), Incomplete> { + let (rest, segment) = self.subparse(input, eof)?; + if segment == Segment::SeparateCommands { // We reached a blank line that separates the head from the body. self.state.0 = State::DoRepeat2; - } else if type_ == Type::EndCommand || type_ == Type::StartCommand { + } else if segment == Segment::EndCommand || segment == Segment::StartCommand { // We reached the body. 
self.state.0 = State::DoRepeat3; self.nest = 1; } - Ok((rest, type_)) + Ok((rest, segment)) } /// We are segmenting a `DO REPEAT` command, currently reading a blank line /// that separates the head from the body. @@ -992,14 +1004,14 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { - let (rest, type_) = self.subparse(input, eof)?; - if type_ == Type::Newline { + ) -> Result<(&'a str, Segment), Incomplete> { + let (rest, segment) = self.subparse(input, eof)?; + if segment == Segment::Newline { // We reached the body. self.state.0 = State::DoRepeat3; self.nest = 1; } - Ok((rest, type_)) + Ok((rest, segment)) } fn parse_newline<'a>( &mut self, @@ -1057,9 +1069,9 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { if let Some(rest) = self.parse_newline(input, eof)? { - return Ok((rest, Type::Newline)); + return Ok((rest, Segment::Newline)); } let rest = self.parse_full_line(input, eof)?; let direction = self.check_repeat_command(input, eof)?; @@ -1081,11 +1093,11 @@ impl Segmenter { return self.push(input, eof); } } - return Ok((rest, Type::DoRepeatCommand)); + return Ok((rest, Segment::DoRepeatCommand)); } - fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<(&'a str, Type), Incomplete> { + fn parse_do_repeat_4<'a>(&mut self, input: &'a str) -> Result<(&'a str, Segment), Incomplete> { self.state.0 = State::DoRepeat3; - Ok((input, Type::DoRepeatOverflow)) + Ok((input, Segment::DoRepeatOverflow)) } /// We are segmenting a `DEFINE` command, which consists of: /// @@ -1109,44 +1121,44 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { - let (rest, type_) = self.subparse(input, eof)?; - match type_ { - Type::Identifier | Type::MacroId if self.state.0 == State::Define1 => { + ) -> Result<(&'a str, Segment), Incomplete> { + let (rest, segment) = 
self.subparse(input, eof)?; + match segment { + Segment::Identifier | Segment::MacroId if self.state.0 == State::Define1 => { self.state.0 = State::Define2; - return Ok((rest, Type::MacroName)); + return Ok((rest, Segment::MacroName)); } - Type::SeparateCommands | Type::EndCommand | Type::StartCommand => { + Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => { // The DEFINE command is malformed because we reached its end // without ever hitting a `(` token. Transition back to general // parsing. self.state.0 = State::General; } - Type::Punct if input.starts_with('(') => { + Segment::Punct if input.starts_with('(') => { self.state.0 = State::Define3; self.nest = 1; } _ => (), } - Ok((rest, type_)) + Ok((rest, segment)) } fn parse_define_3<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { - let (rest, type_) = self.subparse(input, eof)?; - match type_ { - Type::SeparateCommands | Type::EndCommand | Type::StartCommand => { + ) -> Result<(&'a str, Segment), Incomplete> { + let (rest, segment) = self.subparse(input, eof)?; + match segment { + Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => { // The DEFINE command is malformed because we reached its end // without ever hitting a `(` token. Transition back to general // parsing. 
self.state.0 = State::General; } - Type::Punct if input.starts_with('(') => { + Segment::Punct if input.starts_with('(') => { self.nest += 1; } - Type::Punct if input.starts_with(')') => { + Segment::Punct if input.starts_with(')') => { self.nest -= 1; if self.nest == 0 { self.state = (State::Define4, Substate::empty()); @@ -1154,7 +1166,7 @@ impl Segmenter { } _ => (), } - Ok((rest, type_)) + Ok((rest, segment)) } fn find_enddefine<'a>(mut input: &'a str) -> Option<&'a str> { loop { @@ -1185,7 +1197,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let rest = self.parse_full_line(input, eof)?; let line = &input[..input.len() - rest.len()]; if let Some(end) = Self::find_enddefine(line) { @@ -1197,10 +1209,10 @@ impl Segmenter { self.push(input, eof) } else if prefix.trim_start().is_empty() { // Line starts with spaces followed by `!ENDDEFINE`. - Ok((rest, Type::Spaces)) + Ok((rest, Segment::Spaces)) } else { // Line starts with some content followed by `!ENDDEFINE`. - Ok((rest, Type::MacroBody)) + Ok((rest, Segment::MacroBody)) } } else { // No `!ENDDEFINE`. We have a full line of macro body. @@ -1211,48 +1223,48 @@ impl Segmenter { // // However, if it's a later line, we need to report it because blank // lines can have significance. 
- let type_ = if self.state.0 == State::Define4 && line.trim_start().is_empty() { + let segment = if self.state.0 == State::Define4 && line.trim_start().is_empty() { if line.is_empty() { return self.parse_define_6(input, eof); } - Type::Spaces + Segment::Spaces } else { - Type::MacroBody + Segment::MacroBody }; self.state.0 = State::Define6; - Ok((rest, type_)) + Ok((rest, segment)) } } fn parse_define_6<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let rest = self.parse_newline(input, eof)?.unwrap(); self.state.0 = State::Define5; - Ok((rest, Type::Newline)) + Ok((rest, Segment::Newline)) } fn parse_begin_data_1<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { - let (rest, type_) = self.subparse(input, eof)?; - if type_ == Type::Newline { + ) -> Result<(&'a str, Segment), Incomplete> { + let (rest, segment) = self.subparse(input, eof)?; + if segment == Segment::Newline { self.state.0 = State::BeginData2; } - Ok((rest, type_)) + Ok((rest, segment)) } fn parse_begin_data_2<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { - let (rest, type_) = self.subparse(input, eof)?; - if type_ == Type::Newline { + ) -> Result<(&'a str, Segment), Incomplete> { + let (rest, segment) = self.subparse(input, eof)?; + if segment == Segment::Newline { self.state.0 = State::BeginData3; } - Ok((rest, type_)) + Ok((rest, segment)) } fn is_end_data(line: &str) -> bool { let Some(rest) = strip_prefix_ignore_ascii_case(line, "END") else { @@ -1283,7 +1295,7 @@ impl Segmenter { &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let rest = self.parse_full_line(input, eof)?; let line = &input[..input.len() - rest.len()]; if Self::is_end_data(line) { @@ -1294,17 +1306,17 @@ impl Segmenter { self.push(input, eof) } else { self.state.0 = 
State::BeginData4; - Ok((rest, Type::InlineData)) + Ok((rest, Segment::InlineData)) } } fn parse_begin_data_4<'a>( &mut self, input: &'a str, eof: bool, - ) -> Result<(&'a str, Type), Incomplete> { + ) -> Result<(&'a str, Segment), Incomplete> { let rest = self.parse_newline(input, eof)?.unwrap(); self.state.0 = State::BeginData3; - Ok((rest, Type::Newline)) + Ok((rest, Segment::Newline)) } } diff --git a/rust/src/lex/segment/test.rs b/rust/src/lex/segment/test.rs index d24523f56e..d01a80d779 100644 --- a/rust/src/lex/segment/test.rs +++ b/rust/src/lex/segment/test.rs @@ -1,12 +1,12 @@ use crate::prompt::PromptStyle; -use super::{Mode, Segmenter, Type}; +use super::{Mode, Segmenter, Segment}; -fn push_segment<'a>(segmenter: &mut Segmenter, input: &'a str, one_byte: bool) -> (&'a str, Type) { +fn push_segment<'a>(segmenter: &mut Segmenter, input: &'a str, one_byte: bool) -> (&'a str, Segment) { if one_byte { for len in input.char_indices().map(|(pos, _c)| pos) { - if let Ok((rest, type_)) = segmenter.push(&input[..len], false) { - return (&input[len - rest.len()..], type_); + if let Ok((rest, segment)) = segmenter.push(&input[..len], false) { + return (&input[len - rest.len()..], segment); } } } @@ -16,7 +16,7 @@ fn push_segment<'a>(segmenter: &mut Segmenter, input: &'a str, one_byte: bool) - fn _check_segmentation( mut input: &str, mode: Mode, - expect_segments: &[(Type, &str)], + expect_segments: &[(Segment, &str)], expect_prompts: &[PromptStyle], one_byte: bool, ) { @@ -24,13 +24,13 @@ fn _check_segmentation( let mut prompts = Vec::new(); let mut segmenter = Segmenter::new(mode, false); loop { - let (rest, type_) = push_segment(&mut segmenter, input, one_byte); + let (rest, segment) = push_segment(&mut segmenter, input, one_byte); let len = input.len() - rest.len(); let token = &input[..len]; - segments.push((type_, token)); - match type_ { - Type::End => break, - Type::Newline => prompts.push(segmenter.prompt()), + segments.push((segment, token)); + match 
segment { + Segment::End => break, + Segment::Newline => prompts.push(segmenter.prompt()), _ => (), } input = rest; @@ -66,7 +66,7 @@ fn _check_segmentation( fn check_segmentation( input: &str, mode: Mode, - expect_segments: &[(Type, &str)], + expect_segments: &[(Segment, &str)], expect_prompts: &[PromptStyle], ) { for (one_byte, one_byte_name) in [(false, "full-string"), (true, "byte-by-byte")] { @@ -79,9 +79,9 @@ fn check_segmentation( mode, &expect_segments .iter() - .map(|(type_, s)| match *type_ { - Type::Newline => (Type::Newline, "\r\n"), - _ => (*type_, *s), + .map(|(segment, s)| match *segment { + Segment::Newline => (Segment::Newline, "\r\n"), + _ => (*segment, *s), }) .collect::>(), expect_prompts, @@ -91,13 +91,13 @@ fn check_segmentation( if let Some(input) = input.strip_suffix('\n') { println!("running {one_byte_name} segmentation test without final newline..."); let mut expect_segments: Vec<_> = expect_segments.iter().copied().collect(); - assert_eq!(expect_segments.pop(), Some((Type::End, ""))); - assert_eq!(expect_segments.pop(), Some((Type::Newline, "\n"))); - while let Some((Type::SeparateCommands | Type::EndCommand, "")) = expect_segments.last() + assert_eq!(expect_segments.pop(), Some((Segment::End, ""))); + assert_eq!(expect_segments.pop(), Some((Segment::Newline, "\n"))); + while let Some((Segment::SeparateCommands | Segment::EndCommand, "")) = expect_segments.last() { expect_segments.pop(); } - expect_segments.push((Type::End, "")); + expect_segments.push((Segment::End, "")); _check_segmentation( input, mode, @@ -109,16 +109,17 @@ fn check_segmentation( } } +#[allow(dead_code)] fn print_segmentation(mut input: &str) { let mut segmenter = Segmenter::new(Mode::Auto, false); loop { - let (rest, type_) = segmenter.push(input, true).unwrap(); + let (rest, segment) = segmenter.push(input, true).unwrap(); let len = input.len() - rest.len(); let token = &input[..len]; - print!("{type_:?} {token:?}"); - match type_ { - Type::Newline => print!(" 
({:?})", segmenter.prompt()), - Type::End => break, + print!("{segment:?} {token:?}"); + match segment { + Segment::Newline => print!(" ({:?})", segmenter.prompt()), + Segment::End => break, _ => (), } println!(); @@ -143,93 +144,93 @@ GhIjK "#, Mode::Auto, &[ - (Type::Identifier, "a"), - (Type::Spaces, " "), - (Type::Identifier, "ab"), - (Type::Spaces, " "), - (Type::Identifier, "abc"), - (Type::Spaces, " "), - (Type::Identifier, "abcd"), - (Type::Spaces, " "), - (Type::MacroId, "!abcd"), - (Type::Newline, "\n"), - (Type::Identifier, "A"), - (Type::Spaces, " "), - (Type::Identifier, "AB"), - (Type::Spaces, " "), - (Type::Identifier, "ABC"), - (Type::Spaces, " "), - (Type::Identifier, "ABCD"), - (Type::Spaces, " "), - (Type::MacroId, "!ABCD"), - (Type::Newline, "\n"), - (Type::Identifier, "aB"), - (Type::Spaces, " "), - (Type::Identifier, "aBC"), - (Type::Spaces, " "), - (Type::Identifier, "aBcD"), - (Type::Spaces, " "), - (Type::MacroId, "!aBcD"), - (Type::Newline, "\n"), - (Type::Identifier, "$x"), - (Type::Spaces, " "), - (Type::Identifier, "$y"), - (Type::Spaces, " "), - (Type::Identifier, "$z"), - (Type::Spaces, " "), - (Type::MacroId, "!$z"), - (Type::Newline, "\n"), - (Type::Identifier, "grève"), - (Type::Spaces, "\u{00a0}"), - (Type::Identifier, "Ângstrom"), - (Type::Spaces, "\u{00a0}"), - (Type::Identifier, "poté"), - (Type::Newline, "\n"), - (Type::Identifier, "#a"), - (Type::Spaces, " "), - (Type::Identifier, "#b"), - (Type::Spaces, " "), - (Type::Identifier, "#c"), - (Type::Spaces, " "), - (Type::Identifier, "##"), - (Type::Spaces, " "), - (Type::Identifier, "#d"), - (Type::Spaces, " "), - (Type::MacroId, "!#d"), - (Type::Newline, "\n"), - (Type::Identifier, "@efg"), - (Type::Spaces, " "), - (Type::Identifier, "@"), - (Type::Spaces, " "), - (Type::Identifier, "@@."), - (Type::Spaces, " "), - (Type::Identifier, "@#@"), - (Type::Spaces, " "), - (Type::MacroId, "!@"), - (Type::Spaces, " "), - (Type::Newline, "\n"), - (Type::Identifier, "##"), - 
(Type::Spaces, " "), - (Type::Identifier, "#"), - (Type::Spaces, " "), - (Type::Identifier, "#12345"), - (Type::Spaces, " "), - (Type::Identifier, "#.#"), - (Type::Newline, "\n"), - (Type::Identifier, "f@#_.#6"), - (Type::Newline, "\n"), - (Type::Identifier, "GhIjK"), - (Type::Newline, "\n"), - (Type::StartCommand, "."), - (Type::Identifier, "x"), - (Type::Spaces, " "), - (Type::Number, "1"), - (Type::Identifier, "y"), - (Type::Spaces, " "), - (Type::Punct, "_"), - (Type::Identifier, "z"), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "a"), + (Segment::Spaces, " "), + (Segment::Identifier, "ab"), + (Segment::Spaces, " "), + (Segment::Identifier, "abc"), + (Segment::Spaces, " "), + (Segment::Identifier, "abcd"), + (Segment::Spaces, " "), + (Segment::MacroId, "!abcd"), + (Segment::Newline, "\n"), + (Segment::Identifier, "A"), + (Segment::Spaces, " "), + (Segment::Identifier, "AB"), + (Segment::Spaces, " "), + (Segment::Identifier, "ABC"), + (Segment::Spaces, " "), + (Segment::Identifier, "ABCD"), + (Segment::Spaces, " "), + (Segment::MacroId, "!ABCD"), + (Segment::Newline, "\n"), + (Segment::Identifier, "aB"), + (Segment::Spaces, " "), + (Segment::Identifier, "aBC"), + (Segment::Spaces, " "), + (Segment::Identifier, "aBcD"), + (Segment::Spaces, " "), + (Segment::MacroId, "!aBcD"), + (Segment::Newline, "\n"), + (Segment::Identifier, "$x"), + (Segment::Spaces, " "), + (Segment::Identifier, "$y"), + (Segment::Spaces, " "), + (Segment::Identifier, "$z"), + (Segment::Spaces, " "), + (Segment::MacroId, "!$z"), + (Segment::Newline, "\n"), + (Segment::Identifier, "grève"), + (Segment::Spaces, "\u{00a0}"), + (Segment::Identifier, "Ângstrom"), + (Segment::Spaces, "\u{00a0}"), + (Segment::Identifier, "poté"), + (Segment::Newline, "\n"), + (Segment::Identifier, "#a"), + (Segment::Spaces, " "), + (Segment::Identifier, "#b"), + (Segment::Spaces, " "), + (Segment::Identifier, "#c"), + (Segment::Spaces, " "), + (Segment::Identifier, "##"), + (Segment::Spaces, " 
"), + (Segment::Identifier, "#d"), + (Segment::Spaces, " "), + (Segment::MacroId, "!#d"), + (Segment::Newline, "\n"), + (Segment::Identifier, "@efg"), + (Segment::Spaces, " "), + (Segment::Identifier, "@"), + (Segment::Spaces, " "), + (Segment::Identifier, "@@."), + (Segment::Spaces, " "), + (Segment::Identifier, "@#@"), + (Segment::Spaces, " "), + (Segment::MacroId, "!@"), + (Segment::Spaces, " "), + (Segment::Newline, "\n"), + (Segment::Identifier, "##"), + (Segment::Spaces, " "), + (Segment::Identifier, "#"), + (Segment::Spaces, " "), + (Segment::Identifier, "#12345"), + (Segment::Spaces, " "), + (Segment::Identifier, "#.#"), + (Segment::Newline, "\n"), + (Segment::Identifier, "f@#_.#6"), + (Segment::Newline, "\n"), + (Segment::Identifier, "GhIjK"), + (Segment::Newline, "\n"), + (Segment::StartCommand, "."), + (Segment::Identifier, "x"), + (Segment::Spaces, " "), + (Segment::Number, "1"), + (Segment::Identifier, "y"), + (Segment::Spaces, " "), + (Segment::Punct, "_"), + (Segment::Identifier, "z"), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Later, @@ -270,82 +271,82 @@ WxYz./* unterminated end of line comment "#, Mode::Auto, &[ - (Type::Identifier, "abcd."), - (Type::Spaces, " "), - (Type::Identifier, "abcd"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "ABCD."), - (Type::Spaces, " "), - (Type::Identifier, "ABCD"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "aBcD."), - (Type::Spaces, " "), - (Type::Identifier, "aBcD"), - (Type::EndCommand, "."), - (Type::Spaces, " "), - (Type::Newline, "\n"), - (Type::Identifier, "$y."), - (Type::Spaces, " "), - (Type::Identifier, "$z."), - (Type::Spaces, " "), - (Type::Identifier, "あいうえお"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "#c."), - (Type::Spaces, " "), - (Type::Identifier, "#d."), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "@@."), - (Type::Spaces, " "), - (Type::Identifier, 
"@@..."), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "#.#"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "#abcd"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::StartCommand, "."), - (Type::Newline, "\n"), - (Type::StartCommand, "."), - (Type::Spaces, " "), - (Type::Newline, "\n"), - (Type::Identifier, "LMNOP"), - (Type::EndCommand, "."), - (Type::Spaces, " "), - (Type::Newline, "\n"), - (Type::Identifier, "QRSTUV"), - (Type::EndCommand, "."), - (Type::Comment, "/* end of line comment */"), - (Type::Newline, "\n"), - (Type::Identifier, "qrstuv"), - (Type::EndCommand, "."), - (Type::Spaces, " "), - (Type::Comment, "/* end of line comment */"), - (Type::Newline, "\n"), - (Type::Identifier, "QrStUv"), - (Type::EndCommand, "."), - (Type::Comment, "/* end of line comment */"), - (Type::Spaces, " "), - (Type::Newline, "\n"), - (Type::Identifier, "wxyz"), - (Type::EndCommand, "."), - (Type::Comment, "/* unterminated end of line comment"), - (Type::Newline, "\n"), - (Type::Identifier, "WXYZ"), - (Type::EndCommand, "."), - (Type::Spaces, " "), - (Type::Comment, "/* unterminated end of line comment"), - (Type::Newline, "\n"), - (Type::Identifier, "WxYz"), - (Type::EndCommand, "."), - (Type::Comment, "/* unterminated end of line comment "), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "abcd."), + (Segment::Spaces, " "), + (Segment::Identifier, "abcd"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "ABCD."), + (Segment::Spaces, " "), + (Segment::Identifier, "ABCD"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "aBcD."), + (Segment::Spaces, " "), + (Segment::Identifier, "aBcD"), + (Segment::EndCommand, "."), + (Segment::Spaces, " "), + (Segment::Newline, "\n"), + (Segment::Identifier, "$y."), + (Segment::Spaces, " "), + (Segment::Identifier, "$z."), + (Segment::Spaces, " "), + (Segment::Identifier, "あいうえお"), + 
(Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "#c."), + (Segment::Spaces, " "), + (Segment::Identifier, "#d."), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "@@."), + (Segment::Spaces, " "), + (Segment::Identifier, "@@..."), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "#.#"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "#abcd"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::StartCommand, "."), + (Segment::Newline, "\n"), + (Segment::StartCommand, "."), + (Segment::Spaces, " "), + (Segment::Newline, "\n"), + (Segment::Identifier, "LMNOP"), + (Segment::EndCommand, "."), + (Segment::Spaces, " "), + (Segment::Newline, "\n"), + (Segment::Identifier, "QRSTUV"), + (Segment::EndCommand, "."), + (Segment::Comment, "/* end of line comment */"), + (Segment::Newline, "\n"), + (Segment::Identifier, "qrstuv"), + (Segment::EndCommand, "."), + (Segment::Spaces, " "), + (Segment::Comment, "/* end of line comment */"), + (Segment::Newline, "\n"), + (Segment::Identifier, "QrStUv"), + (Segment::EndCommand, "."), + (Segment::Comment, "/* end of line comment */"), + (Segment::Spaces, " "), + (Segment::Newline, "\n"), + (Segment::Identifier, "wxyz"), + (Segment::EndCommand, "."), + (Segment::Comment, "/* unterminated end of line comment"), + (Segment::Newline, "\n"), + (Segment::Identifier, "WXYZ"), + (Segment::EndCommand, "."), + (Segment::Spaces, " "), + (Segment::Comment, "/* unterminated end of line comment"), + (Segment::Newline, "\n"), + (Segment::Identifier, "WxYz"), + (Segment::EndCommand, "."), + (Segment::Comment, "/* unterminated end of line comment "), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::First, @@ -379,90 +380,90 @@ and. with. 
"#, Mode::Auto, &[ - (Type::ReservedWord, "and"), - (Type::Spaces, " "), - (Type::ReservedWord, "or"), - (Type::Spaces, " "), - (Type::ReservedWord, "not"), - (Type::Spaces, " "), - (Type::ReservedWord, "eq"), - (Type::Spaces, " "), - (Type::ReservedWord, "ge"), - (Type::Spaces, " "), - (Type::ReservedWord, "gt"), - (Type::Spaces, " "), - (Type::ReservedWord, "le"), - (Type::Spaces, " "), - (Type::ReservedWord, "lt"), - (Type::Spaces, " "), - (Type::ReservedWord, "ne"), - (Type::Spaces, " "), - (Type::ReservedWord, "all"), - (Type::Spaces, " "), - (Type::ReservedWord, "by"), - (Type::Spaces, " "), - (Type::ReservedWord, "to"), - (Type::Spaces, " "), - (Type::ReservedWord, "with"), - (Type::Newline, "\n"), - (Type::ReservedWord, "AND"), - (Type::Spaces, " "), - (Type::ReservedWord, "OR"), - (Type::Spaces, " "), - (Type::ReservedWord, "NOT"), - (Type::Spaces, " "), - (Type::ReservedWord, "EQ"), - (Type::Spaces, " "), - (Type::ReservedWord, "GE"), - (Type::Spaces, " "), - (Type::ReservedWord, "GT"), - (Type::Spaces, " "), - (Type::ReservedWord, "LE"), - (Type::Spaces, " "), - (Type::ReservedWord, "LT"), - (Type::Spaces, " "), - (Type::ReservedWord, "NE"), - (Type::Spaces, " "), - (Type::ReservedWord, "ALL"), - (Type::Spaces, " "), - (Type::ReservedWord, "BY"), - (Type::Spaces, " "), - (Type::ReservedWord, "TO"), - (Type::Spaces, " "), - (Type::ReservedWord, "WITH"), - (Type::Newline, "\n"), - (Type::Identifier, "andx"), - (Type::Spaces, " "), - (Type::Identifier, "orx"), - (Type::Spaces, " "), - (Type::Identifier, "notx"), - (Type::Spaces, " "), - (Type::Identifier, "eqx"), - (Type::Spaces, " "), - (Type::Identifier, "gex"), - (Type::Spaces, " "), - (Type::Identifier, "gtx"), - (Type::Spaces, " "), - (Type::Identifier, "lex"), - (Type::Spaces, " "), - (Type::Identifier, "ltx"), - (Type::Spaces, " "), - (Type::Identifier, "nex"), - (Type::Spaces, " "), - (Type::Identifier, "allx"), - (Type::Spaces, " "), - (Type::Identifier, "byx"), - (Type::Spaces, " "), - 
(Type::Identifier, "tox"), - (Type::Spaces, " "), - (Type::Identifier, "withx"), - (Type::Newline, "\n"), - (Type::Identifier, "and."), - (Type::Spaces, " "), - (Type::ReservedWord, "with"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::ReservedWord, "and"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "or"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "not"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "eq"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "ge"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "gt"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "le"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "lt"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "ne"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "all"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "by"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "to"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "with"), + (Segment::Newline, "\n"), + (Segment::ReservedWord, "AND"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "OR"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "NOT"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "EQ"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "GE"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "GT"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "LE"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "LT"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "NE"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "ALL"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "BY"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "TO"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "WITH"), + (Segment::Newline, "\n"), + (Segment::Identifier, "andx"), + (Segment::Spaces, " "), + (Segment::Identifier, "orx"), + (Segment::Spaces, " "), + (Segment::Identifier, "notx"), + (Segment::Spaces, " "), + 
(Segment::Identifier, "eqx"), + (Segment::Spaces, " "), + (Segment::Identifier, "gex"), + (Segment::Spaces, " "), + (Segment::Identifier, "gtx"), + (Segment::Spaces, " "), + (Segment::Identifier, "lex"), + (Segment::Spaces, " "), + (Segment::Identifier, "ltx"), + (Segment::Spaces, " "), + (Segment::Identifier, "nex"), + (Segment::Spaces, " "), + (Segment::Identifier, "allx"), + (Segment::Spaces, " "), + (Segment::Identifier, "byx"), + (Segment::Spaces, " "), + (Segment::Identifier, "tox"), + (Segment::Spaces, " "), + (Segment::Identifier, "withx"), + (Segment::Newline, "\n"), + (Segment::Identifier, "and."), + (Segment::Spaces, " "), + (Segment::ReservedWord, "with"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Later, @@ -482,89 +483,89 @@ fn test_punctuation() { "#, Mode::Auto, &[ - (Type::Punct, "~"), - (Type::Spaces, " "), - (Type::Punct, "&"), - (Type::Spaces, " "), - (Type::Punct, "|"), - (Type::Spaces, " "), - (Type::Punct, "="), - (Type::Spaces, " "), - (Type::Punct, ">="), - (Type::Spaces, " "), - (Type::Punct, ">"), - (Type::Spaces, " "), - (Type::Punct, "<="), - (Type::Spaces, " "), - (Type::Punct, "<"), - (Type::Spaces, " "), - (Type::Punct, "~="), - (Type::Spaces, " "), - (Type::Punct, "<>"), - (Type::Spaces, " "), - (Type::Punct, "("), - (Type::Spaces, " "), - (Type::Punct, ")"), - (Type::Spaces, " "), - (Type::Punct, ","), - (Type::Spaces, " "), - (Type::Punct, "-"), - (Type::Spaces, " "), - (Type::Punct, "+"), - (Type::Spaces, " "), - (Type::Punct, "*"), - (Type::Spaces, " "), - (Type::Punct, "/"), - (Type::Spaces, " "), - (Type::Punct, "["), - (Type::Spaces, " "), - (Type::Punct, "]"), - (Type::Spaces, " "), - (Type::Punct, "**"), - (Type::Newline, "\n"), - (Type::Punct, "~"), - (Type::Punct, "&"), - (Type::Punct, "|"), - (Type::Punct, "="), - (Type::Punct, ">="), - (Type::Punct, ">"), - (Type::Punct, "<="), - (Type::Punct, "<"), - (Type::Punct, "~="), - (Type::Punct, "<>"), - (Type::Punct, 
"("), - (Type::Punct, ")"), - (Type::Punct, ","), - (Type::Punct, "-"), - (Type::Punct, "+"), - (Type::Punct, "*"), - (Type::Punct, "/"), - (Type::Punct, "["), - (Type::Punct, "]"), - (Type::Punct, "**"), - (Type::MacroId, "!*"), - (Type::Newline, "\n"), - (Type::Punct, "%"), - (Type::Spaces, " "), - (Type::Punct, ":"), - (Type::Spaces, " "), - (Type::Punct, ";"), - (Type::Spaces, " "), - (Type::Punct, "?"), - (Type::Spaces, " "), - (Type::Punct, "_"), - (Type::Spaces, " "), - (Type::Punct, "`"), - (Type::Spaces, " "), - (Type::Punct, "{"), - (Type::Spaces, " "), - (Type::Punct, "}"), - (Type::Spaces, " "), - (Type::Punct, "~"), - (Type::Spaces, " "), - (Type::MacroId, "!*"), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Punct, "~"), + (Segment::Spaces, " "), + (Segment::Punct, "&"), + (Segment::Spaces, " "), + (Segment::Punct, "|"), + (Segment::Spaces, " "), + (Segment::Punct, "="), + (Segment::Spaces, " "), + (Segment::Punct, ">="), + (Segment::Spaces, " "), + (Segment::Punct, ">"), + (Segment::Spaces, " "), + (Segment::Punct, "<="), + (Segment::Spaces, " "), + (Segment::Punct, "<"), + (Segment::Spaces, " "), + (Segment::Punct, "~="), + (Segment::Spaces, " "), + (Segment::Punct, "<>"), + (Segment::Spaces, " "), + (Segment::Punct, "("), + (Segment::Spaces, " "), + (Segment::Punct, ")"), + (Segment::Spaces, " "), + (Segment::Punct, ","), + (Segment::Spaces, " "), + (Segment::Punct, "-"), + (Segment::Spaces, " "), + (Segment::Punct, "+"), + (Segment::Spaces, " "), + (Segment::Punct, "*"), + (Segment::Spaces, " "), + (Segment::Punct, "/"), + (Segment::Spaces, " "), + (Segment::Punct, "["), + (Segment::Spaces, " "), + (Segment::Punct, "]"), + (Segment::Spaces, " "), + (Segment::Punct, "**"), + (Segment::Newline, "\n"), + (Segment::Punct, "~"), + (Segment::Punct, "&"), + (Segment::Punct, "|"), + (Segment::Punct, "="), + (Segment::Punct, ">="), + (Segment::Punct, ">"), + (Segment::Punct, "<="), + (Segment::Punct, "<"), + (Segment::Punct, "~="), + 
(Segment::Punct, "<>"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Punct, ","), + (Segment::Punct, "-"), + (Segment::Punct, "+"), + (Segment::Punct, "*"), + (Segment::Punct, "/"), + (Segment::Punct, "["), + (Segment::Punct, "]"), + (Segment::Punct, "**"), + (Segment::MacroId, "!*"), + (Segment::Newline, "\n"), + (Segment::Punct, "%"), + (Segment::Spaces, " "), + (Segment::Punct, ":"), + (Segment::Spaces, " "), + (Segment::Punct, ";"), + (Segment::Spaces, " "), + (Segment::Punct, "?"), + (Segment::Spaces, " "), + (Segment::Punct, "_"), + (Segment::Spaces, " "), + (Segment::Punct, "`"), + (Segment::Spaces, " "), + (Segment::Punct, "{"), + (Segment::Spaces, " "), + (Segment::Punct, "}"), + (Segment::Spaces, " "), + (Segment::Punct, "~"), + (Segment::Spaces, " "), + (Segment::MacroId, "!*"), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::Later, PromptStyle::Later, PromptStyle::Later], ); @@ -583,78 +584,78 @@ fn test_positive_numbers() { "#, Mode::Auto, &[ - (Type::Number, "0"), - (Type::Spaces, " "), - (Type::Number, "1"), - (Type::Spaces, " "), - (Type::Number, "01"), - (Type::Spaces, " "), - (Type::Number, "001."), - (Type::Spaces, " "), - (Type::Number, "1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Number, "123"), - (Type::EndCommand, "."), - (Type::Spaces, " "), - (Type::Comment, "/* comment 1 */"), - (Type::Spaces, " "), - (Type::Comment, "/* comment 2 */"), - (Type::Newline, "\n"), - (Type::StartCommand, "."), - (Type::Number, "1"), - (Type::Spaces, " "), - (Type::Number, "0.1"), - (Type::Spaces, " "), - (Type::Number, "00.1"), - (Type::Spaces, " "), - (Type::Number, "00.10"), - (Type::Newline, "\n"), - (Type::Number, "5e1"), - (Type::Spaces, " "), - (Type::Number, "6E-1"), - (Type::Spaces, " "), - (Type::Number, "7e+1"), - (Type::Spaces, " "), - (Type::Number, "6E+01"), - (Type::Spaces, " "), - (Type::Number, "6e-03"), - (Type::Newline, "\n"), - (Type::StartCommand, "."), - (Type::Number, "3E1"), - 
(Type::Spaces, " "), - (Type::Number, ".4e-1"), - (Type::Spaces, " "), - (Type::Number, ".5E+1"), - (Type::Spaces, " "), - (Type::Number, ".6e+01"), - (Type::Spaces, " "), - (Type::Number, ".7E-03"), - (Type::Newline, "\n"), - (Type::Number, "1.23e1"), - (Type::Spaces, " "), - (Type::Number, "45.6E-1"), - (Type::Spaces, " "), - (Type::Number, "78.9e+1"), - (Type::Spaces, " "), - (Type::Number, "99.9E+01"), - (Type::Spaces, " "), - (Type::Number, "11.2e-03"), - (Type::Newline, "\n"), - (Type::StartCommand, "."), - (Type::Spaces, " "), - (Type::ExpectedExponent, "1e"), - (Type::Spaces, " "), - (Type::Identifier, "e1"), - (Type::Spaces, " "), - (Type::ExpectedExponent, "1e+"), - (Type::Spaces, " "), - (Type::ExpectedExponent, "1e-"), - (Type::Spaces, " "), - (Type::Number, "1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Number, "0"), + (Segment::Spaces, " "), + (Segment::Number, "1"), + (Segment::Spaces, " "), + (Segment::Number, "01"), + (Segment::Spaces, " "), + (Segment::Number, "001."), + (Segment::Spaces, " "), + (Segment::Number, "1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Number, "123"), + (Segment::EndCommand, "."), + (Segment::Spaces, " "), + (Segment::Comment, "/* comment 1 */"), + (Segment::Spaces, " "), + (Segment::Comment, "/* comment 2 */"), + (Segment::Newline, "\n"), + (Segment::StartCommand, "."), + (Segment::Number, "1"), + (Segment::Spaces, " "), + (Segment::Number, "0.1"), + (Segment::Spaces, " "), + (Segment::Number, "00.1"), + (Segment::Spaces, " "), + (Segment::Number, "00.10"), + (Segment::Newline, "\n"), + (Segment::Number, "5e1"), + (Segment::Spaces, " "), + (Segment::Number, "6E-1"), + (Segment::Spaces, " "), + (Segment::Number, "7e+1"), + (Segment::Spaces, " "), + (Segment::Number, "6E+01"), + (Segment::Spaces, " "), + (Segment::Number, "6e-03"), + (Segment::Newline, "\n"), + (Segment::StartCommand, "."), + (Segment::Number, "3E1"), + (Segment::Spaces, " "), + 
(Segment::Number, ".4e-1"), + (Segment::Spaces, " "), + (Segment::Number, ".5E+1"), + (Segment::Spaces, " "), + (Segment::Number, ".6e+01"), + (Segment::Spaces, " "), + (Segment::Number, ".7E-03"), + (Segment::Newline, "\n"), + (Segment::Number, "1.23e1"), + (Segment::Spaces, " "), + (Segment::Number, "45.6E-1"), + (Segment::Spaces, " "), + (Segment::Number, "78.9e+1"), + (Segment::Spaces, " "), + (Segment::Number, "99.9E+01"), + (Segment::Spaces, " "), + (Segment::Number, "11.2e-03"), + (Segment::Newline, "\n"), + (Segment::StartCommand, "."), + (Segment::Spaces, " "), + (Segment::ExpectedExponent, "1e"), + (Segment::Spaces, " "), + (Segment::Identifier, "e1"), + (Segment::Spaces, " "), + (Segment::ExpectedExponent, "1e+"), + (Segment::Spaces, " "), + (Segment::ExpectedExponent, "1e-"), + (Segment::Spaces, " "), + (Segment::Number, "1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::First, @@ -682,90 +683,90 @@ fn test_negative_numbers() { "#, Mode::Auto, &[ - (Type::Spaces, " "), - (Type::Number, "-0"), - (Type::Spaces, " "), - (Type::Number, "-1"), - (Type::Spaces, " "), - (Type::Number, "-01"), - (Type::Spaces, " "), - (Type::Number, "-001."), - (Type::Spaces, " "), - (Type::Number, "-1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Number, "-123"), - (Type::EndCommand, "."), - (Type::Spaces, " "), - (Type::Comment, "/* comment 1 */"), - (Type::Spaces, " "), - (Type::Comment, "/* comment 2 */"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Number, "-.1"), - (Type::Spaces, " "), - (Type::Number, "-0.1"), - (Type::Spaces, " "), - (Type::Number, "-00.1"), - (Type::Spaces, " "), - (Type::Number, "-00.10"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Number, "-5e1"), - (Type::Spaces, " "), - (Type::Number, "-6E-1"), - (Type::Spaces, " "), - (Type::Number, "-7e+1"), - (Type::Spaces, " "), - (Type::Number, "-6E+01"), - (Type::Spaces, " "), - 
(Type::Number, "-6e-03"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Number, "-.3E1"), - (Type::Spaces, " "), - (Type::Number, "-.4e-1"), - (Type::Spaces, " "), - (Type::Number, "-.5E+1"), - (Type::Spaces, " "), - (Type::Number, "-.6e+01"), - (Type::Spaces, " "), - (Type::Number, "-.7E-03"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Number, "-1.23e1"), - (Type::Spaces, " "), - (Type::Number, "-45.6E-1"), - (Type::Spaces, " "), - (Type::Number, "-78.9e+1"), - (Type::Spaces, " "), - (Type::Number, "-99.9E+01"), - (Type::Spaces, " "), - (Type::Number, "-11.2e-03"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Punct, "-"), - (Type::Comment, "/**/"), - (Type::Number, "1"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Punct, "-"), - (Type::Punct, "."), - (Type::Spaces, " "), - (Type::ExpectedExponent, "-1e"), - (Type::Spaces, " "), - (Type::Punct, "-"), - (Type::Identifier, "e1"), - (Type::Spaces, " "), - (Type::ExpectedExponent, "-1e+"), - (Type::Spaces, " "), - (Type::ExpectedExponent, "-1e-"), - (Type::Spaces, " "), - (Type::Number, "-1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Spaces, " "), + (Segment::Number, "-0"), + (Segment::Spaces, " "), + (Segment::Number, "-1"), + (Segment::Spaces, " "), + (Segment::Number, "-01"), + (Segment::Spaces, " "), + (Segment::Number, "-001."), + (Segment::Spaces, " "), + (Segment::Number, "-1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Number, "-123"), + (Segment::EndCommand, "."), + (Segment::Spaces, " "), + (Segment::Comment, "/* comment 1 */"), + (Segment::Spaces, " "), + (Segment::Comment, "/* comment 2 */"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Number, "-.1"), + (Segment::Spaces, " "), + (Segment::Number, "-0.1"), + (Segment::Spaces, " "), + (Segment::Number, "-00.1"), + (Segment::Spaces, " "), + (Segment::Number, "-00.10"), + (Segment::Newline, 
"\n"), + (Segment::Spaces, " "), + (Segment::Number, "-5e1"), + (Segment::Spaces, " "), + (Segment::Number, "-6E-1"), + (Segment::Spaces, " "), + (Segment::Number, "-7e+1"), + (Segment::Spaces, " "), + (Segment::Number, "-6E+01"), + (Segment::Spaces, " "), + (Segment::Number, "-6e-03"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Number, "-.3E1"), + (Segment::Spaces, " "), + (Segment::Number, "-.4e-1"), + (Segment::Spaces, " "), + (Segment::Number, "-.5E+1"), + (Segment::Spaces, " "), + (Segment::Number, "-.6e+01"), + (Segment::Spaces, " "), + (Segment::Number, "-.7E-03"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Number, "-1.23e1"), + (Segment::Spaces, " "), + (Segment::Number, "-45.6E-1"), + (Segment::Spaces, " "), + (Segment::Number, "-78.9e+1"), + (Segment::Spaces, " "), + (Segment::Number, "-99.9E+01"), + (Segment::Spaces, " "), + (Segment::Number, "-11.2e-03"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Punct, "-"), + (Segment::Comment, "/**/"), + (Segment::Number, "1"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Punct, "-"), + (Segment::Punct, "."), + (Segment::Spaces, " "), + (Segment::ExpectedExponent, "-1e"), + (Segment::Spaces, " "), + (Segment::Punct, "-"), + (Segment::Identifier, "e1"), + (Segment::Spaces, " "), + (Segment::ExpectedExponent, "-1e+"), + (Segment::Spaces, " "), + (Segment::ExpectedExponent, "-1e-"), + (Segment::Spaces, " "), + (Segment::Number, "-1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::First, @@ -798,59 +799,59 @@ u'fffd' U"041" "#, Mode::Auto, &[ - (Type::QuotedString, "'x'"), - (Type::Spaces, " "), - (Type::QuotedString, "\"y\""), - (Type::Spaces, " "), - (Type::QuotedString, "'abc'"), - (Type::Newline, "\n"), - (Type::QuotedString, "'Don''t'"), - (Type::Spaces, " "), - (Type::QuotedString, "\"Can't\""), - (Type::Spaces, " "), - (Type::QuotedString, "'Won''t'"), - (Type::Newline, 
"\n"), - (Type::QuotedString, "\"\"\"quoted\"\"\""), - (Type::Spaces, " "), - (Type::QuotedString, "'\"quoted\"'"), - (Type::Newline, "\n"), - (Type::QuotedString, "''"), - (Type::Spaces, " "), - (Type::QuotedString, "\"\""), - (Type::Newline, "\n"), - (Type::ExpectedQuote, "'missing end quote"), - (Type::Newline, "\n"), - (Type::ExpectedQuote, "\"missing double quote"), - (Type::Newline, "\n"), - (Type::HexString, "x\"4142\""), - (Type::Spaces, " "), - (Type::HexString, "X'5152'"), - (Type::Newline, "\n"), - (Type::UnicodeString, "u'fffd'"), - (Type::Spaces, " "), - (Type::UnicodeString, "U\"041\""), - (Type::Newline, "\n"), - (Type::StartCommand, "+"), - (Type::Spaces, " "), - (Type::Identifier, "new"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::Newline, "\n"), - (Type::Punct, "+"), - (Type::Spaces, " "), - (Type::Comment, "/* comment */"), - (Type::Spaces, " "), - (Type::QuotedString, "'string continuation'"), - (Type::Newline, "\n"), - (Type::Punct, "+"), - (Type::Spaces, " "), - (Type::Comment, "/* also a punctuator on blank line"), - (Type::Newline, "\n"), - (Type::StartCommand, "-"), - (Type::Spaces, " "), - (Type::QuotedString, "'new command'"), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::QuotedString, "'x'"), + (Segment::Spaces, " "), + (Segment::QuotedString, "\"y\""), + (Segment::Spaces, " "), + (Segment::QuotedString, "'abc'"), + (Segment::Newline, "\n"), + (Segment::QuotedString, "'Don''t'"), + (Segment::Spaces, " "), + (Segment::QuotedString, "\"Can't\""), + (Segment::Spaces, " "), + (Segment::QuotedString, "'Won''t'"), + (Segment::Newline, "\n"), + (Segment::QuotedString, "\"\"\"quoted\"\"\""), + (Segment::Spaces, " "), + (Segment::QuotedString, "'\"quoted\"'"), + (Segment::Newline, "\n"), + (Segment::QuotedString, "''"), + (Segment::Spaces, " "), + (Segment::QuotedString, "\"\""), + (Segment::Newline, "\n"), + (Segment::ExpectedQuote, "'missing end quote"), + (Segment::Newline, "\n"), + (Segment::ExpectedQuote, 
"\"missing double quote"), + (Segment::Newline, "\n"), + (Segment::HexString, "x\"4142\""), + (Segment::Spaces, " "), + (Segment::HexString, "X'5152'"), + (Segment::Newline, "\n"), + (Segment::UnicodeString, "u'fffd'"), + (Segment::Spaces, " "), + (Segment::UnicodeString, "U\"041\""), + (Segment::Newline, "\n"), + (Segment::StartCommand, "+"), + (Segment::Spaces, " "), + (Segment::Identifier, "new"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::Newline, "\n"), + (Segment::Punct, "+"), + (Segment::Spaces, " "), + (Segment::Comment, "/* comment */"), + (Segment::Spaces, " "), + (Segment::QuotedString, "'string continuation'"), + (Segment::Newline, "\n"), + (Segment::Punct, "+"), + (Segment::Spaces, " "), + (Segment::Comment, "/* also a punctuator on blank line"), + (Segment::Newline, "\n"), + (Segment::StartCommand, "-"), + (Segment::Spaces, " "), + (Segment::QuotedString, "'new command'"), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Later, @@ -878,26 +879,26 @@ title my title. "#, Mode::Interactive, &[ - (Type::Shbang, "#! /usr/bin/pspp"), - (Type::Newline, "\n"), - (Type::Identifier, "title"), - (Type::Spaces, " "), - (Type::Identifier, "my"), - (Type::Spaces, " "), - (Type::Identifier, "title"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "#"), - (Type::MacroId, "!"), - (Type::Spaces, " "), - (Type::Punct, "/"), - (Type::Identifier, "usr"), - (Type::Punct, "/"), - (Type::Identifier, "bin"), - (Type::Punct, "/"), - (Type::Identifier, "pspp"), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Shbang, "#! 
/usr/bin/pspp"), + (Segment::Newline, "\n"), + (Segment::Identifier, "title"), + (Segment::Spaces, " "), + (Segment::Identifier, "my"), + (Segment::Spaces, " "), + (Segment::Identifier, "title"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "#"), + (Segment::MacroId, "!"), + (Segment::Spaces, " "), + (Segment::Punct, "/"), + (Segment::Identifier, "usr"), + (Segment::Punct, "/"), + (Segment::Identifier, "bin"), + (Segment::Punct, "/"), + (Segment::Identifier, "pspp"), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::First, PromptStyle::First, PromptStyle::Later], ); @@ -925,61 +926,61 @@ next command. "#, Mode::Interactive, &[ - (Type::CommentCommand, "* Comment commands \"don't"), - (Type::Newline, "\n"), - (Type::CommentCommand, "have to contain valid tokens"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::CommentCommand, "** Check ambiguity with ** token"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::CommentCommand, "****************"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::CommentCommand, "comment keyword works too"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::CommentCommand, "COMM also"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "com"), - (Type::Spaces, " "), - (Type::Identifier, "is"), - (Type::Spaces, " "), - (Type::Identifier, "ambiguous"), - (Type::Spaces, " "), - (Type::ReservedWord, "with"), - (Type::Spaces, " "), - (Type::Identifier, "COMPUTE"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::Spaces, " "), + (Segment::CommentCommand, "* Comment commands \"don't"), + (Segment::Newline, "\n"), + (Segment::CommentCommand, "have to contain valid tokens"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + 
(Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::CommentCommand, "** Check ambiguity with ** token"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::CommentCommand, "****************"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::CommentCommand, "comment keyword works too"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::CommentCommand, "COMM also"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "com"), + (Segment::Spaces, " "), + (Segment::Identifier, "is"), + (Segment::Spaces, " "), + (Segment::Identifier, "ambiguous"), + (Segment::Spaces, " "), + (Segment::ReservedWord, "with"), + (Segment::Spaces, " "), + (Segment::Identifier, "COMPUTE"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), ( - Type::CommentCommand, + Segment::CommentCommand, "* Comment need not start at left margin", ), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::CommentCommand, "* Comment ends with blank line"), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::Identifier, "next"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::CommentCommand, "* Comment ends with blank line"), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::Identifier, "next"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + 
(Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Comment, @@ -1018,36 +1019,36 @@ second paragraph. "#, Mode::Interactive, &[ - (Type::StartDocument, ""), - (Type::Document, "DOCUMENT one line."), - (Type::EndCommand, ""), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::StartDocument, ""), - (Type::Document, "DOC more"), - (Type::Newline, "\n"), - (Type::Document, " than"), - (Type::Newline, "\n"), - (Type::Document, " one"), - (Type::Newline, "\n"), - (Type::Document, " line."), - (Type::EndCommand, ""), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::StartDocument, ""), - (Type::Document, "docu"), - (Type::Newline, "\n"), - (Type::Document, "first.paragraph"), - (Type::Newline, "\n"), - (Type::Document, "isn't parsed as tokens"), - (Type::Newline, "\n"), - (Type::Document, ""), - (Type::Newline, "\n"), - (Type::Document, "second paragraph."), - (Type::EndCommand, ""), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::StartDocument, ""), + (Segment::Document, "DOCUMENT one line."), + (Segment::EndCommand, ""), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::StartDocument, ""), + (Segment::Document, "DOC more"), + (Segment::Newline, "\n"), + (Segment::Document, " than"), + (Segment::Newline, "\n"), + (Segment::Document, " one"), + (Segment::Newline, "\n"), + (Segment::Document, " line."), + (Segment::EndCommand, ""), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::StartDocument, ""), + (Segment::Document, "docu"), + (Segment::Newline, "\n"), + (Segment::Document, "first.paragraph"), + (Segment::Newline, "\n"), + (Segment::Document, "isn't parsed as tokens"), + (Segment::Newline, "\n"), + (Segment::Document, ""), + (Segment::Newline, "\n"), + (Segment::Document, "second paragraph."), + (Segment::EndCommand, ""), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + 
(Segment::End, ""), ], &[ PromptStyle::First, @@ -1076,34 +1077,34 @@ FILE /* "#, Mode::Interactive, &[ - (Type::Identifier, "FIL"), - (Type::Spaces, " "), - (Type::Identifier, "label"), - (Type::Spaces, " "), - (Type::UnquotedString, "isn't quoted"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "FILE"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "lab"), - (Type::Spaces, " "), - (Type::QuotedString, "'is quoted'"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "FILE"), - (Type::Spaces, " "), - (Type::Comment, "/*"), - (Type::Newline, "\n"), - (Type::Comment, "/**/"), - (Type::Spaces, " "), - (Type::Identifier, "lab"), - (Type::Spaces, " "), - (Type::UnquotedString, "not quoted here either"), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "FIL"), + (Segment::Spaces, " "), + (Segment::Identifier, "label"), + (Segment::Spaces, " "), + (Segment::UnquotedString, "isn't quoted"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "FILE"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "lab"), + (Segment::Spaces, " "), + (Segment::QuotedString, "'is quoted'"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "FILE"), + (Segment::Spaces, " "), + (Segment::Comment, "/*"), + (Segment::Newline, "\n"), + (Segment::Comment, "/**/"), + (Segment::Spaces, " "), + (Segment::Identifier, "lab"), + (Segment::Spaces, " "), + (Segment::UnquotedString, "not quoted here either"), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::First, @@ -1145,92 +1146,92 @@ not data "#, Mode::Interactive, &[ - (Type::Identifier, "begin"), - (Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "end"), - 
(Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::Identifier, "begin"), - (Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::EndCommand, "."), - (Type::Spaces, " "), - (Type::Comment, "/*"), - (Type::Newline, "\n"), - (Type::InlineData, "123"), - (Type::Newline, "\n"), - (Type::InlineData, "xxx"), - (Type::Newline, "\n"), - (Type::Identifier, "end"), - (Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::Identifier, "BEG"), - (Type::Spaces, " "), - (Type::Comment, "/**/"), - (Type::Spaces, " "), - (Type::Identifier, "DAT"), - (Type::Spaces, " "), - (Type::Comment, "/*"), - (Type::Newline, "\n"), - (Type::InlineData, "5 6 7 /* x"), - (Type::Newline, "\n"), - (Type::InlineData, ""), - (Type::Newline, "\n"), - (Type::InlineData, "end data"), - (Type::Newline, "\n"), - (Type::Identifier, "end"), - (Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::Newline, "\n"), - (Type::StartCommand, "."), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::Identifier, "begin"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::InlineData, "data"), - (Type::Newline, "\n"), - (Type::Identifier, "end"), - (Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::Identifier, "begin"), - (Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::Spaces, " "), - (Type::QuotedString, "\"xxx\""), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "begin"), - (Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::Spaces, " "), - (Type::Number, "123"), - (Type::EndCommand, "."), - 
(Type::Newline, "\n"), - (Type::ReservedWord, "not"), - (Type::Spaces, " "), - (Type::Identifier, "data"), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "begin"), + (Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "end"), + (Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::Identifier, "begin"), + (Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::EndCommand, "."), + (Segment::Spaces, " "), + (Segment::Comment, "/*"), + (Segment::Newline, "\n"), + (Segment::InlineData, "123"), + (Segment::Newline, "\n"), + (Segment::InlineData, "xxx"), + (Segment::Newline, "\n"), + (Segment::Identifier, "end"), + (Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::Identifier, "BEG"), + (Segment::Spaces, " "), + (Segment::Comment, "/**/"), + (Segment::Spaces, " "), + (Segment::Identifier, "DAT"), + (Segment::Spaces, " "), + (Segment::Comment, "/*"), + (Segment::Newline, "\n"), + (Segment::InlineData, "5 6 7 /* x"), + (Segment::Newline, "\n"), + (Segment::InlineData, ""), + (Segment::Newline, "\n"), + (Segment::InlineData, "end data"), + (Segment::Newline, "\n"), + (Segment::Identifier, "end"), + (Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::Newline, "\n"), + (Segment::StartCommand, "."), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::Identifier, "begin"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::InlineData, "data"), + (Segment::Newline, "\n"), + (Segment::Identifier, "end"), + 
(Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::Identifier, "begin"), + (Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::Spaces, " "), + (Segment::QuotedString, "\"xxx\""), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "begin"), + (Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::Spaces, " "), + (Segment::Number, "123"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::ReservedWord, "not"), + (Segment::Spaces, " "), + (Segment::Identifier, "data"), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Data, @@ -1279,62 +1280,62 @@ end repeat. "#, Mode::Interactive, &[ - (Type::Identifier, "do"), - (Type::Spaces, " "), - (Type::Identifier, "repeat"), - (Type::Spaces, " "), - (Type::Identifier, "x"), - (Type::Punct, "="), - (Type::Identifier, "a"), - (Type::Spaces, " "), - (Type::Identifier, "b"), - (Type::Spaces, " "), - (Type::Identifier, "c"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "y"), - (Type::Punct, "="), - (Type::Identifier, "d"), - (Type::Spaces, " "), - (Type::Identifier, "e"), - (Type::Spaces, " "), - (Type::Identifier, "f"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, " do repeat a=1 thru 5."), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, "another command."), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, "second command"), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, "+ third command."), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print."), - (Type::Newline, "\n"), - (Type::Identifier, "end"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "repeat"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "do"), - (Type::Newline, "\n"), - (Type::Spaces, " 
"), - (Type::Identifier, "repeat"), - (Type::Spaces, " "), - (Type::Identifier, "#a"), - (Type::Punct, "="), - (Type::Number, "1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, " inner command."), - (Type::Newline, "\n"), - (Type::Identifier, "end"), - (Type::Spaces, " "), - (Type::Identifier, "repeat"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "do"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::Spaces, " "), + (Segment::Identifier, "x"), + (Segment::Punct, "="), + (Segment::Identifier, "a"), + (Segment::Spaces, " "), + (Segment::Identifier, "b"), + (Segment::Spaces, " "), + (Segment::Identifier, "c"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "y"), + (Segment::Punct, "="), + (Segment::Identifier, "d"), + (Segment::Spaces, " "), + (Segment::Identifier, "e"), + (Segment::Spaces, " "), + (Segment::Identifier, "f"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, " do repeat a=1 thru 5."), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, "another command."), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, "second command"), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, "+ third command."), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, "end /* x */ /* y */ repeat print."), + (Segment::Newline, "\n"), + (Segment::Identifier, "end"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "do"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::Spaces, " "), + (Segment::Identifier, "#a"), + (Segment::Punct, "="), + (Segment::Number, "1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, " inner command."), + (Segment::Newline, "\n"), + 
(Segment::Identifier, "end"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Later, @@ -1371,44 +1372,44 @@ fn test_do_repeat_overflow() { .map(|s| s.as_str()) .collect(); let mut expect_output = vec![ - (Type::Identifier, "do"), - (Type::Spaces, " "), - (Type::Identifier, "repeat"), - (Type::Spaces, " "), - (Type::Identifier, "v0"), - (Type::Punct, "="), - (Type::Number, "0"), - (Type::Spaces, " "), - (Type::Identifier, "thru"), - (Type::Spaces, " "), - (Type::Number, "5"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), + (Segment::Identifier, "do"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::Spaces, " "), + (Segment::Identifier, "v0"), + (Segment::Punct, "="), + (Segment::Number, "0"), + (Segment::Spaces, " "), + (Segment::Identifier, "thru"), + (Segment::Spaces, " "), + (Segment::Number, "5"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), ]; for i in 1..N { - expect_output.push((Type::DoRepeatCommand, &do_repeat[i].trim_end())); + expect_output.push((Segment::DoRepeatCommand, &do_repeat[i].trim_end())); if i >= 255 { - expect_output.push((Type::DoRepeatOverflow, "")); + expect_output.push((Segment::DoRepeatOverflow, "")); } - expect_output.push((Type::Newline, "\n")); + expect_output.push((Segment::Newline, "\n")); } for i in 0..254 { - expect_output.push((Type::DoRepeatCommand, &end_repeat[i].trim_end())); - expect_output.push((Type::Newline, "\n")); + expect_output.push((Segment::DoRepeatCommand, &end_repeat[i].trim_end())); + expect_output.push((Segment::Newline, "\n")); } let comments: Vec = (0..(N - 254)).rev().map(|i| format!("/* {i}")).collect(); for comment in &comments { expect_output.extend([ - (Type::Identifier, "end"), - (Type::Spaces, " "), - (Type::Identifier, "repeat"), - (Type::EndCommand, "."), - (Type::Spaces, " "), - (Type::Comment, comment), - (Type::Newline, "\n"), + 
(Segment::Identifier, "end"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::EndCommand, "."), + (Segment::Spaces, " "), + (Segment::Comment, comment), + (Segment::Newline, "\n"), ]); } - expect_output.push((Type::End, "")); + expect_output.push((Segment::End, "")); let expect_prompts: Vec<_> = (0..N * 2 - 3) .map(|_| PromptStyle::DoRepeat) @@ -1437,62 +1438,62 @@ end repeat "#, Mode::Batch, &[ - (Type::Identifier, "do"), - (Type::Spaces, " "), - (Type::Identifier, "repeat"), - (Type::Spaces, " "), - (Type::Identifier, "x"), - (Type::Punct, "="), - (Type::Identifier, "a"), - (Type::Spaces, " "), - (Type::Identifier, "b"), - (Type::Spaces, " "), - (Type::Identifier, "c"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "y"), - (Type::Punct, "="), - (Type::Identifier, "d"), - (Type::Spaces, " "), - (Type::Identifier, "e"), - (Type::Spaces, " "), - (Type::Identifier, "f"), - (Type::Newline, "\n"), - (Type::StartCommand, ""), - (Type::DoRepeatCommand, "do repeat a=1 thru 5"), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, "another command"), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, "second command"), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, "+ third command"), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, "end /* x */ /* y */ repeat print"), - (Type::Newline, "\n"), - (Type::Identifier, "end"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "repeat"), - (Type::Newline, "\n"), - (Type::StartCommand, ""), - (Type::Identifier, "do"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "repeat"), - (Type::Spaces, " "), - (Type::Identifier, "#a"), - (Type::Punct, "="), - (Type::Number, "1"), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::DoRepeatCommand, " inner command"), - (Type::Newline, "\n"), - (Type::Identifier, "end"), - (Type::Spaces, " "), - (Type::Identifier, "repeat"), - (Type::Newline, "\n"), - (Type::End, 
""), + (Segment::Identifier, "do"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::Spaces, " "), + (Segment::Identifier, "x"), + (Segment::Punct, "="), + (Segment::Identifier, "a"), + (Segment::Spaces, " "), + (Segment::Identifier, "b"), + (Segment::Spaces, " "), + (Segment::Identifier, "c"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "y"), + (Segment::Punct, "="), + (Segment::Identifier, "d"), + (Segment::Spaces, " "), + (Segment::Identifier, "e"), + (Segment::Spaces, " "), + (Segment::Identifier, "f"), + (Segment::Newline, "\n"), + (Segment::StartCommand, ""), + (Segment::DoRepeatCommand, "do repeat a=1 thru 5"), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, "another command"), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, "second command"), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, "+ third command"), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, "end /* x */ /* y */ repeat print"), + (Segment::Newline, "\n"), + (Segment::Identifier, "end"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::Newline, "\n"), + (Segment::StartCommand, ""), + (Segment::Identifier, "do"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::Spaces, " "), + (Segment::Identifier, "#a"), + (Segment::Punct, "="), + (Segment::Number, "1"), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::DoRepeatCommand, " inner command"), + (Segment::Newline, "\n"), + (Segment::Identifier, "end"), + (Segment::Spaces, " "), + (Segment::Identifier, "repeat"), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Later, @@ -1515,7 +1516,7 @@ end repeat mod define { use crate::{ - lex::segment::{Mode, Type}, + lex::segment::{Mode, Segment}, prompt::PromptStyle, }; @@ -1530,18 +1531,18 @@ var1 var2 var3 "!enddefine" "#, 
Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::MacroBody, "var1 var2 var3 \"!enddefine\""), - (Type::Newline, "\n"), - (Type::MacroId, "!enddefine"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::MacroBody, "var1 var2 var3 \"!enddefine\""), + (Segment::Newline, "\n"), + (Segment::MacroId, "!enddefine"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::Define, PromptStyle::First], ); @@ -1555,17 +1556,17 @@ var1 var2 var3 "!enddefine" "#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::MacroBody, " var1 var2 var3 /* !enddefine"), - (Type::Newline, "\n"), - (Type::MacroId, "!enddefine"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::MacroBody, " var1 var2 var3 /* !enddefine"), + (Segment::Newline, "\n"), + (Segment::MacroId, "!enddefine"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::First], ); @@ -1579,17 +1580,17 @@ var1 var2 var3!enddefine. 
"#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::MacroBody, "var1 var2 var3"), - (Type::MacroId, "!enddefine"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::MacroBody, "var1 var2 var3"), + (Segment::MacroId, "!enddefine"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::First], ); @@ -1602,16 +1603,16 @@ var1 var2 var3!enddefine. "#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::MacroBody, "var1 var2 var3"), - (Type::MacroId, "!enddefine"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::MacroBody, "var1 var2 var3"), + (Segment::MacroId, "!enddefine"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::First], ); @@ -1625,16 +1626,16 @@ var1 var2 var3!enddefine. 
"#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::MacroId, "!enddefine"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::MacroId, "!enddefine"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::First], ); @@ -1650,20 +1651,20 @@ var1 var2 var3!enddefine. "#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::MacroBody, ""), - (Type::Newline, "\n"), - (Type::MacroBody, ""), - (Type::Newline, "\n"), - (Type::MacroId, "!enddefine"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::MacroBody, ""), + (Segment::Newline, "\n"), + (Segment::MacroBody, ""), + (Segment::Newline, "\n"), + (Segment::MacroId, "!enddefine"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Define, @@ -1682,29 +1683,29 @@ var1 var2 var3!enddefine. 
"#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Identifier, "a"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Punct, ","), - (Type::Spaces, " "), - (Type::Identifier, "b"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Punct, ","), - (Type::Spaces, " "), - (Type::Identifier, "c"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::MacroId, "!enddefine"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::Identifier, "a"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Punct, ","), + (Segment::Spaces, " "), + (Segment::Identifier, "b"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Punct, ","), + (Segment::Spaces, " "), + (Segment::Identifier, "c"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::MacroId, "!enddefine"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::Define, PromptStyle::First], ); @@ -1722,35 +1723,35 @@ var1 var2 var3!enddefine. 
"#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "a"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Punct, ","), - (Type::Spaces, " "), - (Type::Identifier, "b"), - (Type::Punct, "("), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Punct, ")"), - (Type::Punct, ","), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "c"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::MacroId, "!enddefine"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "a"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Punct, ","), + (Segment::Spaces, " "), + (Segment::Identifier, "b"), + (Segment::Punct, "("), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Punct, ")"), + (Segment::Punct, ","), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "c"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::MacroId, "!enddefine"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Later, @@ -1775,27 +1776,27 @@ content 2 "#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Newline, "\n"), - (Type::Punct, "("), - (Type::Identifier, "x"), - (Type::Punct, ","), - (Type::Identifier, "y"), - (Type::Punct, ","), - (Type::Identifier, "z"), - (Type::Newline, "\n"), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::MacroBody, "content 1"), - (Type::Newline, 
"\n"), - (Type::MacroBody, "content 2"), - (Type::Newline, "\n"), - (Type::MacroId, "!enddefine"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Newline, "\n"), + (Segment::Punct, "("), + (Segment::Identifier, "x"), + (Segment::Punct, ","), + (Segment::Identifier, "y"), + (Segment::Punct, ","), + (Segment::Identifier, "z"), + (Segment::Newline, "\n"), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::MacroBody, "content 1"), + (Segment::Newline, "\n"), + (Segment::MacroBody, "content 2"), + (Segment::Newline, "\n"), + (Segment::MacroId, "!enddefine"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Later, @@ -1816,22 +1817,22 @@ data list /x 1. "#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "data"), - (Type::Spaces, " "), - (Type::Identifier, "list"), - (Type::Spaces, " "), - (Type::Punct, "/"), - (Type::Identifier, "x"), - (Type::Spaces, " "), - (Type::Number, "1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "data"), + (Segment::Spaces, " "), + (Segment::Identifier, "list"), + (Segment::Spaces, " "), + (Segment::Punct, "/"), + (Segment::Identifier, "x"), + (Segment::Spaces, " "), + (Segment::Number, "1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::First, PromptStyle::First], ); @@ -1846,24 +1847,24 @@ data list /x 1. 
"#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Newline, "\n"), - (Type::Identifier, "x"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "data"), - (Type::Spaces, " "), - (Type::Identifier, "list"), - (Type::Spaces, " "), - (Type::Punct, "/"), - (Type::Identifier, "x"), - (Type::Spaces, " "), - (Type::Number, "1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Newline, "\n"), + (Segment::Identifier, "x"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "data"), + (Segment::Spaces, " "), + (Segment::Identifier, "list"), + (Segment::Spaces, " "), + (Segment::Punct, "/"), + (Segment::Identifier, "x"), + (Segment::Spaces, " "), + (Segment::Number, "1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::Later, PromptStyle::First, PromptStyle::First], ); @@ -1878,26 +1879,26 @@ data list /x 1. 
"#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "x"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "data"), - (Type::Spaces, " "), - (Type::Identifier, "list"), - (Type::Spaces, " "), - (Type::Punct, "/"), - (Type::Identifier, "x"), - (Type::Spaces, " "), - (Type::Number, "1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "x"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "data"), + (Segment::Spaces, " "), + (Segment::Identifier, "list"), + (Segment::Spaces, " "), + (Segment::Punct, "/"), + (Segment::Identifier, "x"), + (Segment::Spaces, " "), + (Segment::Number, "1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::First, PromptStyle::First, PromptStyle::First], ); @@ -1913,22 +1914,22 @@ data list /x 1. 
"#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "data"), - (Type::Spaces, " "), - (Type::Identifier, "list"), - (Type::Spaces, " "), - (Type::Punct, "/"), - (Type::Identifier, "x"), - (Type::Spaces, " "), - (Type::Number, "1"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "data"), + (Segment::Spaces, " "), + (Segment::Identifier, "list"), + (Segment::Spaces, " "), + (Segment::Punct, "/"), + (Segment::Identifier, "x"), + (Segment::Spaces, " "), + (Segment::Number, "1"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::First, PromptStyle::First], ); @@ -1943,17 +1944,17 @@ content line 2 "#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::MacroBody, "content line 1"), - (Type::Newline, "\n"), - (Type::MacroBody, "content line 2"), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, "!macro1"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::MacroBody, "content line 1"), + (Segment::Newline, "\n"), + (Segment::MacroBody, "content line 2"), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Define, @@ -1970,13 +1971,13 @@ content line 2 "#, Mode::Interactive, &[ - (Type::Identifier, "define"), - (Type::Spaces, " "), - (Type::MacroName, "!macro1"), - (Type::Punct, "("), - (Type::Punct, ")"), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "define"), + (Segment::Spaces, " "), + (Segment::MacroName, 
"!macro1"), + (Segment::Punct, "("), + (Segment::Punct, ")"), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[PromptStyle::Define], ); @@ -1996,46 +1997,46 @@ fourth command. "#, Mode::Batch, &[ - (Type::Identifier, "first"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "another"), - (Type::Spaces, " "), - (Type::Identifier, "line"), - (Type::Spaces, " "), - (Type::Identifier, "of"), - (Type::Spaces, " "), - (Type::Identifier, "first"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::Newline, "\n"), - (Type::StartCommand, "+"), - (Type::Spaces, " "), - (Type::Identifier, "second"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::Newline, "\n"), - (Type::StartCommand, ""), - (Type::Identifier, "third"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::Identifier, "fourth"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "fifth"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "first"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "another"), + (Segment::Spaces, " "), + (Segment::Identifier, "line"), + (Segment::Spaces, " "), + (Segment::Identifier, "of"), + (Segment::Spaces, " "), + (Segment::Identifier, "first"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::Newline, "\n"), + (Segment::StartCommand, "+"), + (Segment::Spaces, " "), + (Segment::Identifier, "second"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::Newline, "\n"), + (Segment::StartCommand, ""), + (Segment::Identifier, "third"), + 
(Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::Identifier, "fourth"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "fifth"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Later, @@ -2068,76 +2069,76 @@ fourth command. "#, Mode::Auto, &[ - (Type::Identifier, "command"), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "another"), - (Type::Spaces, " "), - (Type::Identifier, "line"), - (Type::Spaces, " "), - (Type::Identifier, "of"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::Newline, "\n"), - (Type::StartCommand, ""), - (Type::Number, "2"), - (Type::Identifier, "sls"), - (Type::Newline, "\n"), - (Type::StartCommand, "+"), - (Type::Spaces, " "), - (Type::Identifier, "another"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::Newline, "\n"), - (Type::Identifier, "another"), - (Type::Spaces, " "), - (Type::Identifier, "line"), - (Type::Spaces, " "), - (Type::Identifier, "of"), - (Type::Spaces, " "), - (Type::Identifier, "second"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::Newline, "\n"), - (Type::StartCommand, ""), - (Type::Identifier, "data"), - (Type::Spaces, " "), - (Type::Identifier, "list"), - (Type::Spaces, " "), - (Type::Punct, "/"), - (Type::Identifier, "x"), - (Type::Spaces, " "), - (Type::Number, "1"), - (Type::Newline, "\n"), - (Type::StartCommand, ""), - (Type::Identifier, "aggregate"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "print"), - (Type::Spaces, " "), - (Type::Identifier, "eject"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Identifier, "twostep"), - (Type::Spaces, " "), - 
(Type::Identifier, "cluster"), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::SeparateCommands, ""), - (Type::Newline, "\n"), - (Type::Identifier, "fourth"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::Spaces, " "), - (Type::Identifier, "fifth"), - (Type::Spaces, " "), - (Type::Identifier, "command"), - (Type::EndCommand, "."), - (Type::Newline, "\n"), - (Type::End, ""), + (Segment::Identifier, "command"), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "another"), + (Segment::Spaces, " "), + (Segment::Identifier, "line"), + (Segment::Spaces, " "), + (Segment::Identifier, "of"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::Newline, "\n"), + (Segment::StartCommand, ""), + (Segment::Number, "2"), + (Segment::Identifier, "sls"), + (Segment::Newline, "\n"), + (Segment::StartCommand, "+"), + (Segment::Spaces, " "), + (Segment::Identifier, "another"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::Newline, "\n"), + (Segment::Identifier, "another"), + (Segment::Spaces, " "), + (Segment::Identifier, "line"), + (Segment::Spaces, " "), + (Segment::Identifier, "of"), + (Segment::Spaces, " "), + (Segment::Identifier, "second"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::Newline, "\n"), + (Segment::StartCommand, ""), + (Segment::Identifier, "data"), + (Segment::Spaces, " "), + (Segment::Identifier, "list"), + (Segment::Spaces, " "), + (Segment::Punct, "/"), + (Segment::Identifier, "x"), + (Segment::Spaces, " "), + (Segment::Number, "1"), + (Segment::Newline, "\n"), + (Segment::StartCommand, ""), + (Segment::Identifier, "aggregate"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Identifier, "print"), + (Segment::Spaces, " "), + (Segment::Identifier, "eject"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + 
(Segment::Identifier, "twostep"), + (Segment::Spaces, " "), + (Segment::Identifier, "cluster"), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::SeparateCommands, ""), + (Segment::Newline, "\n"), + (Segment::Identifier, "fourth"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::Spaces, " "), + (Segment::Identifier, "fifth"), + (Segment::Spaces, " "), + (Segment::Identifier, "command"), + (Segment::EndCommand, "."), + (Segment::Newline, "\n"), + (Segment::End, ""), ], &[ PromptStyle::Later, diff --git a/rust/src/lex/token.rs b/rust/src/lex/token.rs new file mode 100644 index 0000000000..0b2021b5c8 --- /dev/null +++ b/rust/src/lex/token.rs @@ -0,0 +1,309 @@ +use thiserror::Error as ThisError; + +use super::segment::Segment; + +pub enum Token { + /// End of input. + End, + + /// Identifier. + Id(String), + + /// Number. + Number(f64), + + /// Quoted string. + String(String), + + /// Command terminator or separator. + /// + /// Usually this is `.`, but a blank line also separates commands, and in + /// batch mode any line that begins with a non-blank starts a new command. + EndCommand, + + /// Operators, punctuators, and reserved words. + Punct(Punct), + + /// Tokens that only appear in macros. + MacroToken(MacroToken), +} + +pub enum Punct { + /// `+`. + Plus, + + /// `-`. + Dash, + + /// `*`. + Asterisk, + + /// `/`. + Slash, + + /// `=`. + Equals, + + /// `(`. + LParen, + + /// `)`. + RParen, + + /// `[`. + LSquare, + + /// `]`. + RSquare, + + /// `{`. + LCurly, + + /// `}`. + RCurly, + + /// `,`. + Comma, + + /// `;`. + Semicolon, + + /// `:`. + Colon, + + /// `AND` or `&`. + And, + + /// `OR` or `|`. + Or, + + /// `NOT` or `~`. + Not, + + /// `EQ` or `=`. + Eq, + + /// `GE` or `>=`. + Ge, + + /// `GT` or `>`. + Gt, + + /// `LE` or `<=`. + Le, + + /// `LT` or `<`. + Lt, + + /// `NE` or `~=` or `<>`. + Ne, + + /// `ALL`.
+ All, + + /// `BY`. + By, + + /// `TO`. + To, + + /// `WITH`. + With, + + /// `**`. + Exp, +} + +/// Tokens that only appear in macros. +pub enum MacroToken { + /// Identifier starting with `!`. + MacroId(String), + + /// `!`. + Bang, + + /// `%`. + Percent, + + /// `?`. + Question, + + /// `` ` ``. + Backtick, + + /// `_`. + /// + /// Although underscores may appear within identifiers, they can't be the + /// first character, so this represents an underscore found on its own. + Underscore, +} + +#[derive(ThisError, Debug)] +pub enum TokenError { + /// Unterminated string constant. + #[error("Unterminated string constant.")] + ExpectedQuote, + + /// Missing exponent. + #[error("Missing exponent following `{0}`")] + ExpectedExponent(String), + + /// Odd length hex string. + #[error("String of hex digits has {0} characters, which is not a multiple of 2.")] + OddLengthHexString(usize), + + /// Invalid hex digit. + #[error("Invalid hex digit {0:?}.")] + BadHexDigit(char), + + /// Invalid length Unicode string. + #[error("Unicode string contains {0} bytes, which is not in the valid range of 1 to 8 bytes.")] + BadLengthUnicodeString(usize), + + /// Invalid code point. + #[error("U+{0:04X} is not a valid Unicode code point.")] + BadCodePoint(u32), + + /// Expected hexadecimal Unicode code point. + #[error("Expected hexadecimal Unicode code point.")] + ExpectedCodePoint, + + /// `DO REPEAT` nested too deeply. + #[error("`DO REPEAT` nested too deeply.")] + DoRepeatOverflow, + + /// Unexpected character. + #[error("Unexpected character {0:?} in input.")] + UnexpectedChar(char), +} + +impl Token { + pub fn try_from_segment((segment, s): (Segment, &str)) -> Result<Option<Self>, TokenError> { + match segment { + Segment::Number => Ok(Some(Self::Number(s.parse().unwrap()))), + Segment::QuotedString => { + // Trim quote mark from front and back.
+ let mut chars = s.chars(); + let quote = chars.next().unwrap(); + let s = chars.as_str().strip_suffix(quote).unwrap(); + + // Replace doubled quotes by single ones. + let (single_quote, double_quote) = match quote { + '\'' => ("'", "''"), + '"' => ("\"", "\"\""), + _ => unreachable!(), + }; + Ok(Some(Self::String(s.replace(double_quote, single_quote)))) + } + Segment::HexString => { + // Strip `X"` prefix and `"` suffix (or variations). + let s = &s[2..s.len() - 1]; + for c in s.chars() { + if !c.is_ascii_hexdigit() { + return Err(TokenError::BadHexDigit(c)) + } + } + if s.len() % 2 != 0 { + return Err(TokenError::OddLengthHexString(s.len())) + } + let mut out = String::with_capacity(s.len()); + for pair in s.as_bytes().chunks_exact(2) { + let hi = char::from(pair[0]).to_digit(16).unwrap() as u8; + let lo = char::from(pair[1]).to_digit(16).unwrap() as u8; + out.push(char::from(hi * 16 + lo)); + } + Ok(Some(Self::String(out))) + } + Segment::UnicodeString => { + // Strip `U"` prefix and `"` suffix (or variations). 
+ let s = &s[2..s.len() - 1]; + if !(1..=8).contains(&s.len()) { + return Err(TokenError::BadLengthUnicodeString(s.len())); + } + let Ok(code_point) = u32::from_str_radix(s, 16) else { + return Err(TokenError::ExpectedCodePoint); + }; + let Some(c) = char::from_u32(code_point) else { + return Err(TokenError::BadCodePoint(code_point)); + }; + Ok(Some(Self::String(String::from(c)))) + } + + Segment::UnquotedString + | Segment::DoRepeatCommand + | Segment::InlineData + | Segment::Document + | Segment::MacroBody + | Segment::MacroName => Ok(Some(Self::String(String::from(s)))), + + Segment::ReservedWord => { + let c0 = s.as_bytes()[0].to_ascii_uppercase(); + let c1 = s.as_bytes()[1].to_ascii_uppercase(); + match (c0, c1) { + (b'B', _) => Ok(Some(Self::Punct(Punct::By))), + (b'E', _) => Ok(Some(Self::Punct(Punct::Eq))), + (b'G', b'T') => Ok(Some(Self::Punct(Punct::Gt))), + (b'G', _) => Ok(Some(Self::Punct(Punct::Ge))), + (b'L', b'T') => Ok(Some(Self::Punct(Punct::Lt))), + (b'L', _) => Ok(Some(Self::Punct(Punct::Le))), + (b'N', b'E') => Ok(Some(Self::Punct(Punct::Ne))), + (b'N', _) => Ok(Some(Self::Punct(Punct::Not))), + (b'O', _) => Ok(Some(Self::Punct(Punct::Or))), + (b'T', _) => Ok(Some(Self::Punct(Punct::To))), + (b'A', b'L') => Ok(Some(Self::Punct(Punct::All))), + (b'A', _) => Ok(Some(Self::Punct(Punct::And))), + (b'W', _) => Ok(Some(Self::Punct(Punct::With))), + _ => unreachable!(), + } + } + Segment::Identifier => Ok(Some(Self::Id(String::from(s)))), + Segment::Punct => match s { + "(" => Ok(Some(Self::Punct(Punct::LParen))), + ")" => Ok(Some(Self::Punct(Punct::RParen))), + "[" => Ok(Some(Self::Punct(Punct::LSquare))), + "]" => Ok(Some(Self::Punct(Punct::RSquare))), + "{" => Ok(Some(Self::Punct(Punct::LCurly))), + "}" => Ok(Some(Self::Punct(Punct::RCurly))), + "," => Ok(Some(Self::Punct(Punct::Comma))), + "=" => Ok(Some(Self::Punct(Punct::Equals))), + "-" => Ok(Some(Self::Punct(Punct::Dash))), + "&" => Ok(Some(Self::Punct(Punct::And))), + "|" => 
Ok(Some(Self::Punct(Punct::Or))), + "+" => Ok(Some(Self::Punct(Punct::Plus))), + "/" => Ok(Some(Self::Punct(Punct::Slash))), + "*" => Ok(Some(Self::Punct(Punct::Asterisk))), + "<" => Ok(Some(Self::Punct(Punct::Lt))), + ">" => Ok(Some(Self::Punct(Punct::Gt))), + "~" => Ok(Some(Self::Punct(Punct::Not))), + ":" => Ok(Some(Self::Punct(Punct::Colon))), + ";" => Ok(Some(Self::Punct(Punct::Semicolon))), + "**" => Ok(Some(Self::Punct(Punct::Exp))), + "<=" => Ok(Some(Self::Punct(Punct::Le))), + "<>" => Ok(Some(Self::Punct(Punct::Ne))), + "~=" => Ok(Some(Self::Punct(Punct::Ne))), + ">=" => Ok(Some(Self::Punct(Punct::Ge))), + "!" => Ok(Some(Self::MacroToken(MacroToken::Bang))), + "%" => Ok(Some(Self::MacroToken(MacroToken::Percent))), + "?" => Ok(Some(Self::MacroToken(MacroToken::Question))), + "`" => Ok(Some(Self::MacroToken(MacroToken::Backtick))), + "_" => Ok(Some(Self::MacroToken(MacroToken::Underscore))), + _ => unreachable!(), + }, + Segment::Shbang + | Segment::Spaces + | Segment::Comment + | Segment::Newline + | Segment::CommentCommand => Ok(None), + Segment::DoRepeatOverflow => Err(TokenError::DoRepeatOverflow), + Segment::MacroId => Ok(Some(Self::MacroToken(MacroToken::MacroId(String::from(s))))), + Segment::StartDocument => Ok(Some(Self::Id(String::from("DOCUMENT")))), + Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand => { + Ok(Some(Self::EndCommand)) + } + Segment::End => Ok(Some(Self::End)), + Segment::ExpectedQuote => Err(TokenError::ExpectedQuote), + Segment::ExpectedExponent => Err(TokenError::ExpectedExponent(String::from(s))), + Segment::UnexpectedChar => Err(TokenError::UnexpectedChar(s.chars().next().unwrap())), + } + } +} diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c index e4fe405d47..e0ab8f872e 100644 --- a/src/language/lexer/scan.c +++ b/src/language/lexer/scan.c @@ -222,12 +222,6 @@ scan_punct2__ (char c0, char c1) case '~': return T_NE; - - case '&': - return T_AND; - - case '|': - return T_OR; } 
NOT_REACHED ();