From 75b2b4555ed2ec5d94940df27f43c54be3cd36e5 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Sat, 12 Jul 2025 09:34:22 -0700
Subject: [PATCH] Work on docs

---
 rust/pspp/src/lex/command_name.rs |   67 +-
 rust/pspp/src/lex/lexer.rs        |   15 +-
 rust/pspp/src/lex/scan/mod.rs     |  373 ++++++-----
 rust/pspp/src/lex/scan/test.rs    | 1011 ++++++++++++++---------------
 rust/pspp/src/lex/segment/mod.rs  |  165 +++--
 rust/pspp/src/macros.rs           |    8 +-
 6 files changed, 905 insertions(+), 734 deletions(-)

diff --git a/rust/pspp/src/lex/command_name.rs b/rust/pspp/src/lex/command_name.rs
index ed2daeaf65..9aa26300bf 100644
--- a/rust/pspp/src/lex/command_name.rs
+++ b/rust/pspp/src/lex/command_name.rs
@@ -14,28 +14,64 @@
 // You should have received a copy of the GNU General Public License along with
 // this program. If not, see <http://www.gnu.org/licenses/>.
 
+//! # Command names
+//!
+//! PSPP needs to parse command names in a few contexts:
+//!
+//! - For executing command syntax.
+//!
+//! - For lexical analysis in [Auto](crate::lex::segment::Syntax::Auto) syntax
+//!   mode. In this syntax mode, a line of syntax begins a new command if the
+//!   line has no leading white space and it begins with the name of a known
+//!   command.
+//!
+//! This module supports identifying commands for these purposes.

+// Warn about missing docs, but not for items declared with `#[cfg(test)]`.
+#![cfg_attr(not(test), warn(missing_docs))]
+
 use crate::identifier::id_match_n_nonstatic;
 
+/// How a string matches the name of a command.
 pub struct Match {
+    /// Is this an exact match?
+    ///
+    /// Words in command names are allowed to be abbreviated to their first 3
+    /// letters. An exact match means that none of the words were abbreviated.
     pub exact: bool,
+
+    /// Number of words omitted from the command name.
+    ///
+    /// Its value means:
+    ///
+    /// - Positive: Number of words omitted at the end of the command name
+    ///   (command names may be abbreviated to only as many words as are
+    ///   needed to be unambiguous).
+    ///
+    /// - Negative: The absolute value is the number of extra words at the end
+    ///   of the string that are not part of the command name.
+    ///
+    /// - Zero: The string and command name match exactly.
     pub missing_words: isize,
 }
 
-/// Compares `string` obtained from the user against the full name of a `command`,
-/// using this algorithm:
+/// Compares `string` obtained from the user against the full name of a
+/// `command`.
+///
+/// It uses this algorithm:
 ///
 /// 1. Divide `command` into words `c[0]` through `c[n - 1]`.
 ///
 /// 2. Divide `string` into words `s[0]` through `s[m - 1]`.
 ///
-/// 3. Compare word `c[i]` against `s[i]` for `0 <= i < min(n, m)`, using the keyword
-///    matching algorithm implemented by lex_id_match(). If any of them fail to
-///    match, then `string` does not match `command` and the function returns false.
+/// 3. Compare word `c[i]` against `s[i]` for `0 <= i < min(n, m)`, using the
+///    keyword matching algorithm implemented by lex_id_match(). If any of
+///    them fail to match, then `string` does not match `command` and the
+///    function returns `None`.
 ///
-/// 4. Otherwise, `string` and `command` match. Set *MISSING_WORDS to n - m. Set
-///    *EXACT to false if any of the `S[i]` were found to be abbreviated in the
-///    comparisons done in step 3, or to true if they were all exactly equal
-///    (modulo case). Return true.
+/// 4. Otherwise, `string` and `command` match. Returns a [Match] with
+///    `missing_words` set to `n - m` and `exact` set based on whether any of
+///    the words in the command name were abbreviated.
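+///
+/// # Example
+///
+/// An illustrative sketch, not part of the original patch and not compiled as
+/// a doctest; the values follow the matching rules described above:
+///
+/// ```ignore
+/// // "comp" abbreviates the single word of "COMPUTE", so the match is
+/// // inexact, with no words missing.
+/// let m = command_match("COMPUTE", "comp").unwrap();
+/// assert!(!m.exact);
+/// assert_eq!(m.missing_words, 0);
+///
+/// // "value" exactly matches the first word of "VALUE LABELS", leaving one
+/// // word of the command name unused.
+/// let m = command_match("VALUE LABELS", "value").unwrap();
+/// assert!(m.exact);
+/// assert_eq!(m.missing_words, 1);
+/// ```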
 pub fn command_match(command: &str, string: &str) -> Option<Match> {
     let mut command_words = command.split_whitespace();
     let mut string_words = string.split_whitespace();
@@ -73,6 +109,7 @@ pub struct CommandMatcher<'a, T> {
 }
 
 impl<'a, T> CommandMatcher<'a, T> {
+    /// Creates a new matcher for `string`.
     pub fn new(string: &'a str) -> Self {
         Self {
             string,
@@ -85,8 +122,8 @@ impl<'a, T> CommandMatcher<'a, T> {
     }
 
     /// Consider `command` as a candidate for the command name being parsed. If
-    /// `command` is the correct command name, then [Self::get_match] will
-    /// return `aux` later.
+    /// `command` is the correct command name, then [get_match](Self::get_match)
+    /// will return `aux` later.
     pub fn add(&mut self, command: &str, aux: T) {
         if let Some(Match {
             missing_words,
@@ -110,6 +147,11 @@ impl<'a, T> CommandMatcher<'a, T> {
         }
     }
 
+    /// Returns the best match among the possibilities passed to
+    /// [add](Self::add). Also returns the number of additional words that the
+    /// caller should consider reading, because the full command name might be
+    /// longer (if a command was returned) or because more words might be needed
+    /// for disambiguation (if no command name was returned).
     pub fn get_match(self) -> (Option<T>, isize) {
         if self.extensible {
             (None, 1)
@@ -123,6 +165,9 @@ impl<'a, T> CommandMatcher<'a, T> {
     }
 }
 
+/// List of all PSPP command names.
+///
+/// This includes commands that are not yet implemented.
 pub const COMMAND_NAMES: &[&str] = &[
     "2SLS",
     "ACF",
diff --git a/rust/pspp/src/lex/lexer.rs b/rust/pspp/src/lex/lexer.rs
index d79225356e..904a853b08 100644
--- a/rust/pspp/src/lex/lexer.rs
+++ b/rust/pspp/src/lex/lexer.rs
@@ -34,13 +34,14 @@ use thiserror::Error as ThisError;
 use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
 
 use crate::{
+    lex::scan::merge_tokens,
     macros::{macro_tokens_to_syntax, MacroSet, ParseStatus, Parser},
     message::{Category, Diagnostic, Location, Point, Severity},
     settings::Settings,
 };
 
 use super::{
-    scan::{MergeResult, ScanError, ScanToken, StringScanner},
+    scan::{MergeAction, ScanError, StringScanner},
     segment::{Segmenter, Syntax},
     token::Token,
 };
@@ -603,9 +604,9 @@ impl Source {
             let pos = self.seg_pos..self.seg_pos + seg_len;
             self.seg_pos += seg_len;
 
-            match ScanToken::from_segment(&self.file.buffer[pos.clone()], seg_type) {
+            match seg_type.to_token(&self.file.buffer[pos.clone()]) {
                 None => (),
-                Some(ScanToken::Token(token)) => {
+                Some(Ok(token)) => {
                     let end = token == Token::End;
                     pp.push_back(LexToken {
                         file: self.file.clone(),
@@ -617,7 +618,7 @@ impl Source {
                         break;
                     }
                 }
-                Some(ScanToken::Error(error)) => errors.push(LexError { error, pos }),
+                Some(Err(error)) => errors.push(LexError { error, pos }),
             }
         }
         // XXX report errors
@@ -636,11 +637,11 @@ impl Source {
         };
 
         while let Ok(Some(result)) =
-            ScanToken::merge(|index| Ok(merge.get(index).map(|token| &token.token)))
+            merge_tokens(|index| Ok(merge.get(index).map(|token| &token.token)))
         {
             match result {
-                MergeResult::Copy => self.lookahead.push_back(merge.pop_front().unwrap()),
-                MergeResult::Expand { n, token } => {
+                MergeAction::Copy => self.lookahead.push_back(merge.pop_front().unwrap()),
+                MergeAction::Expand { n, token } => {
                     let first = &merge[0];
                     let last = &merge[n - 1];
                     self.lookahead.push_back(LexToken {
diff --git a/rust/pspp/src/lex/scan/mod.rs b/rust/pspp/src/lex/scan/mod.rs
index c15b9fb3f7..7e99b847b3 100644
--- a/rust/pspp/src/lex/scan/mod.rs
+++ b/rust/pspp/src/lex/scan/mod.rs
@@ -21,8 +21,10 @@
 //!
 //! Scanning accepts as input a stream of segments, which are UTF-8 strings
 //! labeled with a [segment type](super::segment::Segment). It outputs a stream
-//! of [ScanToken]s, which are either the [Token] used by the PSPP parser or an
-//! error.
+//! of [Token]s used by the PSPP parser or an error.
+
+// Warn about missing docs, but not for items declared with `#[cfg(test)]`.
+#![cfg_attr(not(test), warn(missing_docs))]
 
 use crate::identifier::{Identifier, ReservedWord};
 
@@ -33,6 +35,7 @@ use super::{
 use std::collections::VecDeque;
 use thiserror::Error as ThisError;
 
+/// Error returned by [merge_tokens].
 #[derive(ThisError, Clone, Debug, PartialEq, Eq)]
 pub enum ScanError {
     /// Unterminated string constant.
@@ -53,11 +56,21 @@ pub enum ScanError {
 
     /// Incomplete UTF-8 sequence.
     #[error("Incomplete UTF-8 sequence `{substring}` starting {offset} digits into hex string.")]
-    IncompleteUtf8 { substring: String, offset: usize },
+    IncompleteUtf8 {
+        /// Incomplete sequence.
+        substring: String,
+        /// Offset of start of sequence.
+        offset: usize,
+    },
 
     /// Bad UTF-8 sequence.
     #[error("Invalid UTF-8 sequence `{substring}` starting {offset} digits into hex string.")]
-    BadUtf8 { substring: String, offset: usize },
+    BadUtf8 {
+        /// Invalid sequence.
+        substring: String,
+        /// Offset of start of sequence.
+        offset: usize,
+    },
 
     /// Invalid length Unicode string.
     #[error("Unicode string contains {0} bytes, which is not in the valid range of 1 to 8 bytes.")]
@@ -80,25 +93,9 @@ pub enum ScanError {
     UnexpectedChar(char),
 }
 
-/// The input or output to token merging.
-#[derive(Clone, Debug, PartialEq)]
-pub enum ScanToken {
-    Token(Token),
-    Error(ScanError),
-}
-
-impl ScanToken {
-    pub fn token(self) -> Option<Token> {
-        match self {
-            ScanToken::Token(token) => Some(token),
-            ScanToken::Error(_) => None,
-        }
-    }
-}
-
-/// The result of merging tokens.
+/// The action returned by [merge_tokens].
 #[derive(Clone, Debug)]
-pub enum MergeResult {
+pub enum MergeAction {
     /// Copy one token literally from input to output.
     Copy,
 
@@ -112,13 +109,28 @@
     },
 }
 
+/// Used by [merge_tokens] to indicate that more input is needed.
 #[derive(Copy, Clone, Debug)]
 pub struct Incomplete;
 
-impl ScanToken {
-    pub fn from_segment(s: &str, segment: Segment) -> Option<ScanToken> {
-        match segment {
-            Segment::Number => Some(Self::Token(Token::Number(s.parse().unwrap()))),
+impl Segment {
+    /// Tries to transform this segment, which was obtained for `s`, into a
+    /// token. Returns one of:
+    ///
+    /// - `None`: This segment doesn't correspond to any token (because it is a
+    ///   comment, white space, etc.) and can be dropped in tokenization.
+    ///
+    /// - `Some(Ok(token))`: This segment corresponds to the given token.
+    ///
+    /// - `Some(Err(error))`: The segment contains an error, which the caller
+    ///   should report.
+    ///
+    /// The raw token (or error) that this function returns should ordinarily be
+    /// merged with adjacent tokens with [merge_tokens] or some higher-level
+    /// construct.
+    pub fn to_token(self, s: &str) -> Option<Result<Token, ScanError>> {
+        match self {
+            Segment::Number => Some(Ok(Token::Number(s.parse().unwrap()))),
             Segment::QuotedString => {
                 // Trim quote mark from front and back.
                 let mut chars = s.chars();
@@ -131,20 +143,18 @@ impl ScanToken {
                     '"' => ("\"", "\"\""),
                     _ => unreachable!(),
                 };
-                Some(Self::Token(Token::String(
-                    s.replace(double_quote, single_quote),
-                )))
+                Some(Ok(Token::String(s.replace(double_quote, single_quote))))
             }
 
             Segment::HexString => {
                 // Strip `X"` prefix and `"` suffix (or variations).
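+                // Added comment (not in the original patch): each remaining
+                // hexit pair encodes one byte, e.g. `X"4142"` holds the bytes
+                // 0x41 0x42, that is, the string `AB`.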
                 let s = &s[2..s.len() - 1];
                 for c in s.chars() {
                     if !c.is_ascii_hexdigit() {
-                        return Some(Self::Error(ScanError::BadHexDigit(c)));
+                        return Some(Err(ScanError::BadHexDigit(c)));
                     }
                 }
                 if s.len() % 2 != 0 {
-                    return Some(Self::Error(ScanError::OddLengthHexString(s.len())));
+                    return Some(Err(ScanError::OddLengthHexString(s.len())));
                 }
                 let bytes = s
                     .as_bytes()
@@ -156,7 +166,7 @@
                     })
                    .collect::<Vec<_>>();
                 match String::from_utf8(bytes) {
-                    Ok(string) => Some(Self::Token(Token::String(string))),
+                    Ok(string) => Some(Ok(Token::String(string))),
                     Err(error) => {
                         let details = error.utf8_error();
                         let offset = details.valid_up_to() * 2;
                         let end = details
                             .error_len()
                             .map(|len| offset + len * 2)
                             .unwrap_or(s.len());
                         let substring = String::from(&s[offset..end]);
-                        Some(Self::Error(if details.error_len().is_some() {
+                        Some(Err(if details.error_len().is_some() {
                             ScanError::BadUtf8 { substring, offset }
                         } else {
                             ScanError::IncompleteUtf8 { substring, offset }
@@ -177,15 +187,15 @@
             Segment::UnicodeString => {
                 // Strip `U"` prefix and `"` suffix (or variations).
                 let s = &s[2..s.len() - 1];
                 if !(1..=8).contains(&s.len()) {
-                    return Some(Self::Error(ScanError::BadLengthUnicodeString(s.len())));
+                    return Some(Err(ScanError::BadLengthUnicodeString(s.len())));
                 }
                 let Ok(code_point) = u32::from_str_radix(s, 16) else {
-                    return Some(Self::Error(ScanError::ExpectedCodePoint));
+                    return Some(Err(ScanError::ExpectedCodePoint));
                 };
                 let Some(c) = char::from_u32(code_point) else {
-                    return Some(Self::Error(ScanError::BadCodePoint(code_point)));
+                    return Some(Err(ScanError::BadCodePoint(code_point)));
                 };
-                Some(Self::Token(Token::String(String::from(c))))
+                Some(Ok(Token::String(String::from(c))))
             }
 
             Segment::UnquotedString
@@ -193,61 +203,61 @@
             | Segment::DoRepeatCommand
             | Segment::InlineData
             | Segment::Document
             | Segment::MacroBody
-            | Segment::MacroName => Some(Self::Token(Token::String(String::from(s)))),
+            | Segment::MacroName => Some(Ok(Token::String(String::from(s)))),
 
             Segment::Identifier => {
                 if let Ok(reserved_word) = ReservedWord::try_from(s) {
                     match reserved_word {
-                        ReservedWord::And => Some(Self::Token(Token::Punct(Punct::And))),
-                        ReservedWord::Or => Some(Self::Token(Token::Punct(Punct::Or))),
-                        ReservedWord::Not => Some(Self::Token(Token::Punct(Punct::Not))),
-                        ReservedWord::Eq => Some(Self::Token(Token::Punct(Punct::Eq))),
-                        ReservedWord::Ge => Some(Self::Token(Token::Punct(Punct::Ge))),
-                        ReservedWord::Gt => Some(Self::Token(Token::Punct(Punct::Gt))),
-                        ReservedWord::Le => Some(Self::Token(Token::Punct(Punct::Le))),
-                        ReservedWord::Lt => Some(Self::Token(Token::Punct(Punct::Lt))),
-                        ReservedWord::Ne => Some(Self::Token(Token::Punct(Punct::Ne))),
-                        ReservedWord::All => Some(Self::Token(Token::Punct(Punct::All))),
-                        ReservedWord::By => Some(Self::Token(Token::Punct(Punct::By))),
-                        ReservedWord::To => Some(Self::Token(Token::Punct(Punct::To))),
-                        ReservedWord::With => Some(Self::Token(Token::Punct(Punct::With))),
+                        ReservedWord::And => Some(Ok(Token::Punct(Punct::And))),
+                        ReservedWord::Or => Some(Ok(Token::Punct(Punct::Or))),
+                        ReservedWord::Not => Some(Ok(Token::Punct(Punct::Not))),
+                        ReservedWord::Eq => Some(Ok(Token::Punct(Punct::Eq))),
+                        ReservedWord::Ge => Some(Ok(Token::Punct(Punct::Ge))),
+                        ReservedWord::Gt => Some(Ok(Token::Punct(Punct::Gt))),
+                        ReservedWord::Le => Some(Ok(Token::Punct(Punct::Le))),
+                        ReservedWord::Lt => Some(Ok(Token::Punct(Punct::Lt))),
+                        ReservedWord::Ne => Some(Ok(Token::Punct(Punct::Ne))),
+                        ReservedWord::All => Some(Ok(Token::Punct(Punct::All))),
+                        ReservedWord::By =>
Some(Ok(Token::Punct(Punct::By))), + ReservedWord::To => Some(Ok(Token::Punct(Punct::To))), + ReservedWord::With => Some(Ok(Token::Punct(Punct::With))), } } else { - Some(Self::Token(Token::Id(Identifier::new(s).unwrap()))) + Some(Ok(Token::Id(Identifier::new(s).unwrap()))) } } Segment::Punct => match s { - "(" => Some(Self::Token(Token::Punct(Punct::LParen))), - ")" => Some(Self::Token(Token::Punct(Punct::RParen))), - "[" => Some(Self::Token(Token::Punct(Punct::LSquare))), - "]" => Some(Self::Token(Token::Punct(Punct::RSquare))), - "{" => Some(Self::Token(Token::Punct(Punct::LCurly))), - "}" => Some(Self::Token(Token::Punct(Punct::RCurly))), - "," => Some(Self::Token(Token::Punct(Punct::Comma))), - "=" => Some(Self::Token(Token::Punct(Punct::Equals))), - "-" => Some(Self::Token(Token::Punct(Punct::Dash))), - "&" => Some(Self::Token(Token::Punct(Punct::And))), - "|" => Some(Self::Token(Token::Punct(Punct::Or))), - "+" => Some(Self::Token(Token::Punct(Punct::Plus))), - "/" => Some(Self::Token(Token::Punct(Punct::Slash))), - "*" => Some(Self::Token(Token::Punct(Punct::Asterisk))), - "<" => Some(Self::Token(Token::Punct(Punct::Lt))), - ">" => Some(Self::Token(Token::Punct(Punct::Gt))), - "~" => Some(Self::Token(Token::Punct(Punct::Not))), - ":" => Some(Self::Token(Token::Punct(Punct::Colon))), - ";" => Some(Self::Token(Token::Punct(Punct::Semicolon))), - "**" => Some(Self::Token(Token::Punct(Punct::Exp))), - "<=" => Some(Self::Token(Token::Punct(Punct::Le))), - "<>" => Some(Self::Token(Token::Punct(Punct::Ne))), - "~=" => Some(Self::Token(Token::Punct(Punct::Ne))), - ">=" => Some(Self::Token(Token::Punct(Punct::Ge))), - "!" => Some(Self::Token(Token::Punct(Punct::Bang))), - "%" => Some(Self::Token(Token::Punct(Punct::Percent))), - "?" => Some(Self::Token(Token::Punct(Punct::Question))), - "`" => Some(Self::Token(Token::Punct(Punct::Backtick))), - "_" => Some(Self::Token(Token::Punct(Punct::Underscore))), - "." => Some(Self::Token(Token::Punct(Punct::Dot))), - "!*" => Some(Self::Token(Token::Punct(Punct::BangAsterisk))), + "(" => Some(Ok(Token::Punct(Punct::LParen))), + ")" => Some(Ok(Token::Punct(Punct::RParen))), + "[" => Some(Ok(Token::Punct(Punct::LSquare))), + "]" => Some(Ok(Token::Punct(Punct::RSquare))), + "{" => Some(Ok(Token::Punct(Punct::LCurly))), + "}" => Some(Ok(Token::Punct(Punct::RCurly))), + "," => Some(Ok(Token::Punct(Punct::Comma))), + "=" => Some(Ok(Token::Punct(Punct::Equals))), + "-" => Some(Ok(Token::Punct(Punct::Dash))), + "&" => Some(Ok(Token::Punct(Punct::And))), + "|" => Some(Ok(Token::Punct(Punct::Or))), + "+" => Some(Ok(Token::Punct(Punct::Plus))), + "/" => Some(Ok(Token::Punct(Punct::Slash))), + "*" => Some(Ok(Token::Punct(Punct::Asterisk))), + "<" => Some(Ok(Token::Punct(Punct::Lt))), + ">" => Some(Ok(Token::Punct(Punct::Gt))), + "~" => Some(Ok(Token::Punct(Punct::Not))), + ":" => Some(Ok(Token::Punct(Punct::Colon))), + ";" => Some(Ok(Token::Punct(Punct::Semicolon))), + "**" => Some(Ok(Token::Punct(Punct::Exp))), + "<=" => Some(Ok(Token::Punct(Punct::Le))), + "<>" => Some(Ok(Token::Punct(Punct::Ne))), + "~=" => Some(Ok(Token::Punct(Punct::Ne))), + ">=" => Some(Ok(Token::Punct(Punct::Ge))), + "!" => Some(Ok(Token::Punct(Punct::Bang))), + "%" => Some(Ok(Token::Punct(Punct::Percent))), + "?" => Some(Ok(Token::Punct(Punct::Question))), + "`" => Some(Ok(Token::Punct(Punct::Backtick))), + "_" => Some(Ok(Token::Punct(Punct::Underscore))), + "." 
=> Some(Ok(Token::Punct(Punct::Dot))),
+                "!*" => Some(Ok(Token::Punct(Punct::BangAsterisk))),
                 _ => unreachable!("bad punctuator {s:?}"),
             },
             Segment::Shbang
@@ -255,95 +265,117 @@ impl ScanToken {
             | Segment::Comment
             | Segment::Newline
             | Segment::CommentCommand => None,
-            Segment::DoRepeatOverflow => Some(Self::Error(ScanError::DoRepeatOverflow)),
-            Segment::StartDocument => {
-                Some(Self::Token(Token::Id(Identifier::new("DOCUMENT").unwrap())))
-            }
+            Segment::DoRepeatOverflow => Some(Err(ScanError::DoRepeatOverflow)),
+            Segment::StartDocument => Some(Ok(Token::Id(Identifier::new("DOCUMENT").unwrap()))),
             Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand => {
-                Some(Self::Token(Token::End))
+                Some(Ok(Token::End))
             }
-            Segment::ExpectedQuote => Some(Self::Error(ScanError::ExpectedQuote)),
-            Segment::ExpectedExponent => {
-                Some(Self::Error(ScanError::ExpectedExponent(String::from(s))))
+            Segment::ExpectedQuote => Some(Err(ScanError::ExpectedQuote)),
+            Segment::ExpectedExponent => Some(Err(ScanError::ExpectedExponent(String::from(s)))),
+            Segment::UnexpectedChar => {
+                Some(Err(ScanError::UnexpectedChar(s.chars().next().unwrap())))
             }
-            Segment::UnexpectedChar => Some(Self::Error(ScanError::UnexpectedChar(
-                s.chars().next().unwrap(),
-            ))),
         }
     }
+}
 
-    /// Attempts to merge a sequence of tokens together into a single token. The
-    /// tokens are taken from the beginning of `input`. If successful, removes one
-    /// or more token from the beginning of `input` and returnss the merged
-    /// token. More input tokens might be needed; if so, leaves `input` alone and
-    /// returns `None`. In the latter case, the caller should add more tokens to the
-    /// input ([Token::End] or [Token::Punct(Punct::EndCmd)] is always sufficient).
-    ///
-    /// This performs two different kinds of token merging:
-    ///
-    /// - String concatenation, where syntax like `"a" + "b"` is converted into a
-    ///   single string token. This is definitely needed because the parser relies
-    ///   on it.
-    ///
-    /// - Negative number merging, where syntax like `-5` is converted from a pair
-    ///   of tokens (a dash and a positive number) into a single token (a negative
-    ///   number). This might not be needed anymore because the segmenter
-    ///   directly treats a dash followed by a number, with optional intervening
-    ///   white space, as a negative number. It's only needed if we want
-    ///   intervening comments to be allowed or for part of the negative number
-    ///   token to be produced by macro expansion.
-    pub fn merge<'a, F>(get_token: F) -> Result<Option<MergeResult>, Incomplete>
-    where
-        F: Fn(usize) -> Result<Option<&'a Token>, Incomplete>,
-    {
-        let Some(token) = get_token(0)? else {
-            return Ok(None);
-        };
-        match token {
-            Token::Punct(Punct::Dash) => match get_token(1)? {
-                Some(Token::Number(number)) if number.is_sign_positive() => {
-                    let number = *number;
-                    Ok(Some(MergeResult::Expand {
-                        n: 2,
-                        token: Token::Number(-number),
-                    }))
-                }
-                _ => Ok(Some(MergeResult::Copy)),
-            },
-            Token::String(_) => {
-                let mut i = 0;
-                while matches!(get_token(i * 2 + 1)?, Some(Token::Punct(Punct::Plus)))
-                    && matches!(get_token(i * 2 + 2)?, Some(Token::String(_)))
-                {
-                    i += 1;
-                }
-                if i == 0 {
-                    Ok(Some(MergeResult::Copy))
-                } else {
-                    let mut output = String::new();
-                    for i in 0..=i {
-                        let Token::String(s) = get_token(i * 2).unwrap().unwrap() else {
-                            unreachable!()
-                        };
-                        output.push_str(s);
-                    }
-                    Ok(Some(MergeResult::Expand {
-                        n: i * 2 + 1,
-                        token: Token::String(output),
-                    }))
-                }
-            }
-            _ => Ok(Some(MergeResult::Copy)),
-        }
-    }
-}
 
+/// Attempts to merge a sequence of tokens together into a single token.
+///
+/// The tokens are taken from the beginning of `input`, which, given a
+/// 0-based token index, returns:
+///
+/// * `Ok(Some(token))`: The token with the given index.
+///
+/// * `Ok(None)`: End of input.
+///
+/// * `Err(Incomplete)`: The given token isn't available yet (it may or may not
+///   exist).
+///
+/// This function returns one of:
+///
+/// * `Ok(Some(MergeAction))`: How to transform one or more input tokens into an
+///   output token.
+///
+/// * `Ok(None)`: End of input. (Only returned if `input(0)` is `Ok(None)`.)
+///
+/// * `Err(Incomplete)`: More input tokens are needed. Call again with longer
+///   `input`. ([Token::End] or [Token::Punct(Punct::EndCmd)] is
+///   always sufficient as extra input.)
+///
+/// This performs two different kinds of token merging:
+///
+/// - String concatenation, where syntax like `"a" + "b"` is converted into a
+///   single string token. This is definitely needed because the parser relies
+///   on it.
+///
+/// - Negative number merging, where syntax like `-5` is converted from a pair
+///   of tokens (a dash and a positive number) into a single token (a negative
+///   number). This might not be needed anymore because the segmenter
+///   directly treats a dash followed by a number, with optional intervening
+///   white space, as a negative number. It's only needed if we want
+///   intervening comments to be allowed or for part of the negative number
+///   token to be produced by macro expansion.
+pub fn merge_tokens<'a, F>(input: F) -> Result<Option<MergeAction>, Incomplete>
+where
+    F: Fn(usize) -> Result<Option<&'a Token>, Incomplete>,
+{
+    let Some(token) = input(0)? else {
+        return Ok(None);
+    };
+    match token {
+        Token::Punct(Punct::Dash) => match input(1)? {
+            Some(Token::Number(number)) if number.is_sign_positive() => {
+                let number = *number;
+                Ok(Some(MergeAction::Expand {
+                    n: 2,
+                    token: Token::Number(-number),
+                }))
+            }
+            _ => Ok(Some(MergeAction::Copy)),
+        },
+        Token::String(_) => {
+            let mut i = 0;
+            while matches!(input(i * 2 + 1)?, Some(Token::Punct(Punct::Plus)))
+                && matches!(input(i * 2 + 2)?, Some(Token::String(_)))
+            {
+                i += 1;
+            }
+            if i == 0 {
+                Ok(Some(MergeAction::Copy))
+            } else {
+                let mut output = String::new();
+                for i in 0..=i {
+                    let Token::String(s) = input(i * 2).unwrap().unwrap() else {
+                        unreachable!()
+                    };
+                    output.push_str(s);
+                }
+                Ok(Some(MergeAction::Expand {
+                    n: i * 2 + 1,
+                    token: Token::String(output),
+                }))
+            }
+        }
+        _ => Ok(Some(MergeAction::Copy)),
+    }
+}
 
+/// Too-simple lexical analyzer for strings.
+///
+/// Given a string, [StringSegmenter] provides iteration over raw tokens.
+/// Unlike [StringScanner], [StringSegmenter] does not merge tokens using
+/// [merge_tokens]. Usually merging is desirable, so [StringScanner] should be
+/// preferred.
+///
+/// This is used as part of macro expansion.
 pub struct StringSegmenter<'a> {
     input: &'a str,
     segmenter: Segmenter,
 }
 
 impl<'a> StringSegmenter<'a> {
+    /// Creates a new [StringSegmenter] for `input` using syntax variant `mode`.
+    /// See [Segmenter::new] for an explanation of `is_snippet`.
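+    ///
+    /// # Example
+    ///
+    /// An illustrative sketch, not part of the original patch and not
+    /// compiled as a doctest; it assumes the crate's public module layout:
+    ///
+    /// ```ignore
+    /// use pspp::lex::{scan::StringSegmenter, segment::Syntax};
+    ///
+    /// // Yields (source text, raw token) pairs without merging, so the
+    /// // string concatenation below stays separate raw tokens.
+    /// for (text, token) in StringSegmenter::new(r#""a" + "b"."#, Syntax::Auto, false) {
+    ///     println!("{text:?} -> {token:?}");
+    /// }
+    /// ```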
    pub fn new(input: &'a str, mode: Syntax, is_snippet: bool) -> Self {
         Self {
             input,
@@ -353,7 +385,7 @@ impl<'a> StringSegmenter<'a> {
 }
 
 impl<'a> Iterator for StringSegmenter<'a> {
-    type Item = (&'a str, ScanToken);
+    type Item = (&'a str, Result<Token, ScanError>);
 
     fn next(&mut self) -> Option<Self::Item> {
         loop {
@@ -361,13 +393,16 @@ impl<'a> Iterator for StringSegmenter<'a> {
             let (s, rest) = self.input.split_at(seg_len);
             self.input = rest;
 
-            if let Some(token) = ScanToken::from_segment(s, seg_type) {
+            if let Some(token) = seg_type.to_token(s) {
                 return Some((s, token));
             }
         }
     }
 }
 
+/// Simple lexical analyzer for strings.
+///
+/// Given a string, [StringScanner] provides iteration over tokens.
 pub struct StringScanner<'a> {
     input: &'a str,
     eof: bool,
@@ -376,6 +411,8 @@ pub struct StringScanner<'a> {
 }
 
 impl<'a> StringScanner<'a> {
+    /// Creates a new [StringScanner] for `input` using syntax variant `mode`.
+    /// See [Segmenter::new] for an explanation of `is_snippet`.
     pub fn new(input: &'a str, mode: Syntax, is_snippet: bool) -> Self {
         Self {
             input,
@@ -385,8 +422,8 @@ impl<'a> StringScanner<'a> {
         }
     }
 
-    fn merge(&mut self, eof: bool) -> Result<Option<ScanToken>, Incomplete> {
-        match ScanToken::merge(|index| {
+    fn merge(&mut self, eof: bool) -> Result<Option<Result<Token, ScanError>>, Incomplete> {
+        match merge_tokens(|index| {
             if let Some(token) = self.tokens.get(index) {
                 Ok(Some(token))
             } else if eof {
@@ -395,22 +432,24 @@ impl<'a> StringScanner<'a> {
                 Err(Incomplete)
             }
         })? {
-            Some(MergeResult::Copy) => Ok(Some(ScanToken::Token(self.tokens.pop_front().unwrap()))),
-            Some(MergeResult::Expand { n, token }) => {
+            Some(MergeAction::Copy) => Ok(Some(Ok(self.tokens.pop_front().unwrap()))),
+            Some(MergeAction::Expand { n, token }) => {
                 self.tokens.drain(..n);
-                Ok(Some(ScanToken::Token(token)))
+                Ok(Some(Ok(token)))
             }
             None => Ok(None),
         }
     }
 
+    /// Transforms this [StringScanner] into an iterator that includes only the
+    /// [Token]s, omitting [ScanError]s.
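+    ///
+    /// # Example
+    ///
+    /// An illustrative sketch, not part of the original patch and not
+    /// compiled as a doctest; it assumes the crate's public module layout:
+    ///
+    /// ```ignore
+    /// use pspp::lex::{scan::StringScanner, segment::Syntax};
+    ///
+    /// // By the time tokens come out of the scanner, `"a" + "b"` has been
+    /// // merged into the single string token `ab`.
+    /// for token in StringScanner::new(r#""a" + "b"."#, Syntax::Auto, false).unwrapped() {
+    ///     println!("{token:?}");
+    /// }
+    /// ```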
    pub fn unwrapped(self) -> impl Iterator<Item = Token> + use<'a> {
-        self.map(|scan_token| scan_token.token().unwrap())
+        self.map(|scan_token| scan_token.ok().unwrap())
     }
 }
 
 impl Iterator for StringScanner<'_> {
-    type Item = ScanToken;
+    type Item = Result<Token, ScanError>;
 
     fn next(&mut self) -> Option<Self::Item> {
         loop {
@@ -424,15 +463,15 @@ impl Iterator for StringScanner<'_> {
             };
             let (s, rest) = self.input.split_at(seg_len);
 
-            match ScanToken::from_segment(s, seg_type) {
-                Some(ScanToken::Error(error)) => {
+            match seg_type.to_token(s) {
+                Some(Err(error)) => {
                     if let Ok(Some(token)) = self.merge(true) {
                         return Some(token);
                     }
                     self.input = rest;
-                    return Some(ScanToken::Error(error));
+                    return Some(Err(error));
                 }
-                Some(ScanToken::Token(token)) => {
+                Some(Ok(token)) => {
                     self.tokens.push_back(token);
                 }
                 None => (),
diff --git a/rust/pspp/src/lex/scan/test.rs b/rust/pspp/src/lex/scan/test.rs
index 48dafb2121..6f0e582cda 100644
--- a/rust/pspp/src/lex/scan/test.rs
+++ b/rust/pspp/src/lex/scan/test.rs
@@ -22,7 +22,7 @@ use crate::{
     },
 };
 
-use super::{ScanError, ScanToken, StringScanner};
+use super::{ScanError, StringScanner};
 
 fn print_token(token: &Token) {
     match token {
@@ -35,18 +35,18 @@ fn print_token(token: &Token) {
 }
 
 #[track_caller]
-fn check_scan(input: &str, mode: Syntax, expected: &[ScanToken]) {
+fn check_scan(input: &str, mode: Syntax, expected: &[Result<Token, ScanError>]) {
     let tokens = StringScanner::new(input, mode, false).collect::<Vec<_>>();
 
     if tokens != expected {
         for token in &tokens {
             match token {
-                ScanToken::Token(token) => {
-                    print!("ScanToken::Token(");
+                Ok(token) => {
+                    print!("Ok(");
                     print_token(token);
                     print!(")");
                 }
-                ScanToken::Error(error) => print!("ScanToken::Error(ScanError::{error:?})"),
+                Err(error) => print!("Err(ScanError::{error:?})"),
             }
             println!(",");
         }
@@ -76,33 +76,33 @@
WXYZ.
/* unterminated end of line comment "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("a").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("aB").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("i5").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("$x").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("@efg").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("@@.").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("!abcd").unwrap())), - ScanToken::Token(Token::Punct(Punct::BangAsterisk)), - ScanToken::Token(Token::Punct(Punct::BangAsterisk)), - ScanToken::Token(Token::Id(Identifier::new("a").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("#.#").unwrap())), - ScanToken::Token(Token::Punct(Punct::Dot)), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::Punct(Punct::Underscore)), - ScanToken::Token(Token::Id(Identifier::new("z").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("abcd.").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("abcd").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("QRSTUV").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("QrStUv").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("WXYZ").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Error(ScanError::UnexpectedChar('�')), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("a").unwrap())), + Ok(Token::Id(Identifier::new("aB").unwrap())), + Ok(Token::Id(Identifier::new("i5").unwrap())), + Ok(Token::Id(Identifier::new("$x").unwrap())), + Ok(Token::Id(Identifier::new("@efg").unwrap())), + Ok(Token::Id(Identifier::new("@@.").unwrap())), + Ok(Token::Id(Identifier::new("!abcd").unwrap())), + Ok(Token::Punct(Punct::BangAsterisk)), + Ok(Token::Punct(Punct::BangAsterisk)), + Ok(Token::Id(Identifier::new("a").unwrap())), + Ok(Token::Id(Identifier::new("#.#").unwrap())), + Ok(Token::Punct(Punct::Dot)), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::Punct(Punct::Underscore)), + Ok(Token::Id(Identifier::new("z").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("abcd.").unwrap())), + Ok(Token::Id(Identifier::new("abcd").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("QRSTUV").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("QrStUv").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("WXYZ").unwrap())), + Ok(Token::End), + Err(ScanError::UnexpectedChar('�')), + Ok(Token::End), ], ); } @@ -117,48 +117,48 @@ and. with. 
"#, Syntax::Auto, &[ - ScanToken::Token(Token::Punct(Punct::And)), - ScanToken::Token(Token::Punct(Punct::Or)), - ScanToken::Token(Token::Punct(Punct::Not)), - ScanToken::Token(Token::Punct(Punct::Eq)), - ScanToken::Token(Token::Punct(Punct::Ge)), - ScanToken::Token(Token::Punct(Punct::Gt)), - ScanToken::Token(Token::Punct(Punct::Le)), - ScanToken::Token(Token::Punct(Punct::Lt)), - ScanToken::Token(Token::Punct(Punct::Ne)), - ScanToken::Token(Token::Punct(Punct::All)), - ScanToken::Token(Token::Punct(Punct::By)), - ScanToken::Token(Token::Punct(Punct::To)), - ScanToken::Token(Token::Punct(Punct::With)), - ScanToken::Token(Token::Punct(Punct::And)), - ScanToken::Token(Token::Punct(Punct::Or)), - ScanToken::Token(Token::Punct(Punct::Not)), - ScanToken::Token(Token::Punct(Punct::Eq)), - ScanToken::Token(Token::Punct(Punct::Ge)), - ScanToken::Token(Token::Punct(Punct::Gt)), - ScanToken::Token(Token::Punct(Punct::Le)), - ScanToken::Token(Token::Punct(Punct::Lt)), - ScanToken::Token(Token::Punct(Punct::Ne)), - ScanToken::Token(Token::Punct(Punct::All)), - ScanToken::Token(Token::Punct(Punct::By)), - ScanToken::Token(Token::Punct(Punct::To)), - ScanToken::Token(Token::Punct(Punct::With)), - ScanToken::Token(Token::Id(Identifier::new("andx").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("orx").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("notx").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("eqx").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("gex").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("gtx").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("lex").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("ltx").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("nex").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("allx").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("byx").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("tox").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("withx").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("and.").unwrap())), - ScanToken::Token(Token::Punct(Punct::With)), - ScanToken::Token(Token::End), + Ok(Token::Punct(Punct::And)), + Ok(Token::Punct(Punct::Or)), + Ok(Token::Punct(Punct::Not)), + Ok(Token::Punct(Punct::Eq)), + Ok(Token::Punct(Punct::Ge)), + Ok(Token::Punct(Punct::Gt)), + Ok(Token::Punct(Punct::Le)), + Ok(Token::Punct(Punct::Lt)), + Ok(Token::Punct(Punct::Ne)), + Ok(Token::Punct(Punct::All)), + Ok(Token::Punct(Punct::By)), + Ok(Token::Punct(Punct::To)), + Ok(Token::Punct(Punct::With)), + Ok(Token::Punct(Punct::And)), + Ok(Token::Punct(Punct::Or)), + Ok(Token::Punct(Punct::Not)), + Ok(Token::Punct(Punct::Eq)), + Ok(Token::Punct(Punct::Ge)), + Ok(Token::Punct(Punct::Gt)), + Ok(Token::Punct(Punct::Le)), + Ok(Token::Punct(Punct::Lt)), + Ok(Token::Punct(Punct::Ne)), + Ok(Token::Punct(Punct::All)), + Ok(Token::Punct(Punct::By)), + Ok(Token::Punct(Punct::To)), + Ok(Token::Punct(Punct::With)), + Ok(Token::Id(Identifier::new("andx").unwrap())), + Ok(Token::Id(Identifier::new("orx").unwrap())), + Ok(Token::Id(Identifier::new("notx").unwrap())), + Ok(Token::Id(Identifier::new("eqx").unwrap())), + Ok(Token::Id(Identifier::new("gex").unwrap())), + Ok(Token::Id(Identifier::new("gtx").unwrap())), + Ok(Token::Id(Identifier::new("lex").unwrap())), + Ok(Token::Id(Identifier::new("ltx").unwrap())), + Ok(Token::Id(Identifier::new("nex").unwrap())), + Ok(Token::Id(Identifier::new("allx").unwrap())), + Ok(Token::Id(Identifier::new("byx").unwrap())), + 
Ok(Token::Id(Identifier::new("tox").unwrap())), + Ok(Token::Id(Identifier::new("withx").unwrap())), + Ok(Token::Id(Identifier::new("and.").unwrap())), + Ok(Token::Punct(Punct::With)), + Ok(Token::End), ], ); } @@ -172,55 +172,55 @@ fn test_punctuation() { "#, Syntax::Auto, &[ - ScanToken::Token(Token::Punct(Punct::Not)), - ScanToken::Token(Token::Punct(Punct::And)), - ScanToken::Token(Token::Punct(Punct::Or)), - ScanToken::Token(Token::Punct(Punct::Equals)), - ScanToken::Token(Token::Punct(Punct::Ge)), - ScanToken::Token(Token::Punct(Punct::Gt)), - ScanToken::Token(Token::Punct(Punct::Le)), - ScanToken::Token(Token::Punct(Punct::Lt)), - ScanToken::Token(Token::Punct(Punct::Ne)), - ScanToken::Token(Token::Punct(Punct::Ne)), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Punct(Punct::Comma)), - ScanToken::Token(Token::Punct(Punct::Dash)), - ScanToken::Token(Token::Punct(Punct::Plus)), - ScanToken::Token(Token::Punct(Punct::Asterisk)), - ScanToken::Token(Token::Punct(Punct::Slash)), - ScanToken::Token(Token::Punct(Punct::LSquare)), - ScanToken::Token(Token::Punct(Punct::RSquare)), - ScanToken::Token(Token::Punct(Punct::Exp)), - ScanToken::Token(Token::Punct(Punct::Not)), - ScanToken::Token(Token::Punct(Punct::And)), - ScanToken::Token(Token::Punct(Punct::Or)), - ScanToken::Token(Token::Punct(Punct::Equals)), - ScanToken::Token(Token::Punct(Punct::Ge)), - ScanToken::Token(Token::Punct(Punct::Gt)), - ScanToken::Token(Token::Punct(Punct::Le)), - ScanToken::Token(Token::Punct(Punct::Lt)), - ScanToken::Token(Token::Punct(Punct::Ne)), - ScanToken::Token(Token::Punct(Punct::Ne)), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Punct(Punct::Comma)), - ScanToken::Token(Token::Punct(Punct::Dash)), - ScanToken::Token(Token::Punct(Punct::Plus)), - ScanToken::Token(Token::Punct(Punct::Asterisk)), - ScanToken::Token(Token::Punct(Punct::Slash)), - ScanToken::Token(Token::Punct(Punct::LSquare)), - ScanToken::Token(Token::Punct(Punct::RSquare)), - ScanToken::Token(Token::Punct(Punct::Exp)), - ScanToken::Token(Token::Punct(Punct::Percent)), - ScanToken::Token(Token::Punct(Punct::Colon)), - ScanToken::Token(Token::Punct(Punct::Semicolon)), - ScanToken::Token(Token::Punct(Punct::Question)), - ScanToken::Token(Token::Punct(Punct::Underscore)), - ScanToken::Token(Token::Punct(Punct::Backtick)), - ScanToken::Token(Token::Punct(Punct::LCurly)), - ScanToken::Token(Token::Punct(Punct::RCurly)), - ScanToken::Token(Token::Punct(Punct::Not)), + Ok(Token::Punct(Punct::Not)), + Ok(Token::Punct(Punct::And)), + Ok(Token::Punct(Punct::Or)), + Ok(Token::Punct(Punct::Equals)), + Ok(Token::Punct(Punct::Ge)), + Ok(Token::Punct(Punct::Gt)), + Ok(Token::Punct(Punct::Le)), + Ok(Token::Punct(Punct::Lt)), + Ok(Token::Punct(Punct::Ne)), + Ok(Token::Punct(Punct::Ne)), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Punct(Punct::Comma)), + Ok(Token::Punct(Punct::Dash)), + Ok(Token::Punct(Punct::Plus)), + Ok(Token::Punct(Punct::Asterisk)), + Ok(Token::Punct(Punct::Slash)), + Ok(Token::Punct(Punct::LSquare)), + Ok(Token::Punct(Punct::RSquare)), + Ok(Token::Punct(Punct::Exp)), + Ok(Token::Punct(Punct::Not)), + Ok(Token::Punct(Punct::And)), + Ok(Token::Punct(Punct::Or)), + Ok(Token::Punct(Punct::Equals)), + Ok(Token::Punct(Punct::Ge)), + Ok(Token::Punct(Punct::Gt)), + Ok(Token::Punct(Punct::Le)), + Ok(Token::Punct(Punct::Lt)), + Ok(Token::Punct(Punct::Ne)), 
+ Ok(Token::Punct(Punct::Ne)), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Punct(Punct::Comma)), + Ok(Token::Punct(Punct::Dash)), + Ok(Token::Punct(Punct::Plus)), + Ok(Token::Punct(Punct::Asterisk)), + Ok(Token::Punct(Punct::Slash)), + Ok(Token::Punct(Punct::LSquare)), + Ok(Token::Punct(Punct::RSquare)), + Ok(Token::Punct(Punct::Exp)), + Ok(Token::Punct(Punct::Percent)), + Ok(Token::Punct(Punct::Colon)), + Ok(Token::Punct(Punct::Semicolon)), + Ok(Token::Punct(Punct::Question)), + Ok(Token::Punct(Punct::Underscore)), + Ok(Token::Punct(Punct::Backtick)), + Ok(Token::Punct(Punct::LCurly)), + Ok(Token::Punct(Punct::RCurly)), + Ok(Token::Punct(Punct::Not)), ], ); } @@ -238,40 +238,40 @@ fn test_positive_numbers() { "#, Syntax::Auto, &[ - ScanToken::Token(Token::Number(0.0)), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Number(123.0)), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::Number(0.1)), - ScanToken::Token(Token::Number(0.1)), - ScanToken::Token(Token::Number(0.1)), - ScanToken::Token(Token::Number(50.0)), - ScanToken::Token(Token::Number(0.6)), - ScanToken::Token(Token::Number(70.0)), - ScanToken::Token(Token::Number(60.0)), - ScanToken::Token(Token::Number(0.006)), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Number(30.0)), - ScanToken::Token(Token::Number(0.04)), - ScanToken::Token(Token::Number(5.0)), - ScanToken::Token(Token::Number(6.0)), - ScanToken::Token(Token::Number(0.0007)), - ScanToken::Token(Token::Number(12.3)), - ScanToken::Token(Token::Number(4.56)), - ScanToken::Token(Token::Number(789.0)), - ScanToken::Token(Token::Number(999.0)), - ScanToken::Token(Token::Number(0.0112)), - ScanToken::Token(Token::End), - ScanToken::Error(ScanError::ExpectedExponent(String::from("1e"))), - ScanToken::Token(Token::Id(Identifier::new("e1").unwrap())), - ScanToken::Error(ScanError::ExpectedExponent(String::from("1e+"))), - ScanToken::Error(ScanError::ExpectedExponent(String::from("1e-"))), + Ok(Token::Number(0.0)), + Ok(Token::Number(1.0)), + Ok(Token::Number(1.0)), + Ok(Token::Number(1.0)), + Ok(Token::Number(1.0)), + Ok(Token::End), + Ok(Token::Number(123.0)), + Ok(Token::End), + Ok(Token::End), + Ok(Token::Number(1.0)), + Ok(Token::Number(0.1)), + Ok(Token::Number(0.1)), + Ok(Token::Number(0.1)), + Ok(Token::Number(50.0)), + Ok(Token::Number(0.6)), + Ok(Token::Number(70.0)), + Ok(Token::Number(60.0)), + Ok(Token::Number(0.006)), + Ok(Token::End), + Ok(Token::Number(30.0)), + Ok(Token::Number(0.04)), + Ok(Token::Number(5.0)), + Ok(Token::Number(6.0)), + Ok(Token::Number(0.0007)), + Ok(Token::Number(12.3)), + Ok(Token::Number(4.56)), + Ok(Token::Number(789.0)), + Ok(Token::Number(999.0)), + Ok(Token::Number(0.0112)), + Ok(Token::End), + Err(ScanError::ExpectedExponent(String::from("1e"))), + Ok(Token::Id(Identifier::new("e1").unwrap())), + Err(ScanError::ExpectedExponent(String::from("1e+"))), + Err(ScanError::ExpectedExponent(String::from("1e-"))), ], ); } @@ -290,43 +290,43 @@ fn test_negative_numbers() { "#, Syntax::Auto, &[ - ScanToken::Token(Token::Number(-0.0)), - ScanToken::Token(Token::Number(-1.0)), - ScanToken::Token(Token::Number(-1.0)), - ScanToken::Token(Token::Number(-1.0)), - ScanToken::Token(Token::Number(-1.0)), - ScanToken::Token(Token::End), - 
ScanToken::Token(Token::Number(-123.0)), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Number(-0.1)), - ScanToken::Token(Token::Number(-0.1)), - ScanToken::Token(Token::Number(-0.1)), - ScanToken::Token(Token::Number(-0.1)), - ScanToken::Token(Token::Number(-50.0)), - ScanToken::Token(Token::Number(-0.6)), - ScanToken::Token(Token::Number(-70.0)), - ScanToken::Token(Token::Number(-60.0)), - ScanToken::Token(Token::Number(-0.006)), - ScanToken::Token(Token::Number(-3.0)), - ScanToken::Token(Token::Number(-0.04)), - ScanToken::Token(Token::Number(-5.0)), - ScanToken::Token(Token::Number(-6.0)), - ScanToken::Token(Token::Number(-0.0007)), - ScanToken::Token(Token::Number(-12.3)), - ScanToken::Token(Token::Number(-4.56)), - ScanToken::Token(Token::Number(-789.0)), - ScanToken::Token(Token::Number(-999.0)), - ScanToken::Token(Token::Number(-0.0112)), - ScanToken::Token(Token::Number(-1.0)), - ScanToken::Token(Token::Punct(Punct::Dash)), - ScanToken::Token(Token::Punct(Punct::Dot)), - ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e"))), - ScanToken::Token(Token::Punct(Punct::Dash)), - ScanToken::Token(Token::Id(Identifier::new("e1").unwrap())), - ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e+"))), - ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e-"))), - ScanToken::Token(Token::Number(-1.0)), - ScanToken::Token(Token::End), + Ok(Token::Number(-0.0)), + Ok(Token::Number(-1.0)), + Ok(Token::Number(-1.0)), + Ok(Token::Number(-1.0)), + Ok(Token::Number(-1.0)), + Ok(Token::End), + Ok(Token::Number(-123.0)), + Ok(Token::End), + Ok(Token::Number(-0.1)), + Ok(Token::Number(-0.1)), + Ok(Token::Number(-0.1)), + Ok(Token::Number(-0.1)), + Ok(Token::Number(-50.0)), + Ok(Token::Number(-0.6)), + Ok(Token::Number(-70.0)), + Ok(Token::Number(-60.0)), + Ok(Token::Number(-0.006)), + Ok(Token::Number(-3.0)), + Ok(Token::Number(-0.04)), + Ok(Token::Number(-5.0)), + Ok(Token::Number(-6.0)), + Ok(Token::Number(-0.0007)), + Ok(Token::Number(-12.3)), + Ok(Token::Number(-4.56)), + Ok(Token::Number(-789.0)), + Ok(Token::Number(-999.0)), + Ok(Token::Number(-0.0112)), + Ok(Token::Number(-1.0)), + Ok(Token::Punct(Punct::Dash)), + Ok(Token::Punct(Punct::Dot)), + Err(ScanError::ExpectedExponent(String::from("-1e"))), + Ok(Token::Punct(Punct::Dash)), + Ok(Token::Id(Identifier::new("e1").unwrap())), + Err(ScanError::ExpectedExponent(String::from("-1e+"))), + Err(ScanError::ExpectedExponent(String::from("-1e-"))), + Ok(Token::Number(-1.0)), + Ok(Token::End), ], ); } @@ -366,34 +366,34 @@ x"4142" "#, Syntax::Auto, &[ - ScanToken::Token(Token::String(String::from("x"))), - ScanToken::Token(Token::String(String::from("y"))), - ScanToken::Token(Token::String(String::from("abc"))), - ScanToken::Token(Token::String(String::from("Don't"))), - ScanToken::Token(Token::String(String::from("Can't"))), - ScanToken::Token(Token::String(String::from("Won't"))), - ScanToken::Token(Token::String(String::from("\"quoted\""))), - ScanToken::Token(Token::String(String::from("\"quoted\""))), - ScanToken::Token(Token::String(String::from(""))), - ScanToken::Token(Token::String(String::from(""))), - ScanToken::Token(Token::String(String::from("'"))), - ScanToken::Token(Token::String(String::from("\""))), - ScanToken::Error(ScanError::ExpectedQuote), - ScanToken::Error(ScanError::ExpectedQuote), - ScanToken::Token(Token::String(String::from("xyzabcde"))), - ScanToken::Token(Token::String(String::from("foobar"))), - ScanToken::Token(Token::String(String::from("foobar"))), - 
ScanToken::Token(Token::String(String::from("foo"))), - ScanToken::Token(Token::Punct(Punct::Plus)), - ScanToken::Token(Token::End), - ScanToken::Token(Token::String(String::from("bar"))), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Punct(Punct::Plus)), - ScanToken::Token(Token::String(String::from("AB5152"))), - ScanToken::Token(Token::String(String::from("4142QR"))), - ScanToken::Token(Token::String(String::from("ABお"))), - ScanToken::Token(Token::String(String::from("�あいうえお"))), - ScanToken::Token(Token::String(String::from("abc�えxyz"))), + Ok(Token::String(String::from("x"))), + Ok(Token::String(String::from("y"))), + Ok(Token::String(String::from("abc"))), + Ok(Token::String(String::from("Don't"))), + Ok(Token::String(String::from("Can't"))), + Ok(Token::String(String::from("Won't"))), + Ok(Token::String(String::from("\"quoted\""))), + Ok(Token::String(String::from("\"quoted\""))), + Ok(Token::String(String::from(""))), + Ok(Token::String(String::from(""))), + Ok(Token::String(String::from("'"))), + Ok(Token::String(String::from("\""))), + Err(ScanError::ExpectedQuote), + Err(ScanError::ExpectedQuote), + Ok(Token::String(String::from("xyzabcde"))), + Ok(Token::String(String::from("foobar"))), + Ok(Token::String(String::from("foobar"))), + Ok(Token::String(String::from("foo"))), + Ok(Token::Punct(Punct::Plus)), + Ok(Token::End), + Ok(Token::String(String::from("bar"))), + Ok(Token::End), + Ok(Token::Punct(Punct::Plus)), + Ok(Token::String(String::from("AB5152"))), + Ok(Token::String(String::from("4142QR"))), + Ok(Token::String(String::from("ABお"))), + Ok(Token::String(String::from("�あいうえお"))), + Ok(Token::String(String::from("abc�えxyz"))), ], ); } @@ -406,14 +406,14 @@ fn test_shbang() { "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("#").unwrap())), - ScanToken::Token(Token::Punct(Punct::Bang)), - ScanToken::Token(Token::Punct(Punct::Slash)), - ScanToken::Token(Token::Id(Identifier::new("usr").unwrap())), - ScanToken::Token(Token::Punct(Punct::Slash)), - ScanToken::Token(Token::Id(Identifier::new("bin").unwrap())), - ScanToken::Token(Token::Punct(Punct::Slash)), - ScanToken::Token(Token::Id(Identifier::new("pspp").unwrap())), + Ok(Token::Id(Identifier::new("#").unwrap())), + Ok(Token::Punct(Punct::Bang)), + Ok(Token::Punct(Punct::Slash)), + Ok(Token::Id(Identifier::new("usr").unwrap())), + Ok(Token::Punct(Punct::Slash)), + Ok(Token::Id(Identifier::new("bin").unwrap())), + Ok(Token::Punct(Punct::Slash)), + Ok(Token::Id(Identifier::new("pspp").unwrap())), ], ); } @@ -440,27 +440,27 @@ next command. 
"#, Syntax::Auto, &[ - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("com").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("is").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("ambiguous").unwrap())), - ScanToken::Token(Token::Punct(Punct::With)), - ScanToken::Token(Token::Id(Identifier::new("COMPUTE").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("next").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("command").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), + Ok(Token::End), + Ok(Token::End), + Ok(Token::End), + Ok(Token::End), + Ok(Token::End), + Ok(Token::End), + Ok(Token::End), + Ok(Token::Id(Identifier::new("com").unwrap())), + Ok(Token::Id(Identifier::new("is").unwrap())), + Ok(Token::Id(Identifier::new("ambiguous").unwrap())), + Ok(Token::Punct(Punct::With)), + Ok(Token::Id(Identifier::new("COMPUTE").unwrap())), + Ok(Token::End), + Ok(Token::End), + Ok(Token::End), + Ok(Token::End), + Ok(Token::End), + Ok(Token::Id(Identifier::new("next").unwrap())), + Ok(Token::Id(Identifier::new("command").unwrap())), + Ok(Token::End), + Ok(Token::End), ], ); } @@ -481,25 +481,25 @@ second paragraph. "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("DOCUMENT").unwrap())), - ScanToken::Token(Token::String(String::from("DOCUMENT one line."))), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("DOCUMENT").unwrap())), - ScanToken::Token(Token::String(String::from("DOC more"))), - ScanToken::Token(Token::String(String::from(" than"))), - ScanToken::Token(Token::String(String::from(" one"))), - ScanToken::Token(Token::String(String::from(" line."))), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("DOCUMENT").unwrap())), - ScanToken::Token(Token::String(String::from("docu"))), - ScanToken::Token(Token::String(String::from("first.paragraph"))), - ScanToken::Token(Token::String(String::from("isn't parsed as tokens"))), - ScanToken::Token(Token::String(String::from(""))), - ScanToken::Token(Token::String(String::from("second paragraph."))), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("DOCUMENT").unwrap())), + Ok(Token::String(String::from("DOCUMENT one line."))), + Ok(Token::End), + Ok(Token::End), + Ok(Token::Id(Identifier::new("DOCUMENT").unwrap())), + Ok(Token::String(String::from("DOC more"))), + Ok(Token::String(String::from(" than"))), + Ok(Token::String(String::from(" one"))), + Ok(Token::String(String::from(" line."))), + Ok(Token::End), + Ok(Token::End), + Ok(Token::Id(Identifier::new("DOCUMENT").unwrap())), + Ok(Token::String(String::from("docu"))), + Ok(Token::String(String::from("first.paragraph"))), + Ok(Token::String(String::from("isn't parsed as tokens"))), + Ok(Token::String(String::from(""))), + Ok(Token::String(String::from("second paragraph."))), + Ok(Token::End), + Ok(Token::End), ], ); } @@ -516,18 +516,18 @@ FILE /* "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("FIL").unwrap())), - 
ScanToken::Token(Token::Id(Identifier::new("label").unwrap())), - ScanToken::Token(Token::String(String::from("isn't quoted"))), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("FILE").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("lab").unwrap())), - ScanToken::Token(Token::String(String::from("is quoted"))), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("FILE").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("lab").unwrap())), - ScanToken::Token(Token::String(String::from("not quoted here either"))), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("FIL").unwrap())), + Ok(Token::Id(Identifier::new("label").unwrap())), + Ok(Token::String(String::from("isn't quoted"))), + Ok(Token::End), + Ok(Token::Id(Identifier::new("FILE").unwrap())), + Ok(Token::Id(Identifier::new("lab").unwrap())), + Ok(Token::String(String::from("is quoted"))), + Ok(Token::End), + Ok(Token::Id(Identifier::new("FILE").unwrap())), + Ok(Token::Id(Identifier::new("lab").unwrap())), + Ok(Token::String(String::from("not quoted here either"))), + Ok(Token::End), ], ); } @@ -549,23 +549,23 @@ end data "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("begin").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("data").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::String(String::from("123"))), - ScanToken::Token(Token::String(String::from("xxx"))), - ScanToken::Token(Token::Id(Identifier::new("end").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("data").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("BEG").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("DAT").unwrap())), - ScanToken::Token(Token::String(String::from("5 6 7 /* x"))), - ScanToken::Token(Token::String(String::from(""))), - ScanToken::Token(Token::String(String::from("end data"))), - ScanToken::Token(Token::Id(Identifier::new("end").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("data").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("begin").unwrap())), + Ok(Token::Id(Identifier::new("data").unwrap())), + Ok(Token::End), + Ok(Token::String(String::from("123"))), + Ok(Token::String(String::from("xxx"))), + Ok(Token::Id(Identifier::new("end").unwrap())), + Ok(Token::Id(Identifier::new("data").unwrap())), + Ok(Token::End), + Ok(Token::End), + Ok(Token::Id(Identifier::new("BEG").unwrap())), + Ok(Token::Id(Identifier::new("DAT").unwrap())), + Ok(Token::String(String::from("5 6 7 /* x"))), + Ok(Token::String(String::from(""))), + Ok(Token::String(String::from("end data"))), + Ok(Token::Id(Identifier::new("end").unwrap())), + Ok(Token::Id(Identifier::new("data").unwrap())), + Ok(Token::End), ], ); } @@ -585,29 +585,29 @@ end "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("do").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::Punct(Punct::Equals)), - ScanToken::Token(Token::Id(Identifier::new("a").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("b").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("c").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("y").unwrap())), - ScanToken::Token(Token::Punct(Punct::Equals)), - ScanToken::Token(Token::Id(Identifier::new("d").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("e").unwrap())), - 
ScanToken::Token(Token::Id(Identifier::new("f").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::String(String::from(" do repeat a=1 thru 5."))), - ScanToken::Token(Token::String(String::from("another command."))), - ScanToken::Token(Token::String(String::from("second command"))), - ScanToken::Token(Token::String(String::from("+ third command."))), - ScanToken::Token(Token::String(String::from( + Ok(Token::Id(Identifier::new("do").unwrap())), + Ok(Token::Id(Identifier::new("repeat").unwrap())), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::Punct(Punct::Equals)), + Ok(Token::Id(Identifier::new("a").unwrap())), + Ok(Token::Id(Identifier::new("b").unwrap())), + Ok(Token::Id(Identifier::new("c").unwrap())), + Ok(Token::Id(Identifier::new("y").unwrap())), + Ok(Token::Punct(Punct::Equals)), + Ok(Token::Id(Identifier::new("d").unwrap())), + Ok(Token::Id(Identifier::new("e").unwrap())), + Ok(Token::Id(Identifier::new("f").unwrap())), + Ok(Token::End), + Ok(Token::String(String::from(" do repeat a=1 thru 5."))), + Ok(Token::String(String::from("another command."))), + Ok(Token::String(String::from("second command"))), + Ok(Token::String(String::from("+ third command."))), + Ok(Token::String(String::from( "end /* x */ /* y */ repeat print.", ))), - ScanToken::Token(Token::Id(Identifier::new("end").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("end").unwrap())), + Ok(Token::Id(Identifier::new("repeat").unwrap())), + Ok(Token::End), ], ); } @@ -632,38 +632,38 @@ end repeat "#, Syntax::Batch, &[ - ScanToken::Token(Token::Id(Identifier::new("do").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::Punct(Punct::Equals)), - ScanToken::Token(Token::Id(Identifier::new("a").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("b").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("c").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("y").unwrap())), - ScanToken::Token(Token::Punct(Punct::Equals)), - ScanToken::Token(Token::Id(Identifier::new("d").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("e").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("f").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::String(String::from("do repeat a=1 thru 5"))), - ScanToken::Token(Token::String(String::from("another command"))), - ScanToken::Token(Token::String(String::from("second command"))), - ScanToken::Token(Token::String(String::from("+ third command"))), - ScanToken::Token(Token::String(String::from( + Ok(Token::Id(Identifier::new("do").unwrap())), + Ok(Token::Id(Identifier::new("repeat").unwrap())), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::Punct(Punct::Equals)), + Ok(Token::Id(Identifier::new("a").unwrap())), + Ok(Token::Id(Identifier::new("b").unwrap())), + Ok(Token::Id(Identifier::new("c").unwrap())), + Ok(Token::Id(Identifier::new("y").unwrap())), + Ok(Token::Punct(Punct::Equals)), + Ok(Token::Id(Identifier::new("d").unwrap())), + Ok(Token::Id(Identifier::new("e").unwrap())), + Ok(Token::Id(Identifier::new("f").unwrap())), + Ok(Token::End), + Ok(Token::String(String::from("do repeat a=1 thru 5"))), + Ok(Token::String(String::from("another command"))), + Ok(Token::String(String::from("second command"))), + Ok(Token::String(String::from("+ third command"))), + Ok(Token::String(String::from( "end /* x 
*/ /* y */ repeat print", ))), - ScanToken::Token(Token::Id(Identifier::new("end").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("do").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("#a").unwrap())), - ScanToken::Token(Token::Punct(Punct::Equals)), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::End), - ScanToken::Token(Token::String(String::from(" inner command"))), - ScanToken::Token(Token::Id(Identifier::new("end").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())), + Ok(Token::Id(Identifier::new("end").unwrap())), + Ok(Token::Id(Identifier::new("repeat").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("do").unwrap())), + Ok(Token::Id(Identifier::new("repeat").unwrap())), + Ok(Token::Id(Identifier::new("#a").unwrap())), + Ok(Token::Punct(Punct::Equals)), + Ok(Token::Number(1.0)), + Ok(Token::End), + Ok(Token::String(String::from(" inner command"))), + Ok(Token::Id(Identifier::new("end").unwrap())), + Ok(Token::Id(Identifier::new("repeat").unwrap())), ], ); } @@ -681,26 +681,26 @@ fourth command. "#, Syntax::Batch, &[ - ScanToken::Token(Token::Id(Identifier::new("first").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("command").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("another").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("line").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("of").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("first").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("command").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("second").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("command").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("third").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("command").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("fourth").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("command").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("fifth").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("command").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("first").unwrap())), + Ok(Token::Id(Identifier::new("command").unwrap())), + Ok(Token::Id(Identifier::new("another").unwrap())), + Ok(Token::Id(Identifier::new("line").unwrap())), + Ok(Token::Id(Identifier::new("of").unwrap())), + Ok(Token::Id(Identifier::new("first").unwrap())), + Ok(Token::Id(Identifier::new("command").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("second").unwrap())), + Ok(Token::Id(Identifier::new("command").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("third").unwrap())), + Ok(Token::Id(Identifier::new("command").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("fourth").unwrap())), + Ok(Token::Id(Identifier::new("command").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("fifth").unwrap())), + Ok(Token::Id(Identifier::new("command").unwrap())), + Ok(Token::End), ], ); } @@ -709,7 +709,6 @@ mod define { use crate::{ identifier::Identifier, lex::{ - scan::ScanToken, segment::Syntax, token::{Punct, Token}, }, @@ -726,13 +725,13 @@ var1 var2 var3 "#, Syntax::Auto, &[ - 
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::String(String::from("var1 var2 var3"))), - ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::String(String::from("var1 var2 var3"))), + Ok(Token::Id(Identifier::new("!enddefine").unwrap())), + Ok(Token::End), ], ); } @@ -745,13 +744,13 @@ var1 var2 var3 "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::String(String::from(" var1 var2 var3"))), - ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::String(String::from(" var1 var2 var3"))), + Ok(Token::Id(Identifier::new("!enddefine").unwrap())), + Ok(Token::End), ], ); } @@ -764,13 +763,13 @@ var1 var2 var3!enddefine. "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::String(String::from("var1 var2 var3"))), - ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::String(String::from("var1 var2 var3"))), + Ok(Token::Id(Identifier::new("!enddefine").unwrap())), + Ok(Token::End), ], ); } @@ -782,13 +781,13 @@ var1 var2 var3!enddefine. "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::String(String::from("var1 var2 var3"))), - ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::String(String::from("var1 var2 var3"))), + Ok(Token::Id(Identifier::new("!enddefine").unwrap())), + Ok(Token::End), ], ); } @@ -801,12 +800,12 @@ var1 var2 var3!enddefine. 
"#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Id(Identifier::new("!enddefine").unwrap())), + Ok(Token::End), ], ); } @@ -821,14 +820,14 @@ var1 var2 var3!enddefine. "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::String(String::from(""))), - ScanToken::Token(Token::String(String::from(""))), - ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::String(String::from(""))), + Ok(Token::String(String::from(""))), + Ok(Token::Id(Identifier::new("!enddefine").unwrap())), + Ok(Token::End), ], ); } @@ -841,23 +840,23 @@ var1 var2 var3!enddefine. "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Id(Identifier::new("a").unwrap())), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Punct(Punct::Comma)), - ScanToken::Token(Token::Id(Identifier::new("b").unwrap())), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Punct(Punct::Comma)), - ScanToken::Token(Token::Id(Identifier::new("c").unwrap())), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Id(Identifier::new("a").unwrap())), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Punct(Punct::Comma)), + Ok(Token::Id(Identifier::new("b").unwrap())), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Punct(Punct::Comma)), + Ok(Token::Id(Identifier::new("c").unwrap())), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Id(Identifier::new("!enddefine").unwrap())), + Ok(Token::End), ], ); } @@ -874,23 +873,23 @@ var1 var2 var3!enddefine. 
"#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Id(Identifier::new("a").unwrap())), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Punct(Punct::Comma)), - ScanToken::Token(Token::Id(Identifier::new("b").unwrap())), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Punct(Punct::Comma)), - ScanToken::Token(Token::Id(Identifier::new("c").unwrap())), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Id(Identifier::new("a").unwrap())), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Punct(Punct::Comma)), + Ok(Token::Id(Identifier::new("b").unwrap())), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Punct(Punct::Comma)), + Ok(Token::Id(Identifier::new("c").unwrap())), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::Id(Identifier::new("!enddefine").unwrap())), + Ok(Token::End), ], ); } @@ -907,19 +906,19 @@ content 2 "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::Punct(Punct::Comma)), - ScanToken::Token(Token::Id(Identifier::new("y").unwrap())), - ScanToken::Token(Token::Punct(Punct::Comma)), - ScanToken::Token(Token::Id(Identifier::new("z").unwrap())), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::String(String::from("content 1"))), - ScanToken::Token(Token::String(String::from("content 2"))), - ScanToken::Token(Token::Id(Identifier::new("!enddefine").unwrap())), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::Punct(Punct::Comma)), + Ok(Token::Id(Identifier::new("y").unwrap())), + Ok(Token::Punct(Punct::Comma)), + Ok(Token::Id(Identifier::new("z").unwrap())), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::String(String::from("content 1"))), + Ok(Token::String(String::from("content 2"))), + Ok(Token::Id(Identifier::new("!enddefine").unwrap())), + Ok(Token::End), ], ); } @@ -932,15 +931,15 @@ data list /x 1. 
"#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("data").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("list").unwrap())), - ScanToken::Token(Token::Punct(Punct::Slash)), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::End), + Ok(Token::Id(Identifier::new("data").unwrap())), + Ok(Token::Id(Identifier::new("list").unwrap())), + Ok(Token::Punct(Punct::Slash)), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::Number(1.0)), + Ok(Token::End), ], ); } @@ -954,16 +953,16 @@ data list /x 1. "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("data").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("list").unwrap())), - ScanToken::Token(Token::Punct(Punct::Slash)), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("data").unwrap())), + Ok(Token::Id(Identifier::new("list").unwrap())), + Ok(Token::Punct(Punct::Slash)), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::Number(1.0)), + Ok(Token::End), ], ); } @@ -977,18 +976,18 @@ data list /x 1. "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("data").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("list").unwrap())), - ScanToken::Token(Token::Punct(Punct::Slash)), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::End), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::End), + Ok(Token::Id(Identifier::new("data").unwrap())), + Ok(Token::Id(Identifier::new("list").unwrap())), + Ok(Token::Punct(Punct::Slash)), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::Number(1.0)), + Ok(Token::End), ], ); } @@ -1003,15 +1002,15 @@ data list /x 1. 
"#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::End), - ScanToken::Token(Token::Id(Identifier::new("data").unwrap())), - ScanToken::Token(Token::Id(Identifier::new("list").unwrap())), - ScanToken::Token(Token::Punct(Punct::Slash)), - ScanToken::Token(Token::Id(Identifier::new("x").unwrap())), - ScanToken::Token(Token::Number(1.0)), - ScanToken::Token(Token::End), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::End), + Ok(Token::Id(Identifier::new("data").unwrap())), + Ok(Token::Id(Identifier::new("list").unwrap())), + Ok(Token::Punct(Punct::Slash)), + Ok(Token::Id(Identifier::new("x").unwrap())), + Ok(Token::Number(1.0)), + Ok(Token::End), ], ); } @@ -1025,12 +1024,12 @@ content line 2 "#, Syntax::Auto, &[ - ScanToken::Token(Token::Id(Identifier::new("define").unwrap())), - ScanToken::Token(Token::String(String::from("!macro1"))), - ScanToken::Token(Token::Punct(Punct::LParen)), - ScanToken::Token(Token::Punct(Punct::RParen)), - ScanToken::Token(Token::String(String::from("content line 1"))), - ScanToken::Token(Token::String(String::from("content line 2"))), + Ok(Token::Id(Identifier::new("define").unwrap())), + Ok(Token::String(String::from("!macro1"))), + Ok(Token::Punct(Punct::LParen)), + Ok(Token::Punct(Punct::RParen)), + Ok(Token::String(String::from("content line 1"))), + Ok(Token::String(String::from("content line 2"))), ], ); } diff --git a/rust/pspp/src/lex/segment/mod.rs b/rust/pspp/src/lex/segment/mod.rs index 21ab7a96d4..9cb1cc410d 100644 --- a/rust/pspp/src/lex/segment/mod.rs +++ b/rust/pspp/src/lex/segment/mod.rs @@ -34,11 +34,14 @@ //! form a single string token [Token::String]. Still other segments are //! ignored (e.g. [Segment::Spaces]) or trigger special behavior such as error //! messages later in tokenization (e.g. [Segment::ExpectedQuote]). +//! +//! [Token::Id]: crate::lex::token::Token::Id +//! [Token::String]: crate::lex::token::Token::String -use std::cmp::Ordering; +// Warn about missing docs, but not for items declared with `#[cfg(test)]`. +#![cfg_attr(not(test), warn(missing_docs))] -#[cfg(doc)] -use crate::lex::token::Token; +use std::cmp::Ordering; use crate::{ identifier::{id_match, id_match_n, IdentifierChar}, @@ -65,6 +68,8 @@ use super::command_name::{command_match, COMMAND_NAMES}; pub enum Syntax { /// Try to interpret input correctly regardless of whether it is written /// for interactive or batch syntax. + /// + /// This is `Syntax::default()`. #[default] Auto, @@ -76,43 +81,127 @@ pub enum Syntax { } /// The type of a segment. +/// +/// A [Segment] is a label for a string slice and is normally paired with one. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum Segment { + /// A number. Number, + + /// A quoted string (`'...'` or `"..."`).. QuotedString, + + /// A hexadecimal string (`X'...'` or `X"..."`). HexString, + + /// A Unicode string (`U'...'` or `U"..."`). UnicodeString, + + /// An unquoted string. + /// + /// Unquoted strings appear only in a few special-case constructs, such as + /// the `FILE LABEL` command. UnquotedString, + + /// An identifier. Identifier, + + /// A punctuator or operator. Punct, + + /// `#!` at the beginning of a syntax file only. Shbang, + + /// Spaces. Spaces, + + /// A comment (`/* ... */`). Comment, + + /// New-line. Newline, + + /// A comment command (`* ...` or `COMMENT ...`). 
     CommentCommand,
+
+    /// In a `DO REPEAT` command, one of the lines to be repeated.
     DoRepeatCommand,
+
+    /// Indicates `DO REPEAT` nested more deeply than supported.
     DoRepeatOverflow,
+
+    /// A line of inline data inside `BEGIN DATA`...`END DATA`.
     InlineData,
+
+    /// In `!DEFINE`, an identifier for the macro being defined.
+    ///
+    /// Distinguished from [Identifier](Self::Identifier) because a `MacroName`
+    /// must never be macro-expanded.
     MacroName,
+
+    /// Contents of `!DEFINE`...`!ENDDEFINE`.
     MacroBody,
+
+    /// Represents the beginning of a `DOCUMENT` command.
+    ///
+    /// This segment is not associated with any text: the actual `DOCUMENT`
+    /// keyword is part of the following [Document](Self::Document) segment,
+    /// because documents include the `DOCUMENT` keyword.
     StartDocument,
+
+    /// One of the lines of documents in a `DOCUMENT` command.
+    ///
+    /// The first line of a document includes the `DOCUMENT` keyword itself.
     Document,
+
+    /// A command separator.
+    ///
+    /// This segment is usually for `+`, `-`, or `.` at the beginning of a line.
     StartCommand,
+
+    /// A command separator.
+    ///
+    /// This segment is usually for a blank line. It also appears at the end of
+    /// a file.
     SeparateCommands,
+
+    /// A command separator.
+    ///
+    /// This segment is for `.` at the end of a line.
     EndCommand,
+
+    /// Missing quote at the end of a line.
+    ///
+    /// This segment contains a partial quoted string. It starts with a quote
+    /// mark (`"` or `'`, possibly preceded by `X` or `U`) but goes to the end
+    /// of the line without the matching end quote mark.
     ExpectedQuote,
+
+    /// Missing exponent in number.
+    ///
+    /// This segment contains a number that ends with `E` or `E+` or `E-`
+    /// without a following exponent.
     ExpectedExponent,
+
+    /// Unexpected character.
+    ///
+    /// The segment is a single character that isn't valid in syntax.
     UnexpectedChar,
 }
 
 bitflags! {
     #[derive(Copy, Clone, Debug)]
-    pub struct Substate: u8 {
+    struct Substate: u8 {
         const START_OF_LINE = 1;
         const START_OF_COMMAND = 2;
     }
 }
 
+/// Used by [Segmenter] to indicate that more input is needed.
+#[derive(Copy, Clone, Debug)]
+pub struct Incomplete;
+
+/// Labels syntax input with [Segment]s.
 #[derive(Copy, Clone)]
 pub struct Segmenter {
     state: (State, Substate),
@@ -120,9 +209,6 @@ pub struct Segmenter {
     syntax: Syntax,
 }
 
-#[derive(Copy, Clone, Debug)]
-pub struct Incomplete;
-
 impl Segmenter {
     /// Returns a segmenter with the given `syntax`.
     ///
@@ -147,6 +233,7 @@ impl Segmenter {
         }
     }
 
+    /// Returns the [Syntax] variant passed to [new](Self::new).
     pub fn syntax(&self) -> Syntax {
         self.syntax
     }
@@ -162,8 +249,8 @@ impl Segmenter {
     /// Returns the style of command prompt to display to an interactive user
     /// for input in the current state. The return value is most accurate
     /// with [Syntax::Interactive] syntax and at the beginning of a line (that
-    /// is, if [`Segmenter::push`] consumed as much as possible of the input up
-    /// to a new-line).
+    /// is, if [Segmenter::push] consumed as much as possible of the input up to
+    /// a new-line).
     pub fn prompt(&self) -> PromptStyle {
         match self.state.0 {
             State::Shbang => PromptStyle::First,
@@ -202,36 +289,6 @@ impl Segmenter {
         }
     }
 
-    /// Attempts to label a prefix of the remaining input with a segment type.
-    /// The caller supplies a prefix of the remaining input as `input`. If
-    /// `eof` is true, then `input` is the entire (remainder) of the input; if
-    /// `eof` is false, then further input is potentially available.
-    ///
-    /// The input may contain '\n' or '\r\n' line ends in any combination.
-    ///
-    /// If successful, returns `Ok((n, type))`, where `n` is the number of bytes
-    /// in the segment at the beginning of `input` (a number in
-    /// `0..=input.len()`) and the type of that segment. The next call should
-    /// not include those bytes in `input`, because they have (figuratively)
-    /// been consumed by the segmenter.
-    ///
-    /// Segments can have zero length, including segment types `Type::End`,
-    /// `Type::SeparateCommands`, `Type::StartDocument`, `Type::InlineData`, and
-    /// `Type::Spaces`.
-    ///
-    /// Failure occurs only if the segment type of the bytes in `input` cannot
-    /// yet be determined. In this case, this function returns `Err(Incomplete)`. If
-    /// more input is available, the caller should obtain some more, then call
-    /// again with a longer `input`. If this is not enough, the process might
-    /// need to repeat again and again. If input is exhausted, then the caller
-    /// may call again setting `eof` to true. This function will never return
-    /// `Err(Incomplete)` when `eof` is true.
-    ///
-    /// The caller must not, in a sequence of calls, supply contradictory input.
-    /// That is, bytes provided as part of `input` in one call, but not
-    /// consumed, must not be provided with *different* values on subsequent
-    /// calls. This is because the function must often make decisions based on
-    /// looking ahead beyond the bytes that it consumes.
     fn push_rest<'a>(
         &mut self,
         input: &'a str,
@@ -279,6 +336,36 @@ impl Segmenter {
         }
     }
 
+    /// Attempts to label a prefix of the remaining input with a segment type.
+    /// The caller supplies a prefix of the remaining input as `input`. If
+    /// `eof` is true, then `input` is the entire (remainder) of the input; if
+    /// `eof` is false, then further input is potentially available.
+    ///
+    /// The input may contain `\n` or `\r\n` line ends in any combination.
+    ///
+    /// If successful, returns `Ok((n, type))`, where `n` is the number of bytes
+    /// in the segment at the beginning of `input` (a number in
+    /// `0..=input.len()`) and the type of that segment. The next call should
+    /// not include those bytes in `input`, because the segmenter has
+    /// (figuratively) consumed them.
+    ///
+    /// Segments can have zero length, including segment types
+    /// [Segment::SeparateCommands], [Segment::StartDocument],
+    /// [Segment::InlineData], and [Segment::Spaces].
+    ///
+    /// Failure occurs only if the segment type of the bytes in `input` cannot
+    /// yet be determined. In this case, this function returns
+    /// `Err(Incomplete)`. If more input is available, the caller should obtain
+    /// some more, then call again with a longer `input`. If this is still not
+    /// enough, the process might need to repeat again and again. If input is
+    /// exhausted, then the caller may call again setting `eof` to true. This
+    /// function will never return `Err(Incomplete)` when `eof` is true.
+    ///
+    /// The caller must not, in a sequence of calls, supply contradictory input.
+    /// That is, bytes provided as part of `input` in one call, but not
+    /// consumed, must not be provided with *different* values on subsequent
+    /// calls. This is because the function must often make decisions based on
+    /// looking ahead beyond the bytes that it consumes.
     pub fn push(&mut self, input: &str, eof: bool) -> Result<Option<(usize, Segment)>, Incomplete> {
         Ok(self
             .push_rest(input, eof)?
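The calling convention documented for push() above is easiest to see in a
short driver loop. The sketch below is illustrative only, not part of this
patch: it assumes a `Segmenter::new(syntax, is_snippet)` constructor (the
exact signature is not shown in this diff) and feeds the segmenter a
complete in-memory string, so `eof` is true and `Err(Incomplete)` can never
be returned.

    use pspp::lex::segment::{Segment, Segmenter, Syntax};

    /// Labels all of `rest`, pairing each segment with the text it covers.
    fn segment_all(mut rest: &str) -> Vec<(Segment, &str)> {
        // Assumed constructor arguments: syntax mode plus a "snippet" flag.
        let mut segmenter = Segmenter::new(Syntax::Auto, false);
        let mut labeled = Vec::new();
        // With `eof` true, `push` never returns `Err(Incomplete)`, so
        // `unwrap` is safe here; `Ok(None)` means no more segments.
        while let Some((n, segment)) = segmenter.push(rest, true).unwrap() {
            // The first `n` bytes (possibly zero) are consumed; they must
            // not be resubmitted on the next call.
            labeled.push((segment, &rest[..n]));
            rest = &rest[n..];
        }
        labeled
    }

Zero-length segments advance the segmenter's internal state without
consuming bytes, so the loop still terminates once `push` reports that the
input is exhausted.
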
diff --git a/rust/pspp/src/macros.rs b/rust/pspp/src/macros.rs
index 110c171b12..e34a247cbc 100644
--- a/rust/pspp/src/macros.rs
+++ b/rust/pspp/src/macros.rs
@@ -31,7 +31,7 @@ use unicase::UniCase;
 use crate::{
     identifier::Identifier,
     lex::{
-        scan::{ScanError, ScanToken, StringScanner, StringSegmenter},
+        scan::{ScanError, StringScanner, StringSegmenter},
         segment::Syntax,
         token::{Punct, Token},
     },
@@ -277,11 +277,11 @@ fn tokenize_string_into(
 ) {
     for (syntax, token) in StringSegmenter::new(s, mode, true) {
         match token {
-            ScanToken::Token(token) => output.push(MacroToken {
+            Ok(token) => output.push(MacroToken {
                 token,
                 syntax: String::from(syntax),
             }),
-            ScanToken::Error(scan_error) => error(MacroError::ScanError(scan_error)),
+            Err(scan_error) => error(MacroError::ScanError(scan_error)),
         }
     }
 }
@@ -298,7 +298,7 @@ fn tokenize_string(
 
 fn try_unquote_string(input: &str, mode: Syntax) -> Option<String> {
     let mut scanner = StringScanner::new(input, mode, true);
-    let Some(ScanToken::Token(Token::String(unquoted))) = scanner.next() else {
+    let Some(Ok(Token::String(unquoted))) = scanner.next() else {
         return None;
     };
     let None = scanner.next() else { return None };
-- 
2.30.2
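Post-script for reviewers: the recurring mechanical change in this patch
replaces the old `ScanToken` enum with `Result<Token, ScanError>`, so
callers match `Ok`/`Err` directly, as `tokenize_string_into` above now
does. A minimal sketch of a caller under the new API, assuming
`StringScanner::new(input, syntax, is_snippet)` keeps the three-argument
form used in `try_unquote_string`, that the third argument selects
snippet-style scanning, and that `Token` and `ScanError` implement `Debug`:

    use pspp::lex::{scan::StringScanner, segment::Syntax, token::Token};

    /// Prints each token in `syntax`, reporting scan errors as they occur.
    fn print_tokens(syntax: &str) {
        // `true` is assumed to scan `syntax` as an isolated snippet, as in
        // `try_unquote_string` above.
        let mut scanner = StringScanner::new(syntax, Syntax::Auto, true);
        while let Some(result) = scanner.next() {
            match result {
                Ok(token) => println!("token: {token:?}"),
                Err(scan_error) => eprintln!("scan error: {scan_error:?}"),
            }
        }
    }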