Stop distinguishing macro tokens from other tokens.
author Ben Pfaff <blp@cs.stanford.edu>
Wed, 17 Jul 2024 20:57:10 +0000 (13:57 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Wed, 17 Jul 2024 20:57:10 +0000 (13:57 -0700)
rust/src/identifier.rs
rust/src/lex/scan/mod.rs
rust/src/lex/scan/test.rs
rust/src/lex/segment/mod.rs
rust/src/lex/segment/test.rs
rust/src/lex/token.rs
rust/src/macros.rs

index 3be08083ca43db272e1dca7425302b5e418bca84..bfb4331991199bf5fc71b455038abe99285a680f 100644 (file)
@@ -30,7 +30,7 @@ pub trait IdentifierChar {
 
 impl IdentifierChar for char {
     fn ascii_may_start_id(self) -> bool {
-        matches!(self, 'a'..='z' | 'A'..='Z' | '@' | '#' | '$')
+        matches!(self, 'a'..='z' | 'A'..='Z' | '@' | '#' | '$' | '!')
     }
 
     fn may_start_id(self) -> bool {
@@ -39,13 +39,12 @@ impl IdentifierChar for char {
         } else {
             use MajorCategory::*;
 
-            ([L, M, S].contains(&self.get_major_category()) || "@#$".contains(self))
-                && self != char::REPLACEMENT_CHARACTER
+            [L, M, S].contains(&self.get_major_category()) && self != char::REPLACEMENT_CHARACTER
         }
     }
 
     fn ascii_may_continue_id(self) -> bool {
-        self.ascii_may_start_id() || matches!(self, '0'..='9' | '.' | '_')
+        matches!(self, 'a'..='z' | 'A'..='Z' | '0'..='9' | '@' | '#' | '$' | '.' | '_')
     }
 
     fn may_continue_id(self) -> bool {
@@ -54,8 +53,7 @@ impl IdentifierChar for char {
         } else {
             use MajorCategory::*;
 
-            ([L, M, S, N].contains(&self.get_major_category()) || "@#$._".contains(self))
-                && self != char::REPLACEMENT_CHARACTER
+            [L, M, S, N].contains(&self.get_major_category()) && self != char::REPLACEMENT_CHARACTER
         }
     }
 }
@@ -68,6 +66,9 @@ pub enum Error {
     #[error("\"{0}\" may not be used as an identifier because it is a reserved word.")]
     Reserved(String),
 
+    #[error("\"!\" is not a valid identifier.")]
+    Bang,
+
     #[error("\"{0}\" may not be used as an identifier because it begins with disallowed character \"{1}\".")]
     BadFirstCharacter(String, char),
 
@@ -90,15 +91,58 @@ pub enum Error {
     },
 }
 
-pub fn is_reserved_word(s: &str) -> bool {
-    for word in [
-        "and", "or", "not", "eq", "ge", "gt", "le", "lt", "ne", "all", "by", "to", "with",
-    ] {
-        if s.eq_ignore_ascii_case(word) {
-            return true;
+pub enum ReservedWord {
+    And,
+    Or,
+    Not,
+    Eq,
+    Ge,
+    Gt,
+    Le,
+    Lt,
+    Ne,
+    All,
+    By,
+    To,
+    With,
+}
+
+impl TryFrom<&str> for ReservedWord {
+    type Error = ();
+
+    fn try_from(source: &str) -> Result<Self, Self::Error> {
+        if !(2..=4).contains(&source.len()) {
+            Err(())
+        } else {
+            let b = source.as_bytes();
+            let c0 = b[0].to_ascii_uppercase();
+            let c1 = b[1].to_ascii_uppercase();
+            match (source.len(), c0, c1) {
+                (2, b'B', b'Y') => Ok(Self::By),
+                (2, b'E', b'Q') => Ok(Self::Eq),
+                (2, b'G', b'T') => Ok(Self::Gt),
+                (2, b'G', b'E') => Ok(Self::Ge),
+                (2, b'L', b'T') => Ok(Self::Lt),
+                (2, b'L', b'E') => Ok(Self::Le),
+                (2, b'N', b'E') => Ok(Self::Ne),
+                (3, b'N', b'O') if b[2].to_ascii_uppercase() == b'T' => Ok(Self::Not),
+                (2, b'O', b'R') => Ok(Self::Or),
+                (2, b'T', b'O') => Ok(Self::To),
+                (3, b'A', b'L') if b[2].to_ascii_uppercase() == b'L' => Ok(Self::All),
+                (3, b'A', b'N') if b[2].to_ascii_uppercase() == b'D' => Ok(Self::And),
+                (4, b'W', b'I')
+                    if b[2].to_ascii_uppercase() == b'T' && b[3].to_ascii_uppercase() == b'H' =>
+                {
+                    Ok(Self::With)
+                }
+                _ => Err(()),
+            }
         }
     }
-    false
+}
+
+pub fn is_reserved_word(s: &str) -> bool {
+    ReservedWord::try_from(s).is_ok()
 }
 
 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
@@ -160,6 +204,9 @@ impl Identifier {
         if is_reserved_word(s) {
             return Err(Error::Reserved(s.into()));
         }
+        if s == "!" {
+            return Err(Error::Bang);
+        }
 
         let mut i = s.chars();
         let first = i.next().unwrap();
index cbeac4095d414fa01b8acfcfe8b50e5c9d61982b..e099fcd402c69ff62ce15edfcaf9800f2bacdb48 100644 (file)
 //! are the same as the tokens used by the PSPP parser with a few additional
 //! types.
 
+use crate::identifier::ReservedWord;
+
 use super::{
     segment::{Mode, Segment, Segmenter},
-    token::{MacroToken, Punct, Token},
+    token::{Punct, Token},
 };
 use std::collections::VecDeque;
 use thiserror::Error as ThisError;
@@ -150,27 +152,26 @@ impl ScanToken {
             | Segment::MacroBody
             | Segment::MacroName => Some(Self::Token(Token::String(String::from(s)))),
 
-            Segment::ReservedWord => {
-                let c0 = s.as_bytes()[0].to_ascii_uppercase();
-                let c1 = s.as_bytes()[1].to_ascii_uppercase();
-                match (c0, c1) {
-                    (b'B', _) => Some(Self::Token(Token::Punct(Punct::By))),
-                    (b'E', _) => Some(Self::Token(Token::Punct(Punct::Eq))),
-                    (b'G', b'T') => Some(Self::Token(Token::Punct(Punct::Gt))),
-                    (b'G', _) => Some(Self::Token(Token::Punct(Punct::Ge))),
-                    (b'L', b'T') => Some(Self::Token(Token::Punct(Punct::Lt))),
-                    (b'L', _) => Some(Self::Token(Token::Punct(Punct::Le))),
-                    (b'N', b'E') => Some(Self::Token(Token::Punct(Punct::Ne))),
-                    (b'N', _) => Some(Self::Token(Token::Punct(Punct::Not))),
-                    (b'O', _) => Some(Self::Token(Token::Punct(Punct::Or))),
-                    (b'T', _) => Some(Self::Token(Token::Punct(Punct::To))),
-                    (b'A', b'L') => Some(Self::Token(Token::Punct(Punct::All))),
-                    (b'A', _) => Some(Self::Token(Token::Punct(Punct::And))),
-                    (b'W', _) => Some(Self::Token(Token::Punct(Punct::With))),
-                    _ => unreachable!(),
-                }
+            Segment::Identifier => {
+                if let Ok(reserved_word) = ReservedWord::try_from(s) {
+                    match reserved_word {
+                        ReservedWord::And => Some(Self::Token(Token::Punct(Punct::And))),
+                        ReservedWord::Or => Some(Self::Token(Token::Punct(Punct::Or))),
+                        ReservedWord::Not => Some(Self::Token(Token::Punct(Punct::Not))),
+                        ReservedWord::Eq => Some(Self::Token(Token::Punct(Punct::Eq))),
+                        ReservedWord::Ge => Some(Self::Token(Token::Punct(Punct::Ge))),
+                        ReservedWord::Gt => Some(Self::Token(Token::Punct(Punct::Gt))),
+                        ReservedWord::Le => Some(Self::Token(Token::Punct(Punct::Le))),
+                        ReservedWord::Lt => Some(Self::Token(Token::Punct(Punct::Lt))),
+                        ReservedWord::Ne => Some(Self::Token(Token::Punct(Punct::Ne))),
+                        ReservedWord::All => Some(Self::Token(Token::Punct(Punct::All))),
+                        ReservedWord::By => Some(Self::Token(Token::Punct(Punct::By))),
+                        ReservedWord::To => Some(Self::Token(Token::Punct(Punct::To))),
+                        ReservedWord::With => Some(Self::Token(Token::Punct(Punct::With))),
+                    }
+                } else {
+                    Some(Self::Token(Token::Id(String::from(s))))}
             }
-            Segment::Identifier => Some(Self::Token(Token::Id(String::from(s)))),
             Segment::Punct => match s {
                 "(" => Some(Self::Token(Token::Punct(Punct::LParen))),
                 ")" => Some(Self::Token(Token::Punct(Punct::RParen))),
@@ -196,12 +197,13 @@ impl ScanToken {
                 "<>" => Some(Self::Token(Token::Punct(Punct::Ne))),
                 "~=" => Some(Self::Token(Token::Punct(Punct::Ne))),
                 ">=" => Some(Self::Token(Token::Punct(Punct::Ge))),
-                "!" => Some(Self::Token(Token::MacroToken(MacroToken::Bang))),
-                "%" => Some(Self::Token(Token::MacroToken(MacroToken::Percent))),
-                "?" => Some(Self::Token(Token::MacroToken(MacroToken::Question))),
-                "`" => Some(Self::Token(Token::MacroToken(MacroToken::Backtick))),
-                "_" => Some(Self::Token(Token::MacroToken(MacroToken::Underscore))),
-                "." => Some(Self::Token(Token::MacroToken(MacroToken::Dot))),
+                "!" => Some(Self::Token(Token::Punct(Punct::Bang))),
+                "%" => Some(Self::Token(Token::Punct(Punct::Percent))),
+                "?" => Some(Self::Token(Token::Punct(Punct::Question))),
+                "`" => Some(Self::Token(Token::Punct(Punct::Backtick))),
+                "_" =>Some(Self::Token(Token::Punct(Punct::Underscore))),
+                "." =>Some(Self::Token(Token::Punct(Punct::Dot))),
+                "!*" => Some(Self::Token(Token::Punct(Punct::BangAsterisk))),
                 _ => unreachable!("bad punctuator {s:?}"),
             },
             Segment::Shbang
@@ -210,9 +212,6 @@ impl ScanToken {
             | Segment::Newline
             | Segment::CommentCommand => None,
             Segment::DoRepeatOverflow => Some(Self::Error(ScanError::DoRepeatOverflow)),
-            Segment::MacroId => Some(Self::Token(Token::MacroToken(MacroToken::MacroId(
-                String::from(s),
-            )))),
             Segment::StartDocument => Some(Self::Token(Token::Id(String::from("DOCUMENT")))),
             Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand => {
                 Some(Self::Token(Token::EndCommand))
index d009131f63c2741f0b7ec0c974afb66d8aead8e1..6affeac0e495b98fdc610942de1b9c9b6e4bfab4 100644 (file)
@@ -1,6 +1,6 @@
 use crate::lex::{
     segment::Mode,
-    token::{MacroToken, Punct, Token},
+    token::{Punct, Token},
 };
 
 use super::{ScanError, ScanToken, StringLexer};
@@ -13,10 +13,6 @@ fn print_token(token: &Token) {
         Token::String(s) => print!("Token::String(String::from({s:?}))"),
         Token::EndCommand => print!("Token::EndCommand"),
         Token::Punct(punct) => print!("Token::Punct(Punct::{punct:?})"),
-        Token::MacroToken(MacroToken::MacroId(id)) => {
-            print!("Token::MacroToken(MacroToken::MacroId(String::from({id:?})))")
-        }
-        Token::MacroToken(m) => print!("Token::MacroToken(MacroToken::{m:?})"),
     }
 }
 
@@ -67,16 +63,14 @@ WXYZ. /* unterminated end of line comment
             ScanToken::Token(Token::Id(String::from("$x"))),
             ScanToken::Token(Token::Id(String::from("@efg"))),
             ScanToken::Token(Token::Id(String::from("@@."))),
-            ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                "!abcd",
-            )))),
-            ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from("!*")))),
-            ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from("!*")))),
+            ScanToken::Token(Token::Id(String::from("!abcd"))),
+            ScanToken::Token(Token::Punct(Punct::BangAsterisk)),
+            ScanToken::Token(Token::Punct(Punct::BangAsterisk)),
             ScanToken::Token(Token::Id(String::from("a"))),
             ScanToken::Token(Token::Id(String::from("#.#"))),
-            ScanToken::Token(Token::MacroToken(MacroToken::Dot)),
+            ScanToken::Token(Token::Punct(Punct::Dot)),
             ScanToken::Token(Token::Id(String::from("x"))),
-            ScanToken::Token(Token::MacroToken(MacroToken::Underscore)),
+            ScanToken::Token(Token::Punct(Punct::Underscore)),
             ScanToken::Token(Token::Id(String::from("z"))),
             ScanToken::Token(Token::EndCommand),
             ScanToken::Token(Token::Id(String::from("abcd."))),
@@ -199,12 +193,12 @@ fn test_punctuation() {
             ScanToken::Token(Token::Punct(Punct::LSquare)),
             ScanToken::Token(Token::Punct(Punct::RSquare)),
             ScanToken::Token(Token::Punct(Punct::Exp)),
-            ScanToken::Token(Token::MacroToken(MacroToken::Percent)),
+            ScanToken::Token(Token::Punct(Punct::Percent)),
             ScanToken::Token(Token::Punct(Punct::Colon)),
             ScanToken::Token(Token::Punct(Punct::Semicolon)),
-            ScanToken::Token(Token::MacroToken(MacroToken::Question)),
-            ScanToken::Token(Token::MacroToken(MacroToken::Underscore)),
-            ScanToken::Token(Token::MacroToken(MacroToken::Backtick)),
+            ScanToken::Token(Token::Punct(Punct::Question)),
+            ScanToken::Token(Token::Punct(Punct::Underscore)),
+            ScanToken::Token(Token::Punct(Punct::Backtick)),
             ScanToken::Token(Token::Punct(Punct::LCurly)),
             ScanToken::Token(Token::Punct(Punct::RCurly)),
             ScanToken::Token(Token::Punct(Punct::Not)),
@@ -306,7 +300,7 @@ fn test_negative_numbers() {
             ScanToken::Token(Token::Number(-0.0112)),
             ScanToken::Token(Token::Number(-1.0)),
             ScanToken::Token(Token::Punct(Punct::Dash)),
-            ScanToken::Token(Token::MacroToken(MacroToken::Dot)),
+            ScanToken::Token(Token::Punct(Punct::Dot)),
             ScanToken::Error(ScanError::ExpectedExponent(String::from("-1e"))),
             ScanToken::Token(Token::Punct(Punct::Dash)),
             ScanToken::Token(Token::Id(String::from("e1"))),
@@ -395,7 +389,7 @@ fn test_shbang() {
         Mode::Auto,
         &[
             ScanToken::Token(Token::Id(String::from("#"))),
-            ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from("!")))),
+            ScanToken::Token(Token::Punct(Punct::Bang)),
             ScanToken::Token(Token::Punct(Punct::Slash)),
             ScanToken::Token(Token::Id(String::from("usr"))),
             ScanToken::Token(Token::Punct(Punct::Slash)),
@@ -697,7 +691,7 @@ mod define {
     use crate::lex::{
         scan::ScanToken,
         segment::Mode,
-        token::{MacroToken, Punct, Token},
+        token::{Punct, Token},
     };
 
     use super::check_scan;
@@ -716,9 +710,7 @@ var1 var2 var3
                 ScanToken::Token(Token::Punct(Punct::LParen)),
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
-                ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                    "!enddefine",
-                )))),
+                ScanToken::Token(Token::Id(String::from("!enddefine"))),
                 ScanToken::Token(Token::EndCommand),
             ],
         );
@@ -737,9 +729,7 @@ var1 var2 var3
                 ScanToken::Token(Token::Punct(Punct::LParen)),
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::String(String::from(" var1 var2 var3"))),
-                ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                    "!enddefine",
-                )))),
+                ScanToken::Token(Token::Id(String::from("!enddefine"))),
                 ScanToken::Token(Token::EndCommand),
             ],
         );
@@ -758,9 +748,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::Punct(Punct::LParen)),
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
-                ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                    "!enddefine",
-                )))),
+                ScanToken::Token(Token::Id(String::from("!enddefine"))),
                 ScanToken::Token(Token::EndCommand),
             ],
         );
@@ -778,9 +766,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::Punct(Punct::LParen)),
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::String(String::from("var1 var2 var3"))),
-                ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                    "!enddefine",
-                )))),
+                ScanToken::Token(Token::Id(String::from("!enddefine"))),
                 ScanToken::Token(Token::EndCommand),
             ],
         );
@@ -798,9 +784,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::String(String::from("!macro1"))),
                 ScanToken::Token(Token::Punct(Punct::LParen)),
                 ScanToken::Token(Token::Punct(Punct::RParen)),
-                ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                    "!enddefine",
-                )))),
+                ScanToken::Token(Token::Id(String::from("!enddefine"))),
                 ScanToken::Token(Token::EndCommand),
             ],
         );
@@ -822,9 +806,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::String(String::from(""))),
                 ScanToken::Token(Token::String(String::from(""))),
-                ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                    "!enddefine",
-                )))),
+                ScanToken::Token(Token::Id(String::from("!enddefine"))),
                 ScanToken::Token(Token::EndCommand),
             ],
         );
@@ -853,9 +835,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::Punct(Punct::LParen)),
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::Punct(Punct::RParen)),
-                ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                    "!enddefine",
-                )))),
+                ScanToken::Token(Token::Id(String::from("!enddefine"))),
                 ScanToken::Token(Token::EndCommand),
             ],
         );
@@ -888,9 +868,7 @@ var1 var2 var3!enddefine.
                 ScanToken::Token(Token::Punct(Punct::LParen)),
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::Punct(Punct::RParen)),
-                ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                    "!enddefine",
-                )))),
+                ScanToken::Token(Token::Id(String::from("!enddefine"))),
                 ScanToken::Token(Token::EndCommand),
             ],
         );
@@ -919,9 +897,7 @@ content 2
                 ScanToken::Token(Token::Punct(Punct::RParen)),
                 ScanToken::Token(Token::String(String::from("content 1"))),
                 ScanToken::Token(Token::String(String::from("content 2"))),
-                ScanToken::Token(Token::MacroToken(MacroToken::MacroId(String::from(
-                    "!enddefine",
-                )))),
+                ScanToken::Token(Token::Id(String::from("!enddefine"))),
                 ScanToken::Token(Token::EndCommand),
             ],
         );
index eae0b4810cf49fe56a8026ba8da5262ac49191b3..f53b46eb30aaac5bcc4893a5ac77477034f79448 100644 (file)
 //! such as error messages later in tokenization (e.g. SEG_EXPECTED_QUOTE).
 
 use crate::{
-    identifier::{id_match, id_match_n, is_reserved_word, IdentifierChar},
+    identifier::{id_match, id_match_n, IdentifierChar},
     prompt::PromptStyle,
 };
 use bitflags::bitflags;
 
-use super::{
-    command_name::{command_match, COMMAND_NAMES},
-};
+use super::command_name::{command_match, COMMAND_NAMES};
 
 /// Segmentation mode.
 ///
@@ -64,7 +62,6 @@ pub enum Segment {
     HexString,
     UnicodeString,
     UnquotedString,
-    ReservedWord,
     Identifier,
     Punct,
     Shbang,
@@ -75,7 +72,6 @@ pub enum Segment {
     DoRepeatCommand,
     DoRepeatOverflow,
     InlineData,
-    MacroId,
     MacroName,
     MacroBody,
     StartDocument,
@@ -388,7 +384,7 @@ fn is_start_of_string(input: &str, eof: bool) -> Result<bool, Incomplete> {
         'x' | 'X' | 'u' | 'U' => {
             let (c, _rest) = take(rest, eof)?;
             Ok(c == Some('\'') || c == Some('"'))
-        },
+        }
         '\'' | '"' => Ok(true),
         '\n' | '\r' if is_end_of_line(input, eof)? => Ok(true),
         _ => Ok(false),
@@ -586,14 +582,14 @@ impl Segmenter {
             '!' => {
                 let (c, rest2) = take(rest, eof)?;
                 match c {
-                    Some('*') => Ok((rest2, Segment::MacroId)),
+                    Some('*') => Ok((rest2, Segment::Punct)),
                     Some(_) => self.parse_id(input, eof),
                     None => Ok((rest, Segment::Punct)),
                 }
             }
             c if c.is_whitespace() => Ok((skip_spaces(rest, eof)?, Segment::Spaces)),
             c if c.may_start_id() => self.parse_id(input, eof),
-            '!'..='~' if c != '\\' && c != '^' => {
+            '#'..='~' if c != '\\' && c != '^' => {
                 self.state.1 = Substate::empty();
                 Ok((rest, Segment::Punct))
             }
@@ -656,13 +652,11 @@ impl Segmenter {
                 | Segment::HexString
                 | Segment::UnicodeString
                 | Segment::UnquotedString
-                | Segment::ReservedWord
                 | Segment::Punct
                 | Segment::CommentCommand
                 | Segment::DoRepeatCommand
                 | Segment::DoRepeatOverflow
                 | Segment::InlineData
-                | Segment::MacroId
                 | Segment::MacroName
                 | Segment::MacroBody
                 | Segment::StartDocument
@@ -744,14 +738,14 @@ impl Segmenter {
         }
 
         self.state.1 = Substate::empty();
-        let segment = if is_reserved_word(identifier) {
-            Segment::ReservedWord
-        } else if identifier.starts_with('!') {
-            Segment::MacroId
-        } else {
-            Segment::Identifier
-        };
-        Ok((rest, segment))
+        Ok((
+            rest,
+            if identifier != "!" {
+                Segment::Identifier
+            } else {
+                Segment::Punct
+            },
+        ))
     }
     fn parse_digraph<'a>(
         &mut self,
@@ -1105,8 +1099,7 @@ impl Segmenter {
     ///   - The `DEFINE` keyword.
     ///
     ///   - An identifier.  We transform this into `Type::MacroName` instead of
-    ///     `Type::Identifier` or `Type::MacroId` because this identifier must
-    ///     never be macro-expanded.
+    ///     `Type::Identifier` because this identifier must never be macro-expanded.
     ///
     ///   - Anything but `(`.
     ///
@@ -1125,7 +1118,7 @@ impl Segmenter {
     ) -> Result<(&'a str, Segment), Incomplete> {
         let (rest, segment) = self.subparse(input, eof)?;
         match segment {
-            Segment::Identifier | Segment::MacroId if self.state.0 == State::Define1 => {
+            Segment::Identifier if self.state.0 == State::Define1 => {
                 self.state.0 = State::Define2;
                 return Ok((rest, Segment::MacroName));
             }
index dd2d50cb62a960ad34ff3259e9bf7317dcdce92f..05f0a23d6e59fb6848961924de8d8faf45713c63 100644 (file)
@@ -1,8 +1,12 @@
 use crate::prompt::PromptStyle;
 
-use super::{Mode, Segmenter, Segment};
+use super::{Mode, Segment, Segmenter};
 
-fn push_segment<'a>(segmenter: &mut Segmenter, input: &'a str, one_byte: bool) -> (&'a str, Segment) {
+fn push_segment<'a>(
+    segmenter: &mut Segmenter,
+    input: &'a str,
+    one_byte: bool,
+) -> (&'a str, Segment) {
     if one_byte {
         for len in input.char_indices().map(|(pos, _c)| pos) {
             if let Ok((rest, segment)) = segmenter.push(&input[..len], false) {
@@ -93,7 +97,8 @@ fn check_segmentation(
             let mut expect_segments: Vec<_> = expect_segments.iter().copied().collect();
             assert_eq!(expect_segments.pop(), Some((Segment::End, "")));
             assert_eq!(expect_segments.pop(), Some((Segment::Newline, "\n")));
-            while let Some((Segment::SeparateCommands | Segment::EndCommand, "")) = expect_segments.last()
+            while let Some((Segment::SeparateCommands | Segment::EndCommand, "")) =
+                expect_segments.last()
             {
                 expect_segments.pop();
             }
@@ -141,6 +146,7 @@ grève Ângstrom poté
 f@#_.#6
 GhIjK
 .x 1y _z
+!abc abc!
 "#,
         Mode::Auto,
         &[
@@ -152,7 +158,7 @@ GhIjK
             (Segment::Spaces, " "),
             (Segment::Identifier, "abcd"),
             (Segment::Spaces, " "),
-            (Segment::MacroId, "!abcd"),
+            (Segment::Identifier, "!abcd"),
             (Segment::Newline, "\n"),
             (Segment::Identifier, "A"),
             (Segment::Spaces, " "),
@@ -162,7 +168,7 @@ GhIjK
             (Segment::Spaces, " "),
             (Segment::Identifier, "ABCD"),
             (Segment::Spaces, " "),
-            (Segment::MacroId, "!ABCD"),
+            (Segment::Identifier, "!ABCD"),
             (Segment::Newline, "\n"),
             (Segment::Identifier, "aB"),
             (Segment::Spaces, " "),
@@ -170,7 +176,7 @@ GhIjK
             (Segment::Spaces, " "),
             (Segment::Identifier, "aBcD"),
             (Segment::Spaces, " "),
-            (Segment::MacroId, "!aBcD"),
+            (Segment::Identifier, "!aBcD"),
             (Segment::Newline, "\n"),
             (Segment::Identifier, "$x"),
             (Segment::Spaces, " "),
@@ -178,7 +184,7 @@ GhIjK
             (Segment::Spaces, " "),
             (Segment::Identifier, "$z"),
             (Segment::Spaces, " "),
-            (Segment::MacroId, "!$z"),
+            (Segment::Identifier, "!$z"),
             (Segment::Newline, "\n"),
             (Segment::Identifier, "grève"),
             (Segment::Spaces, "\u{00a0}"),
@@ -196,7 +202,7 @@ GhIjK
             (Segment::Spaces, " "),
             (Segment::Identifier, "#d"),
             (Segment::Spaces, " "),
-            (Segment::MacroId, "!#d"),
+            (Segment::Identifier, "!#d"),
             (Segment::Newline, "\n"),
             (Segment::Identifier, "@efg"),
             (Segment::Spaces, " "),
@@ -206,7 +212,7 @@ GhIjK
             (Segment::Spaces, " "),
             (Segment::Identifier, "@#@"),
             (Segment::Spaces, " "),
-            (Segment::MacroId, "!@"),
+            (Segment::Identifier, "!@"),
             (Segment::Spaces, " "),
             (Segment::Newline, "\n"),
             (Segment::Identifier, "##"),
@@ -230,6 +236,11 @@ GhIjK
             (Segment::Punct, "_"),
             (Segment::Identifier, "z"),
             (Segment::Newline, "\n"),
+            (Segment::Identifier, "!abc"),
+            (Segment::Spaces, " "),
+            (Segment::Identifier, "abc"),
+            (Segment::Punct, "!"),
+            (Segment::Newline, "\n"),
             (Segment::End, ""),
         ],
         &[
@@ -244,6 +255,7 @@ GhIjK
             PromptStyle::Later,
             PromptStyle::Later,
             PromptStyle::Later,
+            PromptStyle::Later,
         ],
     );
 }
@@ -380,57 +392,57 @@ and. with.
 "#,
         Mode::Auto,
         &[
-            (Segment::ReservedWord, "and"),
+            (Segment::Identifier, "and"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "or"),
+            (Segment::Identifier, "or"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "not"),
+            (Segment::Identifier, "not"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "eq"),
+            (Segment::Identifier, "eq"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "ge"),
+            (Segment::Identifier, "ge"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "gt"),
+            (Segment::Identifier, "gt"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "le"),
+            (Segment::Identifier, "le"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "lt"),
+            (Segment::Identifier, "lt"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "ne"),
+            (Segment::Identifier, "ne"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "all"),
+            (Segment::Identifier, "all"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "by"),
+            (Segment::Identifier, "by"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "to"),
+            (Segment::Identifier, "to"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "with"),
+            (Segment::Identifier, "with"),
             (Segment::Newline, "\n"),
-            (Segment::ReservedWord, "AND"),
+            (Segment::Identifier, "AND"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "OR"),
+            (Segment::Identifier, "OR"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "NOT"),
+            (Segment::Identifier, "NOT"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "EQ"),
+            (Segment::Identifier, "EQ"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "GE"),
+            (Segment::Identifier, "GE"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "GT"),
+            (Segment::Identifier, "GT"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "LE"),
+            (Segment::Identifier, "LE"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "LT"),
+            (Segment::Identifier, "LT"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "NE"),
+            (Segment::Identifier, "NE"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "ALL"),
+            (Segment::Identifier, "ALL"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "BY"),
+            (Segment::Identifier, "BY"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "TO"),
+            (Segment::Identifier, "TO"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "WITH"),
+            (Segment::Identifier, "WITH"),
             (Segment::Newline, "\n"),
             (Segment::Identifier, "andx"),
             (Segment::Spaces, " "),
@@ -460,7 +472,7 @@ and. with.
             (Segment::Newline, "\n"),
             (Segment::Identifier, "and."),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "with"),
+            (Segment::Identifier, "with"),
             (Segment::EndCommand, "."),
             (Segment::Newline, "\n"),
             (Segment::End, ""),
@@ -543,7 +555,7 @@ fn test_punctuation() {
             (Segment::Punct, "["),
             (Segment::Punct, "]"),
             (Segment::Punct, "**"),
-            (Segment::MacroId, "!*"),
+            (Segment::Punct, "!*"),
             (Segment::Newline, "\n"),
             (Segment::Punct, "%"),
             (Segment::Spaces, " "),
@@ -563,7 +575,7 @@ fn test_punctuation() {
             (Segment::Spaces, " "),
             (Segment::Punct, "~"),
             (Segment::Spaces, " "),
-            (Segment::MacroId, "!*"),
+            (Segment::Punct, "!*"),
             (Segment::Newline, "\n"),
             (Segment::End, ""),
         ],
@@ -889,7 +901,7 @@ title my title.
             (Segment::EndCommand, "."),
             (Segment::Newline, "\n"),
             (Segment::Identifier, "#"),
-            (Segment::MacroId, "!"),
+            (Segment::Punct, "!"),
             (Segment::Spaces, " "),
             (Segment::Punct, "/"),
             (Segment::Identifier, "usr"),
@@ -953,7 +965,7 @@ next command.
             (Segment::Spaces, " "),
             (Segment::Identifier, "ambiguous"),
             (Segment::Spaces, " "),
-            (Segment::ReservedWord, "with"),
+            (Segment::Identifier, "with"),
             (Segment::Spaces, " "),
             (Segment::Identifier, "COMPUTE"),
             (Segment::EndCommand, "."),
@@ -1227,7 +1239,7 @@ not data
             (Segment::Number, "123"),
             (Segment::EndCommand, "."),
             (Segment::Newline, "\n"),
-            (Segment::ReservedWord, "not"),
+            (Segment::Identifier, "not"),
             (Segment::Spaces, " "),
             (Segment::Identifier, "data"),
             (Segment::Newline, "\n"),
@@ -1310,7 +1322,10 @@ end repeat.
             (Segment::Newline, "\n"),
             (Segment::DoRepeatCommand, "+ third command."),
             (Segment::Newline, "\n"),
-            (Segment::DoRepeatCommand, "end /* x */ /* y */ repeat print."),
+            (
+                Segment::DoRepeatCommand,
+                "end /* x */ /* y */ repeat print.",
+            ),
             (Segment::Newline, "\n"),
             (Segment::Identifier, "end"),
             (Segment::Newline, "\n"),
@@ -1539,7 +1554,7 @@ var1 var2 var3 "!enddefine"
                 (Segment::Newline, "\n"),
                 (Segment::MacroBody, "var1 var2 var3 \"!enddefine\""),
                 (Segment::Newline, "\n"),
-                (Segment::MacroId, "!enddefine"),
+                (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
                 (Segment::End, ""),
@@ -1563,7 +1578,7 @@ var1 var2 var3 "!enddefine"
                 (Segment::Punct, ")"),
                 (Segment::MacroBody, " var1 var2 var3 /* !enddefine"),
                 (Segment::Newline, "\n"),
-                (Segment::MacroId, "!enddefine"),
+                (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
                 (Segment::End, ""),
@@ -1587,7 +1602,7 @@ var1 var2 var3!enddefine.
                 (Segment::Punct, ")"),
                 (Segment::Newline, "\n"),
                 (Segment::MacroBody, "var1 var2 var3"),
-                (Segment::MacroId, "!enddefine"),
+                (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
                 (Segment::End, ""),
@@ -1609,7 +1624,7 @@ var1 var2 var3!enddefine.
                 (Segment::Punct, "("),
                 (Segment::Punct, ")"),
                 (Segment::MacroBody, "var1 var2 var3"),
-                (Segment::MacroId, "!enddefine"),
+                (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
                 (Segment::End, ""),
@@ -1632,7 +1647,7 @@ var1 var2 var3!enddefine.
                 (Segment::Punct, "("),
                 (Segment::Punct, ")"),
                 (Segment::Newline, "\n"),
-                (Segment::MacroId, "!enddefine"),
+                (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
                 (Segment::End, ""),
@@ -1661,7 +1676,7 @@ var1 var2 var3!enddefine.
                 (Segment::Newline, "\n"),
                 (Segment::MacroBody, ""),
                 (Segment::Newline, "\n"),
-                (Segment::MacroId, "!enddefine"),
+                (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
                 (Segment::End, ""),
@@ -1702,7 +1717,7 @@ var1 var2 var3!enddefine.
                 (Segment::Punct, ")"),
                 (Segment::Punct, ")"),
                 (Segment::Newline, "\n"),
-                (Segment::MacroId, "!enddefine"),
+                (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
                 (Segment::End, ""),
@@ -1748,7 +1763,7 @@ var1 var2 var3!enddefine.
                 (Segment::Newline, "\n"),
                 (Segment::Punct, ")"),
                 (Segment::Newline, "\n"),
-                (Segment::MacroId, "!enddefine"),
+                (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
                 (Segment::End, ""),
@@ -1793,7 +1808,7 @@ content 2
                 (Segment::Newline, "\n"),
                 (Segment::MacroBody, "content 2"),
                 (Segment::Newline, "\n"),
-                (Segment::MacroId, "!enddefine"),
+                (Segment::Identifier, "!enddefine"),
                 (Segment::EndCommand, "."),
                 (Segment::Newline, "\n"),
                 (Segment::End, ""),
index 868a79dac948240aa98874776b7047f6a08768e1..8467a7e927a193a75ca08e7620ba2cd87c27c2c0 100644 (file)
@@ -22,9 +22,6 @@ pub enum Token {
 
     /// Operators, punctuators, and reserved words.
     Punct(Punct),
-
-    /// Tokens that only appear in macros.
-    MacroToken(MacroToken),
 }
 
 fn is_printable(c: char) -> bool {
@@ -79,7 +76,6 @@ impl Display for Token {
             }
             Token::EndCommand => write!(f, "."),
             Token::Punct(punct) => punct.fmt(f),
-            Token::MacroToken(mt) => mt.fmt(f),
         }
     }
 }
@@ -186,6 +182,32 @@ pub enum Punct {
 
     /// `**`.
     Exp,
+
+    /// `!` (only appears in macros).
+    Bang,
+
+    /// `%` (only appears in macros).
+    Percent,
+
+    /// `?` (only appears in macros).
+    Question,
+
+    /// `` ` `` (only appears in macros).
+    Backtick,
+
+    /// `.`.
+    ///
+    /// This represents a dot in the middle of a line by itself, where it does not end a command.
+    Dot,
+
+    /// `_` (only appears in macros).
+    ///
+    /// Although underscores may appear within identifiers, they can't be the
+    /// first character, so this represents an underscore found on its own.
+    Underscore,
+
+    /// `!*` (only appears in macros).
+    BangAsterisk,
 }
 
 impl Punct {
@@ -219,6 +241,13 @@ impl Punct {
             Self::To => "TO",
             Self::With => "WITH",
             Self::Exp => "**",
+            Self::Bang => "!",
+            Self::Percent => "%",
+            Self::Question => "?",
+            Self::Backtick => "`",
+            Self::Dot => ".",
+            Self::Underscore => "_",
+            Self::BangAsterisk => "!*",
         }
     }
 }
@@ -227,54 +256,3 @@ impl Display for Punct {
         write!(f, "{}", self.as_str())
     }
 }
-
-/// Tokens that only appear in macros.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum MacroToken {
-    /// Identifier starting with `!`.
-    MacroId(String),
-
-    /// `!`.
-    Bang,
-
-    /// `%`.
-    Percent,
-
-    /// `?`.
-    Question,
-
-    /// ````.
-    Backtick,
-
-    /// `.` (in the middle of a line by itself, where it does not end a command).
-    Dot,
-
-    /// `_`.
-    ///
-    /// Although underscores may appear within identifiers, they can't be the
-    /// first character, so this represents an underscore found on its own.
-    Underscore,
-}
-
-impl MacroToken {
-    pub fn as_str(&self) -> &str {
-        match self {
-            MacroToken::MacroId(id) => &id,
-            MacroToken::Bang => "!",
-            MacroToken::Percent => "%",
-            MacroToken::Question => "?",
-            MacroToken::Backtick => "`",
-            MacroToken::Dot => ".",
-            MacroToken::Underscore => "_",
-        }
-    }
-}
-
-impl Display for MacroToken {
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        match self {
-            MacroToken::MacroId(id) => write!(f, "{id}"),
-            _ => write!(f, "{}", self.as_str()),
-        }
-    }
-}
index 2f9188003a306f37d28b12c1df893b6176852ad7..33dac111120059c2a317ab8521e261960956f409 100644 (file)
@@ -3,10 +3,7 @@ use std::collections::HashMap;
 use thiserror::Error as ThisError;
 use unicase::UniCase;
 
-use crate::{
-    lex::token::{MacroToken, Token},
-    message::Location,
-};
+use crate::{lex::token::Token, message::Location};
 
 #[derive(Clone, Debug, ThisError)]
 pub enum MacroError {
@@ -20,13 +17,24 @@ pub enum MacroError {
         macro_: String,
     },
 
-    /// Expected a particular token.
+    /// Reached the end of the command while still expecting a particular token.
     #[error("Reached end of command expecting {token:?} in argument {arg} to macro {macro_}.")]
     ExpectedToken {
         token: String,
         arg: String,
         macro_: String,
     },
+
+    /// Expected a particular token, got a different one.
+    #[error(
+        "Found `{actual}` while expecting `{expected}` reading argument {arg} to macro {macro_}."
+    )]
+    UnexpectedToken {
+        actual: String,
+        expected: String,
+        arg: String,
+        macro_: String,
+    },
 }
 
 /// A PSPP macro as defined with `!DEFINE`.
@@ -54,14 +62,14 @@ impl Macro {
         } else if let ValueType::Enclose(_, _) = self.parameters[0].arg {
             MacroCallState::Enclose
         } else {
-            MacroCallState::NewArg
+            MacroCallState::Arg
         }
     }
 }
 
 struct Parameter {
-    /// `!name` or `!1`.
-    name: String,
+    /// `name` or `1`.
+    name: UniCase<String>,
 
     /// Default value.
     ///
@@ -99,6 +107,7 @@ enum ValueType {
 
 /// A token and the syntax that was tokenized to produce it.  The syntax allows
 /// the token to be turned back into syntax accurately.
+#[derive(Clone)]
 struct BodyToken {
     /// The token.
     token: Token,
@@ -110,11 +119,8 @@ struct BodyToken {
 type MacroSet = HashMap<UniCase<String>, Macro>;
 
 pub enum MacroCallState {
-    /// Starting a new argument.
-    NewArg,
-
     /// Accumulating tokens toward the end of any type of argument.
-    ContinueArg,
+    Arg,
 
     /// Expecting the opening delimiter of an ARG_ENCLOSE argument.
     Enclose,
@@ -134,7 +140,8 @@ pub struct MacroCall<'a> {
     macros: &'a MacroSet,
     macro_: &'a Macro,
     state: MacroCallState,
-    args: Vec<Vec<BodyToken>>,
+    args: Box<[Option<Vec<BodyToken>>]>,
+    arg_index: usize,
 
     /// Length of macro call so far.
     n_tokens: usize,
@@ -146,11 +153,9 @@ impl<'a> MacroCall<'a> {
             return None;
         }
         let macro_name = match token {
-            Token::Id(s) => s,
-            Token::MacroToken(MacroToken::MacroId(s)) => s,
+            Token::Id(s) => s.clone(),
             _ => return None,
-        }
-        .clone();
+        };
         // XXX Unicase::new() is very expensive. We probably need to define our
         // own Unicase-alike that has a proper Borrow<> implementation.
         let Some(macro_) = macros.get(&UniCase::new(macro_name)) else {
@@ -160,39 +165,180 @@ impl<'a> MacroCall<'a> {
             macros,
             macro_,
             state: macro_.initial_state(),
-            args: Vec::with_capacity(macro_.parameters.len()),
+            args: (0..macro_.parameters.len()).map(|_| None).collect(),
+            arg_index: 0,
             n_tokens: 1,
         })
     }
 
-    fn push_continue_arg(&mut self, token: &Token, syntax: &String, error: &impl Fn(MacroError)) {
-        if let Token::EndCommand | Token::End = token {
-            let param = &self.macro_.parameters[self.args.len() - 1];
-            let arg = self.args.last().unwrap();
-            match param.arg {
-                ValueType::NTokens(n) => error(MacroError::ExpectedMoreTokens {
-                    n: n - arg.len(),
-                    arg: param.name.clone(),
-                    macro_: self.macro_.name.clone(),
-                }),
-                ValueType::CharEnd(end) | ValueType::Enclose(_, end) => todo!(),
-                ValueType::CmdEnd => todo!(),
+    fn finished(&mut self) -> Option<usize> {
+        self.state = MacroCallState::Finished;
+        for (i, arg) in self.args.iter_mut().enumerate() {
+            if arg.is_none() {
+                *arg = Some(self.macro_.parameters[i].default.clone());
             }
         }
+        Some(self.n_tokens)
     }
-    fn push_new_arg(&mut self, token: &Token, syntax: &String, error: &impl Fn(MacroError)) {
+
+    fn next_arg(&mut self) -> Option<usize> {
+        if self.macro_.parameters.is_empty() {
+            self.finished()
+        } else {
+            let param = &self.macro_.parameters[self.arg_index];
+            if param.is_positional() {
+                self.arg_index += 1;
+                if self.arg_index >= self.args.len() {
+                    self.finished()
+                } else {
+                    let param = &self.macro_.parameters[self.arg_index];
+                    self.state = if !param.is_positional() {
+                        MacroCallState::Keyword
+                    } else if let ValueType::Enclose(_, _) = param.arg {
+                        MacroCallState::Enclose
+                    } else {
+                        MacroCallState::Arg
+                    };
+                    None
+                }
+            } else {
+                if self.args.iter().any(|arg| arg.is_none()) {
+                    self.state = MacroCallState::Keyword;
+                    None
+                } else {
+                    self.finished()
+                }
+            }
+        }
+    }
+
+    fn push_arg(
+        &mut self,
+        token: &Token,
+        syntax: &str,
+        error: &impl Fn(MacroError),
+    ) -> Option<usize> {
+        let param = &self.macro_.parameters[self.args.len() - 1];
         if let Token::EndCommand | Token::End = token {
-            return self.mc_finished();
+            if let Some(arg) = &self.args[self.arg_index] {
+                let param = &self.macro_.parameters[self.args.len() - 1];
+
+                match &param.arg {
+                    ValueType::NTokens(n) => error(MacroError::ExpectedMoreTokens {
+                        n: n - arg.len(),
+                        arg: param.name.clone(),
+                        macro_: self.macro_.name.clone(),
+                    }),
+                    ValueType::CharEnd(end) | ValueType::Enclose(_, end) => {
+                        error(MacroError::ExpectedToken {
+                            token: end.to_string(),
+                            arg: param.name.clone(),
+                            macro_: self.macro_.name.clone(),
+                        })
+                    }
+                    ValueType::CmdEnd => {
+                        // This is OK, it's the expected way to end the argument.
+                    }
+                }
+            }
+            return self.finished();
+        }
+
+        self.n_tokens += 1;
+        let arg = self.args[self.arg_index].get_or_insert(Vec::new());
+        let (
+            add_token, // Should we add `token` to the current arg?
+            next_arg,  // Should we advance to the next arg?
+        ) = match &param.arg {
+            ValueType::NTokens(n) => (arg.len() + 1 >= *n, true),
+            ValueType::CharEnd(end) | ValueType::Enclose(_, end) => {
+                let at_end = token == end;
+                (at_end, !at_end)
+            }
+            ValueType::CmdEnd => (false, true),
+        };
+        if add_token {
+            if true
+            // TODO: `true` presumably stands in for C's `!macro_expand_arg (&mt->token, mc->me, *argp)`.
+            {
+                arg.push(BodyToken {
+                    token: token.clone(),
+                    syntax: String::from(syntax),
+                });
+            }
+        }
+        if next_arg {
+            self.next_arg()
+        } else {
+            None
+        }
+    }
+
+    fn push_enclose(
+        &mut self,
+        token: &Token,
+        syntax: &str,
+        error: &impl Fn(MacroError),
+    ) -> Option<usize> {
+        let param = &self.macro_.parameters[self.arg_index];
+        let ValueType::Enclose(start, _) = &param.arg else {
+            unreachable!()
+        };
+        if token == start {
+            self.n_tokens += 1;
+            self.args[self.arg_index].get_or_insert(Vec::new());
+            self.state = MacroCallState::Arg;
+            None
+        } else if param.is_positional() && matches!(token, Token::End | Token::EndCommand) {
+            self.finished()
+        } else {
+            error(MacroError::UnexpectedToken {
+                actual: String::from(syntax),
+                expected: start.to_string(),
+                arg: param.name.clone(),
+                macro_: self.macro_.name.clone(),
+            });
+            self.finished()
         }
-        self.args.push(Vec::new());
-        self.state = MacroCallState::ContinueArg;
-        self.push_continue_arg(token, syntax, error);
     }
-    pub fn push(&mut self, token: &Token, syntax: &String, error: &impl Fn(MacroError)) -> ! {
+
+    fn push_keyword(
+        &mut self,
+        token: &Token,
+        syntax: &str,
+        error: &impl Fn(MacroError),
+    ) -> Option<usize> {
+        let Token::Id(id) = token else {
+            return self.finished();
+        };
+        let Some(arg_idx) = self
+            .macro_
+            .parameters
+            .iter()
+            .position(|param| param.name == UniCase::new(id))
+        else {};
+    }
+
+    /// Adds `token`, which has the given `syntax`, to the collection of tokens
+    /// in `self` that potentially need to be macro expanded.
+    ///
+    /// Returns `None` if the macro expander needs more tokens, for macro
+    /// arguments or to decide whether this is actually a macro invocation.  The
+    /// caller should call `push` again with the next token.
+    ///
+    /// Returns `Some(n)` if the macro was complete with `n` tokens.  The caller
+    /// should call [`Self::expand`] to obtain the expansion.  (If `n == 0`,
+    /// then the tokens did not actually invoke a macro at all and the expansion
+    /// will be empty.)
+    pub fn push(
+        &mut self,
+        token: &Token,
+        syntax: &str,
+        error: &impl Fn(MacroError),
+    ) -> Option<usize> {
         match self.state {
-            MacroCallState::NewArg => self.push_new_arg(token, syntax, error),
-            MacroCallState::ContinueArg => self.push_continue_arg(token, syntax, error),
-            MacroCallState::Enclose => todo!(),
+            MacroCallState::Arg => self.push_arg(token, syntax, error),
+            MacroCallState::Enclose => self.push_enclose(token, syntax, error),
             MacroCallState::Keyword => todo!(),
             MacroCallState::Equals => todo!(),
             MacroCallState::Finished => todo!(),