!IF and !LET
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 22 Jul 2024 02:03:37 +0000 (19:03 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 22 Jul 2024 02:03:37 +0000 (19:03 -0700)
rust/src/macros.rs

index 883d1c19408fcd0a2e5c7af9e3d7befba7a24223..df3f568b30c58bc15c6114ed1fad50337b109e83 100644 (file)
@@ -1,10 +1,10 @@
+use lazy_static::lazy_static;
+use num::Integer;
 use std::{
     cmp::Ordering,
-    collections::{BTreeMap, HashMap},
+    collections::{BTreeMap, HashMap, HashSet},
     mem::take,
 };
-
-use num::Integer;
 use thiserror::Error as ThisError;
 use unicase::UniCase;
 
@@ -72,6 +72,30 @@ pub enum MacroError {
     /// Expecting literal.
     #[error("Expecting literal or function invocation in macro expression.")]
     ExpectingLiteral,
+
+    /// Expecting `!THEN`.
+    #[error("`!THEN` expected in macro `!IF` construct.")]
+    ExpectingThen,
+
+    /// Expecting `!ELSE` or `!THEN`.
+    #[error("`!ELSE` or `!THEN` expected in macro `!IF` construct.")]
+    ExpectingElseOrIfEnd,
+
+    /// Expecting `!IFEND`.
+    #[error("`!IFEND` expected in macro `!IF` construct.")]
+    ExpectingIfEnd,
+
+    /// Expecting macro variable name.
+    #[error("Expecting macro variable name following `!LET`.")]
+    ExpectingMacroVarName,
+
+    /// Invalid `!LET` variable name.
+    #[error("Cannot use argument name or macro keyword {0} as `!LET` variable name.")]
+    BadLetVarName(Identifier),
+
+    /// Expecting `=` following `!LET`.
+    #[error("Expecting `=` following `!LET`.")]
+    ExpectingEquals,
 }
 
 /// A PSPP macro as defined with `!DEFINE`.
@@ -178,6 +202,7 @@ fn unquote_string(input: String, mode: Mode) -> String {
     return unquoted;
 }
 
+#[derive(Clone)] // lets a parser work on a copy of the token cursor and commit it back only on success
 struct MacroTokens<'a>(&'a [MacroToken]);
 
 impl<'a> MacroTokens<'a> {
@@ -643,7 +668,7 @@ struct Expander<'a> {
     expand: Option<&'a bool>,
 
     /// Variables from `!DO` and `!LET`.
-    vars: &'a BTreeMap<Identifier, String>,
+    vars: &'a mut BTreeMap<Identifier, String>,
 
     // Only set if inside a `!DO` loop. If true, break out of the loop.
     break_: Option<&'a mut bool>,
@@ -663,6 +688,47 @@ fn bool_to_string(b: bool) -> String {
     }
 }
 
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum IfEndClause { // which keyword ended a clause scanned by `find_ifend_clause`
+    Else,  // the clause ended at an `!ELSE`
+    IfEnd, // the clause ended at an `!IFEND`
+}
+
+fn macro_keywords() -> HashSet<Identifier> {
+    let mut keywords = HashSet::new();
+    for kw in [
+        "!BREAK",
+        "!CHAREND",
+        "!CMDEND",
+        "!DEFAULT",
+        "!DO",
+        "!DOEND",
+        "!ELSE",
+        "!ENCLOSE",
+        "!ENDDEFINE",
+        "!IF",
+        "!IFEND",
+        "!IN",
+        "!LET",
+        "!NOEXPAND",
+        "!OFFEXPAND",
+        "!ONEXPAND",
+        "!POSITIONAL",
+        "!THEN",
+        "!TOKENS",
+    ] {
+        keywords.insert(Identifier::new(kw).unwrap());
+    }
+    keywords
+}
+
+fn is_macro_keyword(s: &Identifier) -> bool {
+    lazy_static! {
+        static ref KEYWORDS: HashSet<Identifier> = macro_keywords();
+    }
+    KEYWORDS.contains(s)
+}
+
 impl<'a> Expander<'a> {
     fn may_expand(&self) -> bool {
         self.expand.map(|b| *b).unwrap_or(false)
@@ -672,16 +738,13 @@ impl<'a> Expander<'a> {
         self.break_.as_ref().map(|b| **b).unwrap_or(false)
     }
 
-    fn expand(&mut self, input: &[MacroToken], output: &mut Vec<MacroToken>) {
+    fn expand(&mut self, input: &mut MacroTokens, output: &mut Vec<MacroToken>) {
         if self.nesting_countdown == 0 {
             (self.error)(MacroError::TooDeep { limit: MNEST });
-            output.extend(input.iter().cloned());
+            output.extend(take(&mut input.0).iter().cloned()); // nesting limit hit: copy the rest through unexpanded, leaving `input` empty
         } else {
-            let mut i = 0;
-            while i < input.len() && !self.should_break() {
-                let consumed = self.expand__(&input[i..], output);
-                debug_assert!(consumed > 0);
-                i += consumed;
+            while !input.0.is_empty() && !self.should_break() { // stop at end of input or once a break has been requested
+                self.expand__(input, output); // `expand__` advances `input` itself, so no index bookkeeping is needed here
             }
         }
     }
@@ -690,7 +753,7 @@ impl<'a> Expander<'a> {
         let param = &self.macro_.unwrap().parameters[param_idx];
         let arg = &self.args.unwrap()[param_idx].as_ref().unwrap();
         if self.may_expand() && param.expand_value {
-            let vars = BTreeMap::new();
+            let mut vars = BTreeMap::new();
             let mut stack = take(&mut self.stack);
             stack.push(Frame {
                 name: Some(param.name.clone()),
@@ -698,13 +761,14 @@ impl<'a> Expander<'a> {
             });
             let mut subexpander = Expander {
                 stack,
-                vars: &vars,
+                vars: &mut vars,
                 break_: None,
                 macro_: None,
                 args: None,
                 ..*self
             };
-            subexpander.expand(&arg, output);
+            let mut arg_tokens = MacroTokens(&arg);
+            subexpander.expand(&mut arg_tokens, output);
             self.stack = subexpander.stack;
             self.stack.pop();
         } else {
@@ -828,21 +892,122 @@ impl<'a> Expander<'a> {
         Some(lhs)
     }
 
-    fn evaluate_expression(&mut self, input: &[MacroToken]) -> Option<String> {
-        let mut tokens = MacroTokens(input);
-        self.evaluate_or(&mut tokens)
+    fn evaluate_expression(&mut self, input: &mut MacroTokens) -> Option<String> {
+        self.evaluate_or(input) // works on the caller's cursor: `expand_if`/`expand_let` continue parsing from `input` afterward
     }
 
-    fn expand_if(&mut self, input: &[MacroToken], output: &mut Vec<MacroToken>) -> usize {
-        self.evaluate_expression(input);
-        todo!()
+    fn find_ifend_clause<'b>(
+        input: &mut MacroTokens<'b>,
+    ) -> Option<(MacroTokens<'b>, IfEndClause)> {
+        let input_copy = input.clone();
+        let mut nesting = 0;
+        while !input.0.is_empty() {
+            if input.match_("!IF") {
+                nesting += 1;
+            } else if input.match_("!IFEND") {
+                if nesting == 0 {
+                    return Some((
+                        MacroTokens(&input_copy.0[..input_copy.0.len() - input.0.len() - 1]),
+                        IfEndClause::IfEnd,
+                    ));
+                }
+                nesting -= 1;
+            } else if input.match_("!ELSE") && nesting == 0 {
+                return Some((
+                    MacroTokens(&input_copy.0[..input_copy.0.len() - input.0.len() - 1]),
+                    IfEndClause::Else,
+                ));
+            } else {
+                input.advance();
+            }
+        }
+        return None;
+    }
+    fn expand_if(&mut self, orig_input: &mut MacroTokens, output: &mut Vec<MacroToken>) -> bool {
+        let mut input = orig_input.clone();
+        if !input.match_("!IF") {
+            return false;
+        }
+        let Some(result) = self.evaluate_expression(&mut input) else {
+            return false;
+        };
+        if !input.match_("!THEN") {
+            (self.error)(MacroError::ExpectingThen);
+            return false;
+        }
+
+        let Some((if_tokens, clause)) = Self::find_ifend_clause(&mut input) else {
+            (self.error)(MacroError::ExpectingElseOrIfEnd);
+            return false;
+        };
+
+        let else_tokens = match clause {
+            IfEndClause::Else => {
+                let Some((else_tokens, IfEndClause::IfEnd)) = Self::find_ifend_clause(&mut input)
+                else {
+                    (self.error)(MacroError::ExpectingIfEnd);
+                    return false;
+                };
+                Some(else_tokens)
+            }
+            IfEndClause::IfEnd => None,
+        };
+
+        let subinput = match result.as_str() {
+            "0" => else_tokens,
+            _ => Some(if_tokens),
+        };
+        if let Some(mut subinput) = subinput {
+            self.stack.push(Frame {
+                name: Some(Identifier::new("!IF").unwrap()),
+                location: None,
+            });
+            self.expand(&mut subinput, output);
+        }
+        *orig_input = input;
+        true
     }
 
-    fn expand__(&mut self, input: &[MacroToken], output: &mut Vec<MacroToken>) -> usize {
+    fn expand_let(&mut self, orig_input: &mut MacroTokens) -> bool {
+        let mut input = orig_input.clone();
+        if !input.match_("!LET") {
+            return false;
+        }
+
+        let Some(var_name) = input.0.get(0).map(|mt| mt.token.macro_id()).flatten() else {
+            (self.error)(MacroError::ExpectingMacroVarName);
+            return false;
+        };
+        if is_macro_keyword(var_name)
+            || self
+                .macro_
+                .map(|m| m.find_parameter(var_name))
+                .flatten()
+                .is_some()
+        {
+            (self.error)(MacroError::BadLetVarName(var_name.clone()));
+            return false;
+        }
+        input.advance();
+
+        if !input.match_("=") {
+            (self.error)(MacroError::ExpectingEquals);
+            return false;
+        }
+
+        let Some(value) = self.evaluate_expression(&mut input) else {
+            return false;
+        };
+        self.vars.insert(var_name.clone(), value);
+        *orig_input = input;
+        true
+    }
+
+    fn expand__(&mut self, input: &mut MacroTokens, output: &mut Vec<MacroToken>) {
         // Recursive macro calls.
         if self.may_expand() {
-            if let Some(call) = Call::for_tokens(self.macros, input, &self.error) {
-                let vars = BTreeMap::new();
+            if let Some(call) = Call::for_tokens(self.macros, &input.0, &self.error) {
+                let mut vars = BTreeMap::new();
                 let mut stack = take(&mut self.stack);
                 stack.push(Frame {
                     name: Some(call.0.macro_.name.clone()),
@@ -850,7 +1015,7 @@ impl<'a> Expander<'a> {
                 });
                 let mut subexpander = Expander {
                     break_: None,
-                    vars: &vars,
+                    vars: &mut vars,
                     nesting_countdown: self.nesting_countdown.saturating_sub(1),
                     stack,
                     ..*self
@@ -858,12 +1023,13 @@ impl<'a> Expander<'a> {
                 subexpander.expand(input, output);
                 self.stack = subexpander.stack;
                 self.stack.pop();
-                return call.0.n_tokens;
+                input.0 = &[];
+                return;
             }
         }
 
         // Only identifiers beginning with `!` receive further processing.
-        let id = match &input[0].token {
+        let id = match &input.0[0].token {
             Token::Id(id) if id.0.starts_with('!') => id,
             Token::Punct(Punct::BangAsterisk) => {
                 if let Some(macro_) = self.macro_ {
@@ -873,11 +1039,12 @@ impl<'a> Expander<'a> {
                 } else {
                     (self.error)(MacroError::InvalidBangAsterisk);
                 }
-                return 1;
+                input.advance();
+                return;
             }
             _ => {
-                output.push(input[0].clone());
-                return 1;
+                output.push(input.advance().clone());
+                return;
             }
         };
 
@@ -885,22 +1052,24 @@ impl<'a> Expander<'a> {
         if let Some(macro_) = self.macro_ {
             if let Some(param_idx) = macro_.find_parameter(id) {
                 self.expand_arg(param_idx, output);
-                return 1;
+                input.advance();
+                return;
             }
         }
 
         // Variables set by `!DO` or `!LET`.
         if let Some(value) = self.vars.get(id) {
             tokenize_string(value.as_str(), self.mode, output, &self.error);
-            return 1;
+            input.advance();
+            return;
         }
 
-        // XXX Macro functions.
-        if id == "!IF" {
-            let n = self.expand_if(&input[1..], output);
-            if n > 0 {
-                return n;
-            }
+        // Macro functions.
+        if self.expand_if(input, output) {
+            return;
+        }
+        if self.expand_let(input) {
+            return;
         }
 
         todo!()
@@ -931,7 +1100,7 @@ impl<'a> Call<'a> {
         F: Fn(MacroError) + 'a,
     {
         let error: Box<dyn Fn(MacroError) + 'a> = Box::new(error);
-        let vars = BTreeMap::new();
+        let mut vars = BTreeMap::new();
         let mut me = Expander {
             macros: self.0.macros,
             error: &error,
@@ -949,11 +1118,12 @@ impl<'a> Call<'a> {
                     location: Some(self.0.macro_.location.clone()),
                 },
             ],
-            vars: &vars,
+            vars: &mut vars,
             break_: None,
             expand: None,
         };
-        me.expand(&self.0.macro_.body, output);
+        let mut body = MacroTokens(&self.0.macro_.body);
+        me.expand(&mut body, output);
     }
 }