cmp::Ordering,
collections::{BTreeMap, HashMap, HashSet},
mem::take,
+ num::NonZeroUsize,
+ ops::RangeInclusive,
};
use thiserror::Error as ThisError;
use unicase::UniCase;
/// `!BREAK` outside `!DO`.
#[error("`!BREAK` outside `!DO`.")]
BreakOutsideDo,
+
+ /// `,` or `)` expected in call to macro function.
+ #[error("`,` or `)` expected in call to macro function `{0}`.")]
+ ExpectingCommaOrRParen(Identifier),
+
+ /// Macro function takes one argument.
+ #[error("Macro function `{name}` takes one argument (not {n_args}).")]
+ ExpectingOneArg { name: Identifier, n_args: usize },
+
+ /// Macro function takes two arguments.
+ #[error("Macro function `{name}` takes two arguments (not {n_args}).")]
+ ExpectingTwoArgs { name: Identifier, n_args: usize },
+
+ /// Macro function takes two or three arguments.
+ #[error("Macro function `{name}` takes two or three arguments (not {n_args}).")]
+ ExpectingTwoOrThreeArgs { name: Identifier, n_args: usize },
+
+ /// Macro function needs at least one argument.
+ #[error("Macro function `{name}` needs at least one argument).")]
+ ExpectingOneOrMoreArgs { name: Identifier },
+
+ /// Argument to `!BLANKS` must be a non-negative integer.
+ #[error("Argument to `!BLANKS` must be non-negative integer (not `{0}`).")]
+ InvalidBlanks(String),
+
+ /// Second argument of `!SUBSTR` must be a positive integer.
+ #[error("Second argument of `!SUBSTR` must be positive integer (not `{0}`).")]
+ InvalidSubstr2(String),
+
+ /// Third argument of `!SUBSTR` must be a non-negative integer.
+ #[error("Third argument of `!SUBSTR` must be non-negative integer (not `{0}`).")]
+ InvalidSubstr3(String),
}
/// A PSPP macro as defined with `!DEFINE`.
syntax: String,
}
-fn tokenize_string(s: &str, mode: Mode, output: &mut Vec<MacroToken>, error: &impl Fn(MacroError)) {
+fn tokenize_string_into(s: &str, mode: Mode, error: &impl Fn(MacroError), output: &mut Vec<MacroToken>) {
for (syntax, token) in StringSegmenter::new(s, mode, true) {
match token {
ScanToken::Token(token) => output.push(MacroToken {
}
}
-fn unquote_string(input: String, mode: Mode) -> String {
- let mut scanner = StringScanner::new(&input, mode, true);
+/// Tokenizes `s` in `mode` and returns the tokens in a freshly allocated
+/// vector.
+///
+/// Convenience wrapper around `tokenize_string_into` for callers that do not
+/// already have an output vector; `error` is forwarded unchanged.
+fn tokenize_string(s: &str, mode: Mode, error: &impl Fn(MacroError)) -> Vec<MacroToken> {
+    let mut collected = Vec::new();
+    tokenize_string_into(s, mode, error, &mut collected);
+    collected
+}
+
+fn try_unquote_string(input: &String, mode: Mode) -> Option<String> {
+ let mut scanner = StringScanner::new(input, mode, true);
let Some(ScanToken::Token(Token::String(unquoted))) = scanner.next() else {
- return input;
+ return None;
};
- let None = scanner.next() else { return input };
- return unquoted;
+ let None = scanner.next() else { return None };
+ return Some(unquoted);
+}
+
+fn unquote_string(input: String, mode: Mode) -> String {
+ try_unquote_string(&input, mode).unwrap_or(input)
}
#[derive(Clone)]
struct MacroTokens<'a>(&'a [MacroToken]);
impl<'a> MacroTokens<'a> {
+ /// Returns true if the underlying token slice has no tokens left.
+ fn is_empty(&self) -> bool {
+ self.0.is_empty()
+ }
fn match_(&mut self, s: &str) -> bool {
if let Some((first, rest)) = self.0.split_first() {
if first.syntax.eq_ignore_ascii_case(s) {
}
None
}
+ /// Peeks at the first token and returns whatever its `macro_id()` yields,
+ /// without consuming it.  Returns `None` if there is no first token.
+ fn macro_id(&self) -> Option<&Identifier> {
+     self.0.first().and_then(|mt| mt.token.macro_id())
+ }
fn take_macro_id(&mut self) -> Option<&Identifier> {
let result = self.0.get(0).map(|mt| mt.token.macro_id()).flatten();
if result.is_some() {
}
result
}
+ /// Removes the first token from the front of the slice and returns it, or
+ /// returns `None` (leaving the slice untouched) if no tokens remain.
+ fn take(&mut self) -> Option<&MacroToken> {
+     let (head, remainder) = self.0.split_first()?;
+     self.0 = remainder;
+     Some(head)
+ }
fn advance(&mut self) -> &MacroToken {
let (first, rest) = self.0.split_first().unwrap();
self.0 = rest;
(self.error)(MacroError::TooDeep { limit: MNEST });
output.extend(take(&mut input.0).iter().cloned());
} else {
- while !input.0.is_empty() && !self.should_break() {
+ while !input.is_empty() && !self.should_break() {
self.expand__(input, output);
}
}
output.extend(arg.iter().cloned());
}
}
+ /// Parses the parenthesized, comma-separated argument list of a call to
+ /// macro function `function`.
+ ///
+ /// On entry `input` must be positioned at the function name, followed by
+ /// `(`; both are skipped unconditionally.  Returns the arguments as
+ /// strings, or `None` if an argument fails to parse or if something other
+ /// than `,` or `)` follows an argument (the latter is reported through
+ /// `self.error`).
+ fn parse_function_args(
+     &mut self,
+     function: &Identifier,
+     input: &mut MacroTokens,
+ ) -> Option<Vec<String>> {
+     // Skip the function name and the opening parenthesis.
+     input.advance();
+     input.advance();
+
+     let mut args = Vec::new();
+     if input.match_(")") {
+         // Empty argument list.
+         return Some(args);
+     }
+     loop {
+         let arg = self.parse_function_arg(input)?;
+         args.push(arg);
+         match input.take().map(|mt| &mt.token) {
+             Some(Token::Punct(Punct::Comma)) => continue,
+             Some(Token::Punct(Punct::RParen)) => break Some(args),
+             _ => {
+                 (self.error)(MacroError::ExpectingCommaOrRParen(function.clone()));
+                 break None;
+             }
+         }
+     }
+ }
+
+ /// Implements `!BLANKS(n)`: expands to `n` space characters.
+ ///
+ /// Reports `InvalidBlanks` and returns `None` if the argument, after
+ /// trimming whitespace, does not parse as a `usize`.
+ fn expand_blanks(e: &mut Expander, args: Vec<String>) -> Option<String> {
+     match args[0].trim().parse::<usize>() {
+         Ok(n) => Some(" ".repeat(n)),
+         Err(_) => {
+             (e.error)(MacroError::InvalidBlanks(args[0].clone()));
+             None
+         }
+     }
+ }
+
+ /// Implements `!CONCAT(arg...)`: unquotes each argument and joins the
+ /// results with nothing in between.
+ fn expand_concat(e: &mut Expander, args: Vec<String>) -> Option<String> {
+     let mut joined = String::new();
+     for arg in args {
+         joined.push_str(&unquote_string(arg, e.mode));
+     }
+     Some(joined)
+ }
+
+ /// Implements `!EVAL(arg)`: tokenizes the argument, macro-expands the
+ /// tokens in a sub-expander, and returns the expansion as syntax text.
+ ///
+ /// The sub-expander temporarily takes over `e`'s frame stack with an
+ /// `!EVAL` frame pushed on top; the frame is popped and the stack handed
+ /// back before returning.
+ fn expand_eval(e: &mut Expander, args: Vec<String>) -> Option<String> {
+     let tokens = tokenize_string(&args[0], e.mode, e.error);
+
+     // Move the stack out of `e` so the sub-expander can own it.
+     let mut frames = take(&mut e.stack);
+     frames.push(Frame {
+         name: Some(Identifier::new("!EVAL").unwrap()),
+         location: None,
+     });
+
+     // The sub-expander gets its own break flag.
+     let mut broke = false;
+     let mut sub = Expander {
+         break_: Some(&mut broke),
+         stack: frames,
+         vars: e.vars,
+         ..*e
+     };
+     let mut expanded = Vec::new();
+     sub.expand(&mut MacroTokens(tokens.as_slice()), &mut expanded);
+
+     // Drop the `!EVAL` frame and give the stack back to `e`.
+     sub.stack.pop();
+     e.stack = sub.stack;
+
+     let mut syntax = String::new();
+     macro_tokens_to_syntax(&mut expanded, &mut syntax);
+     Some(syntax)
+ }
+
+ /// Implements `!HEAD(arg)`: unquotes the argument, tokenizes it, and
+ /// expands to the syntax of the first token (or to nothing if there are no
+ /// tokens).
+ fn expand_head(e: &mut Expander, mut args: Vec<String>) -> Option<String> {
+     let unquoted = unquote_string(args.remove(0), e.mode);
+     let first = tokenize_string(&unquoted, e.mode, e.error)
+         .into_iter()
+         .next();
+     Some(first.map_or_else(String::new, |token| token.syntax))
+ }
+
+ /// Implements `!INDEX(haystack, needle)`: expands to the 1-based character
+ /// position of the first occurrence of `needle` in `haystack`, or to `0`
+ /// if `needle` does not occur.
+ fn expand_index(_e: &mut Expander, args: Vec<String>) -> Option<String> {
+     let (haystack, needle) = (&args[0], &args[1]);
+     let index = match haystack.find(needle.as_str()) {
+         // `find` yields a byte offset; convert it to a character count.
+         Some(byte_offset) => haystack[..byte_offset].chars().count() + 1,
+         None => 0,
+     };
+     Some(index.to_string())
+ }
+
+ /// Implements `!LENGTH(arg)`: expands to the number of characters (not
+ /// bytes) in the argument.
+ fn expand_length(_e: &mut Expander, args: Vec<String>) -> Option<String> {
+     let n_chars = args[0].chars().count();
+     Some(n_chars.to_string())
+ }
+
+ /// Implements `!QUOTE(arg)`: expands to the argument as a quoted string.
+ ///
+ /// If the argument already is a single quoted string (that is, it can be
+ /// unquoted), it is returned unchanged.  Otherwise it is wrapped in single
+ /// quotes, with each embedded single quote doubled so that the result is a
+ /// well-formed string literal.
+ fn expand_quote(e: &mut Expander, mut args: Vec<String>) -> Option<String> {
+     let arg = args.remove(0);
+     if try_unquote_string(&arg, e.mode).is_some() {
+         return Some(arg);
+     }
+
+     let mut output = String::with_capacity(arg.len() + 2);
+     output.push('\'');
+     for c in arg.chars() {
+         // The delimiter is `'`, so that is the character that must be
+         // doubled (not `"`, which needs no escaping here).
+         if c == '\'' {
+             output.push('\'');
+         }
+         output.push(c);
+     }
+     output.push('\'');
+     Some(output)
+ }
+
+ /// Implements `!SUBSTR(arg, start[, count])`: expands to the substring of
+ /// `arg` that begins at 1-based character position `start` and is at most
+ /// `count` characters long.  When `count` is omitted, the substring runs to
+ /// the end of `arg`.
+ ///
+ /// Reports `InvalidSubstr2` if `start` is not a positive integer, or
+ /// `InvalidSubstr3` if `count` is present but not a non-negative integer,
+ /// and returns `None` in either case.  The offending argument text is
+ /// included in the error.
+ fn expand_substr(e: &mut Expander, args: Vec<String>) -> Option<String> {
+     let Ok(start) = args[1].trim().parse::<NonZeroUsize>() else {
+         (e.error)(MacroError::InvalidSubstr2(args[1].clone()));
+         return None;
+     };
+
+     // The third argument is optional (the function accepts 2..=3
+     // arguments), so it must not be indexed unconditionally.
+     let count = match args.get(2) {
+         Some(arg) => {
+             let Ok(count) = arg.trim().parse::<usize>() else {
+                 (e.error)(MacroError::InvalidSubstr3(arg.clone()));
+                 return None;
+             };
+             count
+         }
+         None => usize::MAX,
+     };
+
+     Some(args[0].chars().skip(start.get() - 1).take(count).collect())
+ }
+
+ /// Implements `!TAIL(arg)`: unquotes the argument, tokenizes it, discards
+ /// the first token, and expands to the syntax of the remaining tokens.
+ ///
+ /// Expands to nothing if the argument has fewer than two tokens.
+ fn expand_tail(e: &mut Expander, mut args: Vec<String>) -> Option<String> {
+     let arg = unquote_string(args.remove(0), e.mode);
+     let mut tokens = tokenize_string(&arg, e.mode, e.error);
+     let mut tail = String::new();
+     if !tokens.is_empty() {
+         // `!TAIL` is the complement of `!HEAD`: everything except the
+         // first token, not just the last token.
+         tokens.remove(0);
+         macro_tokens_to_syntax(&mut tokens, &mut tail);
+     }
+     Some(tail)
+ }
+
+ /// Implements `!UNQUOTE(arg)`: expands to the argument with one level of
+ /// quoting removed, or to the argument unchanged if it is not a quoted
+ /// string.
+ fn expand_unquote(e: &mut Expander, mut args: Vec<String>) -> Option<String> {
+     let quoted = args.remove(0);
+     Some(unquote_string(quoted, e.mode))
+ }
+
+ /// Implements `!UPCASE(arg)`: expands to the unquoted argument converted
+ /// to uppercase.
+ fn expand_upcase(e: &mut Expander, mut args: Vec<String>) -> Option<String> {
+     let unquoted = unquote_string(args.remove(0), e.mode);
+     Some(unquoted.to_uppercase())
+ }
+
+ /// Tries to expand a call to a built-in macro function (`!BLANKS`,
+ /// `!CONCAT`, `!EVAL`, ...) at the start of `orig_input`.
+ ///
+ /// On success, returns the expansion and advances `orig_input` past the
+ /// tokens that made up the call.  Returns `None`, leaving `orig_input`
+ /// untouched, if the input does not start with a known macro function
+ /// followed by `(`, if the argument list is malformed, if the number of
+ /// arguments is wrong, or if the function itself rejects its arguments;
+ /// the last three cases are reported through `self.error`.
+ fn expand_macro_function(&mut self, orig_input: &mut MacroTokens) -> Option<String> {
+     let mut input = orig_input.clone();
+     let name = input.macro_id()?;
+     if name == "!NULL" {
+         // `!NULL` takes no argument list and expands to nothing, but its
+         // token must still be consumed, just as every other successful
+         // path commits the tokens it used.
+         orig_input.advance();
+         return Some(String::new());
+     }
+     if input.0.len() < 2 || !matches!(input.0[1].token, Token::Punct(Punct::LParen)) {
+         return None;
+     }
+
+     /// Table entry describing one built-in macro function.
+     struct MacroFunction {
+         /// Name of the function, including the leading `!`.
+         name: Identifier,
+         /// Acceptable number of arguments.
+         args: RangeInclusive<usize>,
+         /// Implementation; receives the already-parsed arguments.
+         parser: fn(&mut Expander, Vec<String>) -> Option<String>,
+     }
+     impl MacroFunction {
+         fn new(
+             name: &str,
+             args: RangeInclusive<usize>,
+             parser: fn(&mut Expander, Vec<String>) -> Option<String>,
+         ) -> Self {
+             Self {
+                 name: Identifier::new(name).unwrap(),
+                 args,
+                 parser,
+             }
+         }
+     }
+     lazy_static! {
+         static ref MACRO_FUNCTIONS: [MacroFunction; 11] = [
+             MacroFunction::new("!BLANKS", 1..=1, Expander::expand_blanks),
+             MacroFunction::new("!CONCAT", 1..=usize::MAX, Expander::expand_concat),
+             MacroFunction::new("!HEAD", 1..=1, Expander::expand_head),
+             MacroFunction::new("!INDEX", 2..=2, Expander::expand_index),
+             MacroFunction::new("!LENGTH", 1..=1, Expander::expand_length),
+             MacroFunction::new("!QUOTE", 1..=1, Expander::expand_quote),
+             MacroFunction::new("!SUBSTR", 2..=3, Expander::expand_substr),
+             MacroFunction::new("!TAIL", 1..=1, Expander::expand_tail),
+             MacroFunction::new("!UNQUOTE", 1..=1, Expander::expand_unquote),
+             MacroFunction::new("!UPCASE", 1..=1, Expander::expand_upcase),
+             MacroFunction::new("!EVAL", 1..=1, Expander::expand_eval),
+         ];
+     }
+
+     let function = MACRO_FUNCTIONS.iter().find(|mf| &mf.name == name)?;
+
+     let args = self.parse_function_args(&function.name, &mut input)?;
+
+     let n_args = args.len();
+     if !function.args.contains(&n_args) {
+         let name = function.name.clone();
+         // Pick the message that matches the function's declared arity.
+         // Every range used in the table above must have an arm here.
+         let error = match (*function.args.start(), *function.args.end()) {
+             (1, 1) => MacroError::ExpectingOneArg { name, n_args },
+             (2, 2) => MacroError::ExpectingTwoArgs { name, n_args },
+             (2, 3) => MacroError::ExpectingTwoOrThreeArgs { name, n_args },
+             (1, usize::MAX) => MacroError::ExpectingOneOrMoreArgs { name },
+             _ => unreachable!(),
+         };
+         (self.error)(error);
+         return None;
+     }
+
+     // Commit the consumed tokens only once the call is known to be
+     // well-formed.
+     *orig_input = input;
+     (function.parser)(self, args)
+ }
/// Parses one function argument from `input`. Each argument to a macro
/// function is one of:
return Some(value.clone());
}
- todo!() // expand macro function
+ if let Some(output) = self.expand_macro_function(input) {
+ return Some(output);
+ }
}
Token::Punct(Punct::BangAsterisk) => {
let mut arg = String::new();
fn evaluate_number(&mut self, input: &mut MacroTokens) -> Option<f64> {
let s = self.evaluate_expression(input)?;
- let mut tokens = Vec::new();
- tokenize_string(&s, self.mode, &mut tokens, self.error);
+ let tokens = tokenize_string(&s, self.mode, self.error);
let (
Some(MacroToken {
token: Token::Number(number),
) -> Option<(MacroTokens<'b>, IfEndClause)> {
let input_copy = input.clone();
let mut nesting = 0;
- while !input.0.is_empty() {
+ while !input.is_empty() {
if input.match_("!IF") {
nesting += 1;
} else if input.match_("!IFEND") {
fn find_doend<'b>(&mut self, input: &mut MacroTokens<'b>) -> Option<MacroTokens<'b>> {
let input_copy = input.clone();
let mut nesting = 0;
- while !input.0.is_empty() {
+ while !input.is_empty() {
if input.match_("!DO") {
nesting += 1;
} else if input.match_("!DOEND") {
return None;
}
- fn expand_do(&mut self, orig_input: &mut MacroTokens) -> bool {
+ fn expand_do(&mut self, orig_input: &mut MacroTokens, output: &mut Vec<MacroToken>) -> bool {
let mut input = orig_input.clone();
if !input.match_("!DO") {
return false;
return false;
};
- let mut stack = take(&mut self.stack);
- stack.push(Frame {
- name: Some(Identifier::new("!DO").unwrap()),
- location: None,
- });
- let mut break_ = false;
- let mut subexpander = Expander {
- break_: Some(&mut break_),
- stack,
- vars: self.vars,
- ..*self
- };
-
let (items, miterate_error) = if input.match_("!IN") {
let Some(list) = self.evaluate_expression(&mut input) else {
return false;
};
- let mut items = Vec::new();
- tokenize_string(list.as_str(), self.mode, &mut items, &self.error);
+ let items = tokenize_string(list.as_str(), self.mode, &self.error);
(
DoInput::from_list(items),
MacroError::MiterateList(MITERATE),
return false;
};
+ let mut stack = take(&mut self.stack);
+ stack.push(Frame {
+ name: Some(Identifier::new("!DO").unwrap()),
+ location: None,
+ });
+ let mut break_ = false;
+ let mut subexpander = Expander {
+ break_: Some(&mut break_),
+ stack,
+ vars: self.vars,
+ ..*self
+ };
+
for (i, item) in items.enumerate() {
- if break_ {
+ if subexpander.should_break() {
break;
}
if i >= MITERATE {
} else {
vars.insert(var_name.clone(), item);
}
+ subexpander.expand(&mut body.clone(), output);
}
*orig_input = input;
true
stack,
..*self
};
- subexpander.expand(input, output);
+ let mut body = MacroTokens(call.0.macro_.body.as_slice());
+ subexpander.expand(&mut body, output);
self.stack = subexpander.stack;
self.stack.pop();
- input.0 = &[];
+ input.0 = &input.0[call.len()..];
return;
}
}
// Variables set by `!DO` or `!LET`.
if let Some(value) = self.vars.borrow().get(id) {
- tokenize_string(value.as_str(), self.mode, output, &self.error);
+ tokenize_string_into(value.as_str(), self.mode, &self.error, output);
input.advance();
return;
}
if self.expand_let(input) {
return;
}
- if self.expand_do(input) {
+ if self.expand_do(input, output) {
return;
}
let mut body = MacroTokens(&self.0.macro_.body);
me.expand(&mut body, output);
}
+
+ /// Returns the `n_tokens` count recorded for this call.  The expander
+ /// uses it to skip past the call's tokens in its input once the call has
+ /// been expanded.
+ pub fn len(&self) -> usize {
+ self.0.n_tokens
+ }
}
const MNEST: usize = 50;