use std::{
borrow::{Borrow, Cow},
collections::{HashMap, VecDeque},
- fmt::Write,
+ fmt::{Debug, Formatter, Result as FmtResult, Write},
fs,
io::Result as IoResult,
- mem,
+ iter::once,
+ mem::{self, take},
ops::{Range, RangeInclusive},
path::Path,
sync::Arc,
use super::{
scan::{MergeResult, ScanError, ScanToken},
- segment::{Mode, Segment, Segmenter},
+ segment::{Segment, Segmenter, Syntax},
token::Token,
};
Stop,
}
+/// A syntax file's decoded contents, with precomputed byte offsets of line
+/// starts for translating byte offsets into line/column positions.
+pub struct SourceFile {
+    /// `None` if this reader is not associated with a file.
+    file_name: Option<Arc<String>>,
+
+    /// Encoding.
+    #[allow(dead_code)]
+    encoding: &'static Encoding,
+
+    /// Source file contents.
+    buffer: String,
+
+    /// Byte offsets into `buffer` of starts of lines. The first element is 0.
+    /// (Empty when `buffer` is empty, because offsets at or past the end of
+    /// the buffer are dropped by [SourceFile::new].)
+    lines: Vec<usize>,
+}
+
+impl SourceFile {
+    /// Creates a `SourceFile` from decoded `buffer`, precomputing the byte
+    /// offset of the start of each line.
+    fn new(buffer: String, encoding: &'static Encoding, file_name: Option<String>) -> Self {
+        // A line starts at offset 0 and just past each new-line.  Offsets at
+        // or past the end of the buffer are dropped.
+        let lines = once(0)
+            .chain(buffer.match_indices('\n').map(|(index, _s)| index + 1))
+            .filter(|index| *index < buffer.len())
+            .collect::<Vec<_>>();
+        Self {
+            file_name: file_name.map(Arc::new),
+            encoding,
+            buffer,
+            lines,
+        }
+    }
+
+    /// Reads and decodes the file at `path`.  If `encoding` is `None`, the
+    /// encoding is guessed from the file contents.
+    pub fn for_file<P>(path: P, encoding: Option<&'static Encoding>) -> IoResult<Self>
+    where
+        P: AsRef<Path>,
+    {
+        let bytes = fs::read(path.as_ref())?;
+        let encoding = encoding.unwrap_or_else(|| {
+            let mut encoding_detector = EncodingDetector::new();
+            encoding_detector.feed(&bytes, true);
+            encoding_detector.guess(None, true)
+        });
+        // Malformed sequences are replaced during decoding, not reported.
+        let (contents, _malformed) = encoding.decode_with_bom_removal(&bytes);
+        Ok(Self::new(
+            contents.to_string(),
+            encoding,
+            Some(path.as_ref().to_string_lossy().to_string()),
+        ))
+    }
+
+    /// Creates a `SourceFile` from already-decoded `contents` notionally read
+    /// from the file named `file_name`.
+    pub fn for_file_contents(
+        contents: String,
+        file_name: Option<String>,
+        encoding: &'static Encoding,
+    ) -> Self {
+        Self::new(contents, encoding, file_name)
+    }
+
+    /// Creates a `SourceFile` for `contents` not associated with any file.
+    pub fn for_string(contents: String, encoding: &'static Encoding) -> Self {
+        Self::new(contents, encoding, None)
+    }
+
+    /// Translates byte offset `offset` in `buffer` into a 1-based line and
+    /// column [Point].  The column is the display width of the text from the
+    /// start of the line up to `offset`, plus 1.
+    ///
+    /// NOTE(review): `self.lines[line - 1]` underflows and panics when
+    /// `lines` is empty (that is, for an empty `buffer`) -- confirm callers
+    /// never call this on an empty source.
+    fn offset_to_point(&self, offset: usize) -> Point {
+        // Number of line starts at or before `offset` == 1-based line number.
+        let line = self
+            .lines
+            .partition_point(|&line_start| line_start <= offset);
+        Point {
+            line: line as i32,
+            column: Some(
+                self.buffer
+                    .get(self.lines[line - 1]..offset)
+                    .unwrap_or_default()
+                    .width() as i32
+                    + 1,
+            ),
+        }
+    }
+
+    /// Returns the syntax (source text) for 1-based line-number `line_number`,
+    /// without its trailing new-line, or `""` if `line_number` is out of
+    /// range.
+    fn get_line(&self, line_number: i32) -> &str {
+        if (1..=self.lines.len() as i32).contains(&line_number) {
+            let line_number = line_number as usize;
+            let start = self.lines[line_number - 1];
+            // The line ends where the next line starts, or otherwise at the
+            // next new-line, or otherwise at the end of the buffer.
+            let end = self.lines.get(line_number).copied().unwrap_or(
+                self.buffer[start..]
+                    .find('\n')
+                    .map(|ofs| ofs + start)
+                    .unwrap_or(self.buffer.len()),
+            );
+            self.buffer[start..end].strip_newline()
+        } else {
+            ""
+        }
+    }
+
+    /// Returns a [Location] that spans the inclusive token `range`.
+    fn token_location(&self, range: RangeInclusive<&LexToken>) -> Location {
+        Location {
+            file_name: self.file_name.clone(),
+            span: Some(
+                self.offset_to_point(range.start().pos.start)
+                    ..self.offset_to_point(range.end().pos.end),
+            ),
+            omit_underlines: false,
+        }
+    }
+}
+
+/// An empty UTF-8 source not associated with any file.
+impl Default for SourceFile {
+    fn default() -> Self {
+        Self::new(String::new(), UTF_8, None)
+    }
+}
+
/// # Token pipeline
///
/// Tokens pass through a pipeline with the following stages. Each token
/// Error-handling mode.
error_handling: ErrorHandling,
- /// Encoding.
- #[allow(dead_code)]
- encoding: &'static Encoding,
-
- /// `None` if this reader is not associated with a file.
- file_name: Option<Arc<String>>,
-
- /// Source file contents.
- buffer: String,
+ file: SourceFile,
/// 0-based line number of the first line not yet written to the journal.
journal_line: usize,
/// Byte offset of first character not yet scanned as token.
seg_pos: usize,
- /// Byte offsets into `buffer` of starts of lines. The first element is 0.
- lines: Vec<usize>,
-
/// Tokens that need to pass through the macro preprocessor to end up in
/// `merge`.
pp: VecDeque<LexToken>,
fn default() -> Self {
Self {
error_handling: ErrorHandling::default(),
- encoding: UTF_8,
- file_name: None,
- buffer: String::new(),
+ file: SourceFile::default(),
journal_line: 0,
seg_pos: 0,
- lines: vec![0],
pp: VecDeque::new(),
merge: VecDeque::new(),
eof: false,
parse: Vec::new(),
parse_ofs: 0,
- segmenter: Segmenter::new(Mode::default(), false),
+ segmenter: Segmenter::new(Syntax::default(), false),
suppress_next_newline: false,
}
}
}
-impl Source {
- pub fn for_file<P>(
- path: P,
- encoding: Option<&'static Encoding>,
- syntax: Mode,
- error_handling: ErrorHandling,
- ) -> IoResult<Self>
- where
- P: AsRef<Path>,
- {
- let bytes = fs::read(path.as_ref())?;
- let encoding = encoding.unwrap_or_else(|| {
- let mut encoding_detector = EncodingDetector::new();
- encoding_detector.feed(&bytes, true);
- encoding_detector.guess(None, true)
- });
- let (contents, _malformed) = encoding.decode_with_bom_removal(&bytes);
- Ok(Self::for_file_contents(
- contents.to_string(),
- Some(path.as_ref().to_string_lossy().to_string()),
- encoding,
- syntax,
- error_handling,
- ))
+trait StripNewline {
+ fn strip_newline(&self) -> &str;
+}
+
+impl StripNewline for str {
+ fn strip_newline(&self) -> &str {
+ self.strip_suffix("\r\n")
+ .unwrap_or(self.strip_suffix('\n').unwrap_or(self))
}
+}
- pub fn for_file_contents(
- contents: String,
- file_name: Option<String>,
- encoding: &'static Encoding,
- syntax: Mode,
- error_handling: ErrorHandling,
- ) -> Self {
+impl Source {
+ pub fn new(file: SourceFile, syntax: Syntax, error_handling: ErrorHandling) -> Self {
Self {
- buffer: contents,
- file_name: file_name.map(Arc::new),
- encoding,
+ file,
error_handling,
segmenter: Segmenter::new(syntax, false),
- ..Self::default()
+ ..Source::default()
}
}
- pub fn for_string(contents: String, encoding: &'static Encoding) -> Self {
- Self {
- buffer: contents,
- encoding,
- ..Self::default()
- }
+ pub fn new_default(file: SourceFile) -> Self {
+ Self::new(file, Syntax::default(), ErrorHandling::default())
}
fn get_pp(&mut self, context: &Context) -> bool {
let Some((seg_len, seg_type)) = self
.segmenter
- .push(&self.buffer[self.seg_pos..], true)
+ .push(&self.file.buffer[self.seg_pos..], true)
.unwrap()
else {
return false;
let pos = self.seg_pos..self.seg_pos + seg_len;
self.seg_pos += seg_len;
- if seg_type == Segment::Newline {
- self.lines.push(self.seg_pos);
- }
- let scan_token = ScanToken::from_segment(&self.buffer[pos.clone()], seg_type);
+ let scan_token = ScanToken::from_segment(&self.file.buffer[pos.clone()], seg_type);
let n_lines = match (seg_type, self.suppress_next_newline) {
(Segment::EndCommand, false) => {
_ => 0,
};
for line_num in self.journal_line..self.journal_line + n_lines {
- let start_ofs = self.lines[line_num];
- let end_ofs = self
- .lines
- .get(line_num + 1)
- .copied()
- .unwrap_or(self.buffer.len());
- let line = &self.buffer[start_ofs..end_ofs];
- let _line = line
- .strip_suffix("\r\n")
- .unwrap_or(line.strip_suffix('\n').unwrap_or(line));
+ let _line = &self.file.get_line(line_num as i32).strip_newline();
// XXX submit the line as syntax
}
self.journal_line += n_lines;
- let pos = pos.start..pos.end;
match scan_token {
None => false,
Some(ScanToken::Token(token)) => {
Some(ScanToken::Error(error)) => {
(context.error)(
Location {
- file_name: self.file_name.clone(),
- span: Some(self.offset_to_point(pos.start)..self.offset_to_point(pos.end)),
+ file_name: self.file.file_name.clone(),
+ span: Some(
+ self.file.offset_to_point(pos.start)
+ ..self.file.offset_to_point(pos.end),
+ ),
omit_underlines: false,
},
error.into(),
}
fn get_merge(&mut self, context: &Context) -> bool {
- println!("{}:{}", file!(), line!());
if self.pp.is_empty() && !self.get_pp(context) {
return false;
}
- println!("{}:{} pp.len()={}", file!(), line!(), self.pp.len());
- for pp in &self.pp {
- println!("{:?}", &pp.token);
- }
if !Settings::global().macros.expand {
self.merge.append(&mut self.pp);
unreachable!();
}
let token = &self.pp[ofs];
- if parser.push(&token.token, &self.buffer[token.pos.clone()], &|e| {
+ if parser.push(&token.token, &self.file.buffer[token.pos.clone()], &|e| {
println!("{e:?}")
}) == ParseStatus::Complete
{
let c1 = &self.pp[call.len() - 1];
let mut expansion = Vec::new();
call.expand(
- self.segmenter.mode(),
- self.token_location(c0..=c1),
+ self.segmenter.syntax(),
+ self.file.token_location(c0..=c1),
&mut expansion,
|e| println!("{e:?}"),
);
}
}) {
Ok(Some(MergeResult::Copy)) => {
- println!("{}:{}", file!(), line!());
self.parse.push(self.merge.pop_front().unwrap());
return true;
}
},
});
self.merge.drain(..n);
- println!("{}:{}", file!(), line!());
return true;
}
Ok(None) => return false,
}
}
- fn offset_to_point(&self, offset: usize) -> Point {
- let line = self
- .lines
- .partition_point(|&line_start| line_start <= offset);
- Point {
- line: line as i32,
- column: Some(
- self.buffer
- .get(self.lines[line - 1]..offset)
- .unwrap_or_default()
- .width() as i32
- + 1,
- ),
- }
- }
-
- /// Returns the syntax for 1-based line-number `line_number`.
- fn get_line(&self, line_number: i32) -> &str {
- if (1..=self.lines.len() as i32).contains(&line_number) {
- let line_number = line_number as usize;
- let start = self.lines[line_number - 1];
- let end = self.lines.get(line_number).copied().unwrap_or(
- self.buffer[start..]
- .find('\n')
- .map(|ofs| ofs + start)
- .unwrap_or(self.buffer.len()),
- );
- let line = &self.buffer[start..end];
- line.strip_suffix("\r\n")
- .unwrap_or(line.strip_suffix('\n').unwrap_or(line))
- } else {
- ""
- }
- }
-
- fn token_location(&self, range: RangeInclusive<&LexToken>) -> Location {
- Location {
- file_name: self.file_name.clone(),
- span: Some(
- self.offset_to_point(range.start().pos.start)
- ..self.offset_to_point(range.end().pos.end),
- ),
- omit_underlines: false,
- }
- }
-
fn ofs_location(&self, range: RangeInclusive<usize>) -> Location {
if *range.start() <= *range.end() && *range.end() < self.parse.len() {
- self.token_location(&self.parse[*range.start()]..=&self.parse[*range.end()])
+ self.file
+ .token_location(&self.parse[*range.start()]..=&self.parse[*range.end()])
} else {
Location {
- file_name: self.file_name.clone(),
+ file_name: self.file.file_name.clone(),
span: None,
omit_underlines: false,
}
let token0 = &self.parse[*ofs.start()];
let token1 = &self.parse[*ofs.end()];
- Some(&self.buffer[token0.pos.start..token1.pos.end])
+ Some(&self.file.buffer[token0.pos.start..token1.pos.end])
}
fn is_empty(&self) -> bool {
- self.buffer.is_empty()
+ self.file.buffer.is_empty()
}
fn diagnostic(
(l0..=l1).collect()
};
for line_number in lines {
- source.push((line_number, self.get_line(line_number).to_string()));
+ source.push((line_number, self.file.get_line(line_number).to_string()));
}
}
macro_rep: Option<MacroRepresentation>,
}
+/// A scan error plus the byte range of the source text that provoked it.
+struct LexError {
+    /// The scan error itself.
+    error: ScanError,
+    /// Byte offsets of the offending text within the source buffer.
+    pos: Range<usize>,
+}
+
impl Borrow<Token> for LexToken {
fn borrow(&self) -> &Token {
&self.token
}
}
+impl LexToken {
+    /// Returns the syntax in `source` that this token was scanned from.
+    fn representation<'a>(&self, source: &'a SourceFile) -> &'a str {
+        &source.buffer[self.pos.clone()]
+    }
+}
+
struct MacroRepresentation {
/// An entire macro expansion.
expansion: Arc<String>,
}
while self.source.parse_ofs == self.source.parse.len() {
- println!("{}:{}", file!(), line!());
let context = Context {
macros: &self.macros,
error: &self.error,
};
- println!("{}:{}", file!(), line!());
if !self.source.get_parse(&context) {
- println!("{}:{}", file!(), line!());
if !self.pop_stack() {
- println!("{}:{}", file!(), line!());
return &Token::End;
}
}
- println!("{}:{}", file!(), line!());
}
self.source.token()
}
mod tests {
use encoding_rs::UTF_8;
- use crate::lex::{segment::Mode, token::Token};
+ use crate::lex::token::Token;
- use super::{ErrorHandling, Lexer, Source};
+ use super::{Lexer, Source, SourceFile};
#[test]
fn test() {
let mut lexer = Lexer::new(Box::new(|location, error| println!("{location}: {error}")));
- lexer.include(Source::for_string(
+ lexer.include(Source::new_default(SourceFile::for_string(
String::from(
r#"#! /usr/local/bin/pspp
DATA LIST LIST NOTABLE /a.
"#,
),
UTF_8,
- ));
+ )));
loop {
lexer.get();
let token = lexer.token();
#[test]
fn test_scan_errors() {
let mut lexer = Lexer::new(Box::new(|location, error| println!("{location}: {error}")));
- lexer.include(Source::for_file_contents(
+ lexer.include(Source::new_default(SourceFile::for_file_contents(
String::from(
r#"x'123'
x'1x'
),
Some(String::from("syntax.sps")),
UTF_8,
- Mode::default(),
- ErrorHandling::default(),
- ));
+ )));
loop {
lexer.get();
let token = lexer.token();
#[test]
fn test_null_byte() {
let mut lexer = Lexer::new(Box::new(|location, error| println!("{location}: {error}")));
- lexer.include(Source::for_file_contents(
+ lexer.include(Source::new_default(SourceFile::for_file_contents(
String::from(
"datA dist list notable file='input.txt'/a b c.
lis|.\0",
),
Some(String::from("syntax.sps")),
UTF_8,
- Mode::default(),
- ErrorHandling::default(),
- ));
+ )));
loop {
lexer.get();
let token = lexer.token();
}
}
}
+
+/// A sequence of tokens along with the source file they were scanned from.
+struct Tokens {
+    /// Source file, used to recover each token's representation.
+    file: Arc<SourceFile>,
+    /// The tokens themselves.
+    tokens: Vec<LexToken>,
+}
+
+impl Debug for Tokens {
+    /// Formats the tokens as their source representations, comma-separated.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        write!(f, "Tokens {{ ")?;
+        for (index, token) in self.tokens.iter().enumerate() {
+            if index > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "{:?}", token.representation(&self.file))?;
+        }
+        write!(f, " }}")
+    }
+}
+
+/// A lexer over a previously scanned [Tokens] collection.
+struct NewLexer<'a> {
+    /// The token collection this lexer reads from.
+    backing: &'a Tokens,
+    /// The slice of `backing`'s tokens visible to this lexer.
+    tokens: &'a [LexToken],
+    /// Index of the current token within `tokens`.
+    pos: usize,
+}
+
+impl<'a> NewLexer<'a> {
+    /// Creates a lexer positioned at the start of `backing`'s tokens.
+    pub fn new(backing: &'a Tokens) -> Self {
+        Self {
+            backing,
+            tokens: backing.tokens.as_slice(),
+            pos: 0,
+        }
+    }
+
+    /// Advances past the current token, unless already at the end.
+    pub fn get(&mut self) {
+        if !self.at_end() {
+            self.pos += 1;
+        }
+    }
+
+    /// Returns true if all of the tokens have been consumed.
+    pub fn at_end(&self) -> bool {
+        self.pos >= self.tokens.len()
+    }
+
+    /// If the current token equals `token`, consumes it and returns true.
+    /// Otherwise, leaves the position unchanged and returns false.
+    pub fn match_(&mut self, token: &Token) -> bool {
+        if self.token() == token {
+            self.get();
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Returns the current token, or [Token::End] past the end of input.
+    pub fn token(&self) -> &Token {
+        self.tokens
+            .get(self.pos)
+            .map_or(&Token::End, |token| &token.token)
+    }
+
+    /// Returns the token at signed offset `ofs` relative to the current one,
+    /// or [Token::End] if that position is out of range.
+    pub fn next(&self, ofs: isize) -> &Token {
+        ofs.checked_add(self.pos as isize)
+            .and_then(|index| usize::try_from(index).ok())
+            .and_then(|index| self.tokens.get(index))
+            .map_or(&Token::End, |token| &token.token)
+    }
+}
+
+/// A source of tokens scanned incrementally from a [SourceFile].
+struct NewSource {
+    /// The file being tokenized.
+    file: Arc<SourceFile>,
+    /// Segmentation state.
+    segmenter: Segmenter,
+    /// Byte offset of the first byte in `file` not yet segmented.
+    seg_pos: usize,
+    /// Tokens scanned but not yet consumed.
+    lookahead: VecDeque<LexToken>,
+}
+
+impl NewSource {
+ pub fn new_default(file: SourceFile) -> Self {
+ Self::new(file, Syntax::default())
+ }
+
+ pub fn new(file: SourceFile, syntax: Syntax) -> Self {
+ Self {
+ file: Arc::new(file),
+ segmenter: Segmenter::new(syntax, false),
+ seg_pos: 0,
+ lookahead: VecDeque::new(),
+ }
+ }
+
+ pub fn read_command(&mut self, macros: &MacroSet) -> Option<Tokens> {
+ loop {
+ if let Some(end) = self
+ .lookahead
+ .iter()
+ .position(|token| token.token == Token::EndCommand)
+ {
+ return Some(Tokens {
+ file: self.file.clone(),
+ tokens: self.lookahead.drain(..=end).collect(),
+ });
+ }
+ if !self.read_lookahead(macros) {
+ return None;
+ }
+ }
+ }
+
+ pub fn read_lookahead(&mut self, macros: &MacroSet) -> bool {
+ let mut errors = Vec::new();
+ let mut pp = VecDeque::new();
+ while let Some((seg_len, seg_type)) = self
+ .segmenter
+ .push(&self.file.buffer[self.seg_pos..], true)
+ .unwrap()
+ {
+ let pos = self.seg_pos..self.seg_pos + seg_len;
+ self.seg_pos += seg_len;
+
+ match ScanToken::from_segment(&self.file.buffer[pos.clone()], seg_type) {
+ None => (),
+ Some(ScanToken::Token(token)) => {
+ let end = token == Token::EndCommand;
+ pp.push_back(LexToken {
+ token,
+ pos,
+ macro_rep: None,
+ });
+ if end {
+ break;
+ }
+ }
+ Some(ScanToken::Error(error)) => errors.push(LexError { error, pos }),
+ }
+ }
+ if pp.is_empty() {
+ return false;
+ }
+
+ let mut merge = if !Settings::global().macros.expand || macros.is_empty() {
+ take(&mut pp)
+ } else {
+ let mut merge = VecDeque::new();
+ while !pp.is_empty() {
+ self.expand_macro(macros, &mut pp, &mut merge);
+ }
+ merge
+ };
+
+ while let Ok(Some(result)) =
+ ScanToken::merge(|index| Ok(merge.get(index).map(|token| &token.token)))
+ {
+ match result {
+ MergeResult::Copy => self.lookahead.push_back(merge.pop_front().unwrap()),
+ MergeResult::Expand { n, token } => {
+ let first = &merge[0];
+ let last = &merge[n - 1];
+ self.lookahead.push_back(LexToken {
+ token,
+ pos: first.pos.start..last.pos.end,
+ macro_rep: match (&first.macro_rep, &last.macro_rep) {
+ (Some(a), Some(b)) if Arc::ptr_eq(&a.expansion, &b.expansion) => {
+ Some(MacroRepresentation {
+ expansion: a.expansion.clone(),
+ pos: *a.pos.start()..=*b.pos.end(),
+ })
+ }
+ _ => None,
+ },
+ });
+ merge.drain(..n);
+ }
+ }
+ }
+ true
+ }
+
+ fn expand_macro(
+ &self,
+ macros: &MacroSet,
+ src: &mut VecDeque<LexToken>,
+ dst: &mut VecDeque<LexToken>,
+ ) {
+ // Now pass tokens one-by-one to the macro expander.
+ let Some(mut parser) = Parser::new(macros, &src[0].token) else {
+ // Common case where there is no macro to expand.
+ dst.push_back(src.pop_front().unwrap());
+ return;
+ };
+ for ofs in 1.. {
+ let token = &src[ofs];
+ if parser.push(&token.token, &self.file.buffer[token.pos.clone()], &|e| {
+ println!("{e:?}")
+ }) == ParseStatus::Complete
+ {
+ break;
+ }
+ }
+ let call = parser.finish();
+ if call.len() == 0 {
+ // False alarm: no macro to expand after all.
+ dst.push_back(src.pop_front().unwrap());
+ return;
+ }
+
+ // Expand the tokens.
+ let c0 = &src[0];
+ let c1 = &src[call.len() - 1];
+ let mut expansion = Vec::new();
+ call.expand(
+ self.segmenter.syntax(),
+ self.file.token_location(c0..=c1),
+ &mut expansion,
+ |e| println!("{e:?}"),
+ );
+
+ if Settings::global().macros.print_expansions {
+ // XXX
+ }
+
+ // Append the macro expansion tokens to the lookahead.
+ let mut macro_rep = String::new();
+ let mut pos = Vec::with_capacity(expansion.len());
+ for [prefix, token] in macro_tokens_to_syntax(expansion.as_slice()) {
+ macro_rep.push_str(prefix);
+ let len = macro_rep.len();
+ pos.push(len..=len + token.len() - 1);
+ }
+ let macro_rep = Arc::new(macro_rep);
+ for (index, token) in expansion.into_iter().enumerate() {
+ let lt = LexToken {
+ token: token.token,
+ pos: c0.pos.start..c1.pos.end,
+ macro_rep: Some(MacroRepresentation {
+ expansion: Arc::clone(¯o_rep),
+ pos: pos[index].clone(),
+ }),
+ };
+ dst.push_back(lt);
+ }
+ src.drain(..call.len());
+ }
+}
+
+#[cfg(test)]
+mod new_lexer_tests {
+    use encoding_rs::UTF_8;
+
+    use crate::macros::MacroSet;
+
+    use super::{NewLexer, NewSource, Source, SourceFile};
+
+    /// Reads a small syntax file command by command, printing each command's
+    /// tokens.  (Output is inspected by eye; nothing is asserted.)
+    #[test]
+    fn test() {
+        let code = r#"DATA LIST LIST /A * B * X * Y * .
+BEGIN DATA.
+2 3 4 5
+END DATA.
+
+CROSSTABS VARIABLES X (1,7) Y (1,7) /TABLES X BY Y.
+"#;
+        let file = SourceFile::for_file_contents(
+            String::from(code),
+            Some(String::from("crosstabs.sps")),
+            UTF_8,
+        );
+        let mut source = NewSource::new_default(file);
+        while let Some(tokens) = source.read_command(&MacroSet::new()) {
+            println!("{tokens:?}");
+        }
+    }
+}
use crate::{
identifier::Identifier,
lex::{
- segment::Mode,
+ segment::Syntax,
token::{Punct, Token},
},
};
}
#[track_caller]
-fn check_scan(input: &str, mode: Mode, expected: &[ScanToken]) {
+fn check_scan(input: &str, mode: Syntax, expected: &[ScanToken]) {
let tokens = StringScanner::new(input, mode, false).collect::<Vec<_>>();
if &tokens != expected {
WXYZ. /* unterminated end of line comment
�. /* U+FFFD is not valid in an identifier
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("a").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("aB").unwrap())),
andx orx notx eqx gex gtx lex ltx nex allx byx tox withx
and. with.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Punct(Punct::And)),
ScanToken::Token(Token::Punct(Punct::Or)),
~&|=>=><=<~=<>(),-+*/[]**
% : ; ? _ ` { } ~
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Punct(Punct::Not)),
ScanToken::Token(Token::Punct(Punct::And)),
1.23e1 45.6E-1 78.9e+1 99.9E+01 11.2e-03
. 1e e1 1e+ 1e-
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Number(0.0)),
ScanToken::Token(Token::Number(1.0)),
-/**/1
-. -1e -e1 -1e+ -1e- -1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Number(-0.0)),
ScanToken::Token(Token::Number(-1.0)),
"�あいうえお"
"abc"+U"FFFD"+u'3048'+"xyz"
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::String(String::from("x"))),
ScanToken::Token(Token::String(String::from("y"))),
r#"#! /usr/bin/pspp
#! /usr/bin/pspp
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("#").unwrap())),
ScanToken::Token(Token::Punct(Punct::Bang)),
next command.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
second paragraph.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("DOCUMENT").unwrap())),
ScanToken::Token(Token::String(String::from("DOCUMENT one line."))),
/**/ lab not quoted here either
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("FIL").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("label").unwrap())),
end data
.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("begin").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
end
repeat.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("do").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
inner command
end repeat
"#,
- Mode::Batch,
+ Syntax::Batch,
&[
ScanToken::Token(Token::Id(Identifier::new("do").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
fourth command.
fifth command.
"#,
- Mode::Batch,
+ Syntax::Batch,
&[
ScanToken::Token(Token::Id(Identifier::new("first").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
identifier::Identifier,
lex::{
scan::ScanToken,
- segment::Mode,
+ segment::Syntax,
token::{Punct, Token},
},
};
var1 var2 var3
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1() var1 var2 var3
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1()
var1 var2 var3!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
check_scan(
r#"define !macro1()var1 var2 var3!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1()
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1(a(), b(), c())
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
)
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
content 2
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1.
data list /x 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
x.
data list /x 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
x.
data list /x 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1.
data list /x 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
content line 1
content line 2
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
use super::command_name::{command_match, COMMAND_NAMES};
-/// Segmentation mode.
+/// Syntax variant.
///
-/// PSPP syntax is written in one of two modes which are broadly defined as
-/// follows:
+/// PSPP syntax is written in one of two syntax variants which are broadly
+/// defined as follows:
///
-/// - In interactive mode, commands end with a period at the end of the line
+/// - In interactive syntax, commands end with a period at the end of the line
/// or with a blank line.
///
-/// - In batch mode, the second and subsequent lines of a command are indented
+/// - In batch syntax, the second and subsequent lines of a command are indented
/// from the left margin.
///
-/// The segmenter can also try to automatically detect the mode in use, using a
-/// heuristic that is usually correct.
+/// The segmenter can also try to automatically detect the kind of syntax in
+/// use, using a heuristic that is usually correct.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
-pub enum Mode {
+pub enum Syntax {
/// Try to interpret input correctly regardless of whether it is written
- /// for interactive or batch mode.
+ /// for interactive or batch syntax.
#[default]
Auto,
- /// Interactive syntax mode.
+ /// Interactive syntax.
Interactive,
- /// Batch syntax mode.
+ /// Batch syntax.
Batch,
}
pub struct Segmenter {
state: (State, Substate),
nest: u8,
- mode: Mode,
+ syntax: Syntax,
}
#[derive(Copy, Clone, Debug)]
pub struct Incomplete;
impl Segmenter {
- /// Returns a segmenter with the given syntax `mode`.
+ /// Returns a segmenter with the given `syntax`.
///
/// If `is_snippet` is false, then the segmenter will parse as if it's being
/// given a whole file. This means, for example, that it will interpret `-`
/// given an isolated piece of syntax. This means that, for example, that
/// it will interpret `-` or `+` at the beginning of the syntax as an
/// operator token or (if followed by a digit) as part of a number.
- pub fn new(mode: Mode, is_snippet: bool) -> Self {
+ pub fn new(syntax: Syntax, is_snippet: bool) -> Self {
Self {
state: if is_snippet {
(State::General, Substate::empty())
} else {
(State::Shbang, Substate::empty())
},
- mode,
+ syntax,
nest: 0,
}
}
- pub fn mode(&self) -> Mode {
- self.mode
+ pub fn syntax(&self) -> Syntax {
+ self.syntax
}
fn start_of_line(&self) -> bool {
/// Returns the style of command prompt to display to an interactive user
/// for input in the current state.. The return value is most accurate in
- /// mode `Mode::Interactive` and at the beginning of a line (that is, if
- /// [`Segmenter::push`] consumed as much as possible of the input up to a
- /// new-line).
+    /// [Syntax::Interactive] syntax and at the beginning of a line (that
+ /// is, if [`Segmenter::push`] consumed as much as possible of the input up
+ /// to a new-line).
pub fn prompt(&self) -> PromptStyle {
match self.state.0 {
State::Shbang => PromptStyle::First,
self.push_rest(input, eof)
}
fn at_command_start(&self, input: &str, eof: bool) -> Result<bool, Incomplete> {
- match self.mode {
- Mode::Auto => detect_command_name(input, eof),
- Mode::Interactive => Ok(false),
- Mode::Batch => Ok(true),
+ match self.syntax {
+ Syntax::Auto => detect_command_name(input, eof),
+ Syntax::Interactive => Ok(false),
+ Syntax::Batch => Ok(true),
}
}
fn parse_start_of_line<'a>(
mut input: &'a str,
eof: bool,
) -> Result<(&'a str, &'a str), Incomplete> {
- let mut sub = Segmenter::new(self.mode, true);
+ let mut sub = Segmenter::new(self.syntax, true);
loop {
let Some((seg_len, seg_type)) = sub.push(input, eof)? else {
return Ok((input, input));
eof: bool,
) -> Result<Option<(&'a str, Segment)>, Incomplete> {
let mut sub = Segmenter {
- mode: self.mode,
+ syntax: self.syntax,
state: (State::General, self.state.1),
nest: 0,
};
use crate::prompt::PromptStyle;
-use super::{Mode, Segment, Segmenter};
+use super::{Syntax, Segment, Segmenter};
fn push_segment<'a>(
segmenter: &mut Segmenter,
fn _check_segmentation(
mut input: &str,
- mode: Mode,
+ mode: Syntax,
expect_segments: &[(Segment, &str)],
expect_prompts: &[PromptStyle],
one_byte: bool,
fn check_segmentation(
input: &str,
- mode: Mode,
+ mode: Syntax,
expect_segments: &[(Segment, &str)],
expect_prompts: &[PromptStyle],
) {
#[allow(dead_code)]
fn print_segmentation(mut input: &str) {
- let mut segmenter = Segmenter::new(Mode::Interactive, false);
+ let mut segmenter = Segmenter::new(Syntax::Interactive, false);
while let Some((seg_len, seg_type)) = segmenter.push(input, true).unwrap() {
let (token, rest) = input.split_at(seg_len);
print!("{seg_type:?} {token:?}");
.x 1y _z
!abc abc!
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Identifier, "a"),
(Segment::Spaces, " "),
WXYZ. /* unterminated end of line comment
WxYz./* unterminated end of line comment
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Identifier, "abcd."),
(Segment::Spaces, " "),
andx orx notx eqx gex gtx lex ltx nex allx byx tox withx
and. with.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Identifier, "and"),
(Segment::Spaces, " "),
~&|=>=><=<~=<>(),-+*/[]**!*
% : ; ? _ ` { } ~ !*
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Punct, "~"),
(Segment::Spaces, " "),
1.23e1 45.6E-1 78.9e+1 99.9E+01 11.2e-03
. 1e e1 1e+ 1e- 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Number, "0"),
(Segment::Spaces, " "),
-/**/1
-. -1e -e1 -1e+ -1e- -1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Spaces, " "),
(Segment::Number, "-0"),
+ /* also a punctuator on blank line
- 'new command'
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::QuotedString, "'x'"),
(Segment::Spaces, " "),
title my title.
#! /usr/bin/pspp
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Shbang, "#! /usr/bin/pspp"),
(Segment::Newline, "\n"),
next command.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::CommentCommand, "* Comment commands \"don't"),
(Segment::Newline, "\n"),
second paragraph.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::StartDocument, ""),
(Segment::Document, "DOCUMENT one line."),
/**/ lab not quoted here either
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "FIL"),
(Segment::Spaces, " "),
begin data 123.
not data
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "begin"),
(Segment::Spaces, " "),
inner command.
end repeat.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "do"),
(Segment::Spaces, " "),
.map(|_| PromptStyle::DoRepeat)
.chain([PromptStyle::First, PromptStyle::First, PromptStyle::First])
.collect();
- check_segmentation(&s, Mode::Interactive, &expect_output, &expect_prompts);
+ check_segmentation(&s, Syntax::Interactive, &expect_output, &expect_prompts);
}
#[test]
inner command
end repeat
"#,
- Mode::Batch,
+ Syntax::Batch,
&[
(Segment::Identifier, "do"),
(Segment::Spaces, " "),
mod define {
use crate::{
- lex::segment::{Mode, Segment},
+ lex::segment::{Syntax, Segment},
prompt::PromptStyle,
};
var1 var2 var3 "!enddefine"
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1() var1 var2 var3 /* !enddefine
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1()
var1 var2 var3!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
check_segmentation(
r#"define !macro1()var1 var2 var3!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1()
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1(a(), b(), c())
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
)
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
content 2
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1.
data list /x 1.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
x.
data list /x 1.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
x.
data list /x 1.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1.
data list /x 1.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
content line 1
content line 2
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
check_segmentation(
r#"define !macro1()
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
fourth command.
fifth command.
"#,
- Mode::Batch,
+ Syntax::Batch,
&[
(Segment::Identifier, "first"),
(Segment::Spaces, " "),
fourth command.
fifth command.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Identifier, "command"),
(Segment::Newline, "\n"),