use std::{
borrow::{Borrow, Cow},
collections::{HashMap, VecDeque},
- fmt::Write,
+ fmt::{Debug, Formatter, Result as FmtResult, Write},
fs,
io::Result as IoResult,
- mem,
+ iter::once,
+ mem::{self, take},
ops::{Range, RangeInclusive},
path::Path,
sync::Arc,
use super::{
scan::{MergeResult, ScanError, ScanToken},
- segment::{Mode, Segment, Segmenter},
+ segment::{Segment, Segmenter, Syntax},
token::Token,
};
Stop,
}
+/// A syntax file's decoded contents, with precomputed byte offsets of line
+/// starts for translating byte offsets into line/column positions.
+pub struct SourceFile {
+    /// `None` if this reader is not associated with a file.
+    file_name: Option<Arc<String>>,
+
+    /// Encoding.
+    #[allow(dead_code)]
+    encoding: &'static Encoding,
+
+    /// Source file contents.
+    buffer: String,
+
+    /// Byte offsets into `buffer` of starts of lines. The first element is 0.
+    /// (Empty when `buffer` is empty, because offsets at or past the end of
+    /// the buffer are dropped by [SourceFile::new].)
+    lines: Vec<usize>,
+}
+
+impl SourceFile {
+    /// Creates a `SourceFile` from decoded `buffer`, precomputing the byte
+    /// offset of the start of each line.
+    fn new(buffer: String, encoding: &'static Encoding, file_name: Option<String>) -> Self {
+        // A line starts at offset 0 and just past each new-line.  Offsets at
+        // or past the end of the buffer are dropped.
+        let lines = once(0)
+            .chain(buffer.match_indices('\n').map(|(index, _s)| index + 1))
+            .filter(|index| *index < buffer.len())
+            .collect::<Vec<_>>();
+        Self {
+            file_name: file_name.map(Arc::new),
+            encoding,
+            buffer,
+            lines,
+        }
+    }
+
+    /// Reads and decodes the file at `path`.  If `encoding` is `None`, the
+    /// encoding is guessed from the file contents.
+    pub fn for_file<P>(path: P, encoding: Option<&'static Encoding>) -> IoResult<Self>
+    where
+        P: AsRef<Path>,
+    {
+        let bytes = fs::read(path.as_ref())?;
+        let encoding = encoding.unwrap_or_else(|| {
+            let mut encoding_detector = EncodingDetector::new();
+            encoding_detector.feed(&bytes, true);
+            encoding_detector.guess(None, true)
+        });
+        // Malformed sequences are replaced during decoding, not reported.
+        let (contents, _malformed) = encoding.decode_with_bom_removal(&bytes);
+        Ok(Self::new(
+            contents.to_string(),
+            encoding,
+            Some(path.as_ref().to_string_lossy().to_string()),
+        ))
+    }
+
+    /// Creates a `SourceFile` from already-decoded `contents` notionally read
+    /// from the file named `file_name`.
+    pub fn for_file_contents(
+        contents: String,
+        file_name: Option<String>,
+        encoding: &'static Encoding,
+    ) -> Self {
+        Self::new(contents, encoding, file_name)
+    }
+
+    /// Creates a `SourceFile` for `contents` not associated with any file.
+    pub fn for_string(contents: String, encoding: &'static Encoding) -> Self {
+        Self::new(contents, encoding, None)
+    }
+
+    /// Translates byte offset `offset` in `buffer` into a 1-based line and
+    /// column [Point].  The column is the display width of the text from the
+    /// start of the line up to `offset`, plus 1.
+    ///
+    /// NOTE(review): `self.lines[line - 1]` underflows and panics when
+    /// `lines` is empty (that is, for an empty `buffer`) -- confirm callers
+    /// never call this on an empty source.
+    fn offset_to_point(&self, offset: usize) -> Point {
+        // Number of line starts at or before `offset` == 1-based line number.
+        let line = self
+            .lines
+            .partition_point(|&line_start| line_start <= offset);
+        Point {
+            line: line as i32,
+            column: Some(
+                self.buffer
+                    .get(self.lines[line - 1]..offset)
+                    .unwrap_or_default()
+                    .width() as i32
+                    + 1,
+            ),
+        }
+    }
+
+    /// Returns the syntax (source text) for 1-based line-number `line_number`,
+    /// without its trailing new-line, or `""` if `line_number` is out of
+    /// range.
+    fn get_line(&self, line_number: i32) -> &str {
+        if (1..=self.lines.len() as i32).contains(&line_number) {
+            let line_number = line_number as usize;
+            let start = self.lines[line_number - 1];
+            // The line ends where the next line starts, or otherwise at the
+            // next new-line, or otherwise at the end of the buffer.
+            let end = self.lines.get(line_number).copied().unwrap_or(
+                self.buffer[start..]
+                    .find('\n')
+                    .map(|ofs| ofs + start)
+                    .unwrap_or(self.buffer.len()),
+            );
+            self.buffer[start..end].strip_newline()
+        } else {
+            ""
+        }
+    }
+
+    /// Returns a [Location] that spans the inclusive token `range`.
+    fn token_location(&self, range: RangeInclusive<&LexToken>) -> Location {
+        Location {
+            file_name: self.file_name.clone(),
+            span: Some(
+                self.offset_to_point(range.start().pos.start)
+                    ..self.offset_to_point(range.end().pos.end),
+            ),
+            omit_underlines: false,
+        }
+    }
+}
+
+/// An empty UTF-8 source not associated with any file.
+impl Default for SourceFile {
+    fn default() -> Self {
+        Self::new(String::new(), UTF_8, None)
+    }
+}
+
/// # Token pipeline
///
/// Tokens pass through a pipeline with the following stages. Each token
/// Error-handling mode.
error_handling: ErrorHandling,
- /// Encoding.
- #[allow(dead_code)]
- encoding: &'static Encoding,
-
- /// `None` if this reader is not associated with a file.
- file_name: Option<Arc<String>>,
-
- /// Source file contents.
- buffer: String,
+ file: SourceFile,
/// 0-based line number of the first line not yet written to the journal.
journal_line: usize,
/// Byte offset of first character not yet scanned as token.
seg_pos: usize,
- /// Byte offsets into `buffer` of starts of lines. The first element is 0.
- lines: Vec<usize>,
-
/// Tokens that need to pass through the macro preprocessor to end up in
/// `merge`.
pp: VecDeque<LexToken>,
fn default() -> Self {
Self {
error_handling: ErrorHandling::default(),
- encoding: UTF_8,
- file_name: None,
- buffer: String::new(),
+ file: SourceFile::default(),
journal_line: 0,
seg_pos: 0,
- lines: vec![0],
pp: VecDeque::new(),
merge: VecDeque::new(),
eof: false,
parse: Vec::new(),
parse_ofs: 0,
- segmenter: Segmenter::new(Mode::default(), false),
+ segmenter: Segmenter::new(Syntax::default(), false),
suppress_next_newline: false,
}
}
}
-impl Source {
- pub fn for_file<P>(
- path: P,
- encoding: Option<&'static Encoding>,
- syntax: Mode,
- error_handling: ErrorHandling,
- ) -> IoResult<Self>
- where
- P: AsRef<Path>,
- {
- let bytes = fs::read(path.as_ref())?;
- let encoding = encoding.unwrap_or_else(|| {
- let mut encoding_detector = EncodingDetector::new();
- encoding_detector.feed(&bytes, true);
- encoding_detector.guess(None, true)
- });
- let (contents, _malformed) = encoding.decode_with_bom_removal(&bytes);
- Ok(Self::for_file_contents(
- contents.to_string(),
- Some(path.as_ref().to_string_lossy().to_string()),
- encoding,
- syntax,
- error_handling,
- ))
+trait StripNewline {
+ fn strip_newline(&self) -> &str;
+}
+
+impl StripNewline for str {
+ fn strip_newline(&self) -> &str {
+ self.strip_suffix("\r\n")
+ .unwrap_or(self.strip_suffix('\n').unwrap_or(self))
}
+}
- pub fn for_file_contents(
- contents: String,
- file_name: Option<String>,
- encoding: &'static Encoding,
- syntax: Mode,
- error_handling: ErrorHandling,
- ) -> Self {
+impl Source {
+ pub fn new(file: SourceFile, syntax: Syntax, error_handling: ErrorHandling) -> Self {
Self {
- buffer: contents,
- file_name: file_name.map(Arc::new),
- encoding,
+ file,
error_handling,
segmenter: Segmenter::new(syntax, false),
- ..Self::default()
+ ..Source::default()
}
}
- pub fn for_string(contents: String, encoding: &'static Encoding) -> Self {
- Self {
- buffer: contents,
- encoding,
- ..Self::default()
- }
+ pub fn new_default(file: SourceFile) -> Self {
+ Self::new(file, Syntax::default(), ErrorHandling::default())
}
fn get_pp(&mut self, context: &Context) -> bool {
let Some((seg_len, seg_type)) = self
.segmenter
- .push(&self.buffer[self.seg_pos..], true)
+ .push(&self.file.buffer[self.seg_pos..], true)
.unwrap()
else {
return false;
let pos = self.seg_pos..self.seg_pos + seg_len;
self.seg_pos += seg_len;
- if seg_type == Segment::Newline {
- self.lines.push(self.seg_pos);
- }
- let scan_token = ScanToken::from_segment(&self.buffer[pos.clone()], seg_type);
+ let scan_token = ScanToken::from_segment(&self.file.buffer[pos.clone()], seg_type);
let n_lines = match (seg_type, self.suppress_next_newline) {
(Segment::EndCommand, false) => {
_ => 0,
};
for line_num in self.journal_line..self.journal_line + n_lines {
- let start_ofs = self.lines[line_num];
- let end_ofs = self
- .lines
- .get(line_num + 1)
- .copied()
- .unwrap_or(self.buffer.len());
- let line = &self.buffer[start_ofs..end_ofs];
- let _line = line
- .strip_suffix("\r\n")
- .unwrap_or(line.strip_suffix('\n').unwrap_or(line));
+ let _line = &self.file.get_line(line_num as i32).strip_newline();
// XXX submit the line as syntax
}
self.journal_line += n_lines;
- let pos = pos.start..pos.end;
match scan_token {
None => false,
Some(ScanToken::Token(token)) => {
Some(ScanToken::Error(error)) => {
(context.error)(
Location {
- file_name: self.file_name.clone(),
- span: Some(self.offset_to_point(pos.start)..self.offset_to_point(pos.end)),
+ file_name: self.file.file_name.clone(),
+ span: Some(
+ self.file.offset_to_point(pos.start)
+ ..self.file.offset_to_point(pos.end),
+ ),
omit_underlines: false,
},
error.into(),
}
fn get_merge(&mut self, context: &Context) -> bool {
- println!("{}:{}", file!(), line!());
if self.pp.is_empty() && !self.get_pp(context) {
return false;
}
- println!("{}:{} pp.len()={}", file!(), line!(), self.pp.len());
- for pp in &self.pp {
- println!("{:?}", &pp.token);
- }
if !Settings::global().macros.expand {
self.merge.append(&mut self.pp);
unreachable!();
}
let token = &self.pp[ofs];
- if parser.push(&token.token, &self.buffer[token.pos.clone()], &|e| {
+ if parser.push(&token.token, &self.file.buffer[token.pos.clone()], &|e| {
println!("{e:?}")
}) == ParseStatus::Complete
{
let c1 = &self.pp[call.len() - 1];
let mut expansion = Vec::new();
call.expand(
- self.segmenter.mode(),
- self.token_location(c0..=c1),
+ self.segmenter.syntax(),
+ self.file.token_location(c0..=c1),
&mut expansion,
|e| println!("{e:?}"),
);
}
}) {
Ok(Some(MergeResult::Copy)) => {
- println!("{}:{}", file!(), line!());
self.parse.push(self.merge.pop_front().unwrap());
return true;
}
},
});
self.merge.drain(..n);
- println!("{}:{}", file!(), line!());
return true;
}
Ok(None) => return false,
}
}
- fn offset_to_point(&self, offset: usize) -> Point {
- let line = self
- .lines
- .partition_point(|&line_start| line_start <= offset);
- Point {
- line: line as i32,
- column: Some(
- self.buffer
- .get(self.lines[line - 1]..offset)
- .unwrap_or_default()
- .width() as i32
- + 1,
- ),
- }
- }
-
- /// Returns the syntax for 1-based line-number `line_number`.
- fn get_line(&self, line_number: i32) -> &str {
- if (1..=self.lines.len() as i32).contains(&line_number) {
- let line_number = line_number as usize;
- let start = self.lines[line_number - 1];
- let end = self.lines.get(line_number).copied().unwrap_or(
- self.buffer[start..]
- .find('\n')
- .map(|ofs| ofs + start)
- .unwrap_or(self.buffer.len()),
- );
- let line = &self.buffer[start..end];
- line.strip_suffix("\r\n")
- .unwrap_or(line.strip_suffix('\n').unwrap_or(line))
- } else {
- ""
- }
- }
-
- fn token_location(&self, range: RangeInclusive<&LexToken>) -> Location {
- Location {
- file_name: self.file_name.clone(),
- span: Some(
- self.offset_to_point(range.start().pos.start)
- ..self.offset_to_point(range.end().pos.end),
- ),
- omit_underlines: false,
- }
- }
-
fn ofs_location(&self, range: RangeInclusive<usize>) -> Location {
if *range.start() <= *range.end() && *range.end() < self.parse.len() {
- self.token_location(&self.parse[*range.start()]..=&self.parse[*range.end()])
+ self.file
+ .token_location(&self.parse[*range.start()]..=&self.parse[*range.end()])
} else {
Location {
- file_name: self.file_name.clone(),
+ file_name: self.file.file_name.clone(),
span: None,
omit_underlines: false,
}
let token0 = &self.parse[*ofs.start()];
let token1 = &self.parse[*ofs.end()];
- Some(&self.buffer[token0.pos.start..token1.pos.end])
+ Some(&self.file.buffer[token0.pos.start..token1.pos.end])
}
fn is_empty(&self) -> bool {
- self.buffer.is_empty()
+ self.file.buffer.is_empty()
}
fn diagnostic(
(l0..=l1).collect()
};
for line_number in lines {
- source.push((line_number, self.get_line(line_number).to_string()));
+ source.push((line_number, self.file.get_line(line_number).to_string()));
}
}
macro_rep: Option<MacroRepresentation>,
}
+/// A scan error plus the byte range of the source text that provoked it.
+struct LexError {
+    /// The scan error itself.
+    error: ScanError,
+    /// Byte offsets of the offending text within the source buffer.
+    pos: Range<usize>,
+}
+
impl Borrow<Token> for LexToken {
fn borrow(&self) -> &Token {
&self.token
}
}
+impl LexToken {
+    /// Returns the syntax in `source` that this token was scanned from.
+    fn representation<'a>(&self, source: &'a SourceFile) -> &'a str {
+        &source.buffer[self.pos.clone()]
+    }
+}
+
struct MacroRepresentation {
/// An entire macro expansion.
expansion: Arc<String>,
}
while self.source.parse_ofs == self.source.parse.len() {
- println!("{}:{}", file!(), line!());
let context = Context {
macros: &self.macros,
error: &self.error,
};
- println!("{}:{}", file!(), line!());
if !self.source.get_parse(&context) {
- println!("{}:{}", file!(), line!());
if !self.pop_stack() {
- println!("{}:{}", file!(), line!());
return &Token::End;
}
}
- println!("{}:{}", file!(), line!());
}
self.source.token()
}
mod tests {
use encoding_rs::UTF_8;
- use crate::lex::{segment::Mode, token::Token};
+ use crate::lex::token::Token;
- use super::{ErrorHandling, Lexer, Source};
+ use super::{Lexer, Source, SourceFile};
#[test]
fn test() {
let mut lexer = Lexer::new(Box::new(|location, error| println!("{location}: {error}")));
- lexer.include(Source::for_string(
+ lexer.include(Source::new_default(SourceFile::for_string(
String::from(
r#"#! /usr/local/bin/pspp
DATA LIST LIST NOTABLE /a.
"#,
),
UTF_8,
- ));
+ )));
loop {
lexer.get();
let token = lexer.token();
#[test]
fn test_scan_errors() {
let mut lexer = Lexer::new(Box::new(|location, error| println!("{location}: {error}")));
- lexer.include(Source::for_file_contents(
+ lexer.include(Source::new_default(SourceFile::for_file_contents(
String::from(
r#"x'123'
x'1x'
),
Some(String::from("syntax.sps")),
UTF_8,
- Mode::default(),
- ErrorHandling::default(),
- ));
+ )));
loop {
lexer.get();
let token = lexer.token();
#[test]
fn test_null_byte() {
let mut lexer = Lexer::new(Box::new(|location, error| println!("{location}: {error}")));
- lexer.include(Source::for_file_contents(
+ lexer.include(Source::new_default(SourceFile::for_file_contents(
String::from(
"datA dist list notable file='input.txt'/a b c.
lis|.\0",
),
Some(String::from("syntax.sps")),
UTF_8,
- Mode::default(),
- ErrorHandling::default(),
- ));
+ )));
loop {
lexer.get();
let token = lexer.token();
}
}
}
+
+/// A sequence of tokens along with the source file they were scanned from.
+struct Tokens {
+    /// Source file, used to recover each token's representation.
+    file: Arc<SourceFile>,
+    /// The tokens themselves.
+    tokens: Vec<LexToken>,
+}
+
+impl Debug for Tokens {
+    /// Formats the tokens as their source representations, comma-separated.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        write!(f, "Tokens {{ ")?;
+        for (index, token) in self.tokens.iter().enumerate() {
+            if index > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "{:?}", token.representation(&self.file))?;
+        }
+        write!(f, " }}")
+    }
+}
+
+/// A lexer over a previously scanned [Tokens] collection.
+struct NewLexer<'a> {
+    /// The token collection this lexer reads from.
+    backing: &'a Tokens,
+    /// The slice of `backing`'s tokens visible to this lexer.
+    tokens: &'a [LexToken],
+    /// Index of the current token within `tokens`.
+    pos: usize,
+}
+
+impl<'a> NewLexer<'a> {
+    /// Creates a lexer positioned at the start of `backing`'s tokens.
+    pub fn new(backing: &'a Tokens) -> Self {
+        Self {
+            backing,
+            tokens: backing.tokens.as_slice(),
+            pos: 0,
+        }
+    }
+
+    /// Advances past the current token, unless already at the end.
+    pub fn get(&mut self) {
+        if !self.at_end() {
+            self.pos += 1;
+        }
+    }
+
+    /// Returns true if all of the tokens have been consumed.
+    pub fn at_end(&self) -> bool {
+        self.pos >= self.tokens.len()
+    }
+
+    /// If the current token equals `token`, consumes it and returns true.
+    /// Otherwise, leaves the position unchanged and returns false.
+    pub fn match_(&mut self, token: &Token) -> bool {
+        if self.token() == token {
+            self.get();
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Returns the current token, or [Token::End] past the end of input.
+    pub fn token(&self) -> &Token {
+        self.tokens
+            .get(self.pos)
+            .map_or(&Token::End, |token| &token.token)
+    }
+
+    /// Returns the token at signed offset `ofs` relative to the current one,
+    /// or [Token::End] if that position is out of range.
+    pub fn next(&self, ofs: isize) -> &Token {
+        ofs.checked_add(self.pos as isize)
+            .and_then(|index| usize::try_from(index).ok())
+            .and_then(|index| self.tokens.get(index))
+            .map_or(&Token::End, |token| &token.token)
+    }
+}
+
+/// A source of tokens scanned incrementally from a [SourceFile].
+struct NewSource {
+    /// The file being tokenized.
+    file: Arc<SourceFile>,
+    /// Segmentation state.
+    segmenter: Segmenter,
+    /// Byte offset of the first byte in `file` not yet segmented.
+    seg_pos: usize,
+    /// Tokens scanned but not yet consumed.
+    lookahead: VecDeque<LexToken>,
+}
+
+impl NewSource {
+ pub fn new_default(file: SourceFile) -> Self {
+ Self::new(file, Syntax::default())
+ }
+
+ pub fn new(file: SourceFile, syntax: Syntax) -> Self {
+ Self {
+ file: Arc::new(file),
+ segmenter: Segmenter::new(syntax, false),
+ seg_pos: 0,
+ lookahead: VecDeque::new(),
+ }
+ }
+
+ pub fn read_command(&mut self, macros: &MacroSet) -> Option<Tokens> {
+ loop {
+ if let Some(end) = self
+ .lookahead
+ .iter()
+ .position(|token| token.token == Token::EndCommand)
+ {
+ return Some(Tokens {
+ file: self.file.clone(),
+ tokens: self.lookahead.drain(..=end).collect(),
+ });
+ }
+ if !self.read_lookahead(macros) {
+ return None;
+ }
+ }
+ }
+
+ pub fn read_lookahead(&mut self, macros: &MacroSet) -> bool {
+ let mut errors = Vec::new();
+ let mut pp = VecDeque::new();
+ while let Some((seg_len, seg_type)) = self
+ .segmenter
+ .push(&self.file.buffer[self.seg_pos..], true)
+ .unwrap()
+ {
+ let pos = self.seg_pos..self.seg_pos + seg_len;
+ self.seg_pos += seg_len;
+
+ match ScanToken::from_segment(&self.file.buffer[pos.clone()], seg_type) {
+ None => (),
+ Some(ScanToken::Token(token)) => {
+ let end = token == Token::EndCommand;
+ pp.push_back(LexToken {
+ token,
+ pos,
+ macro_rep: None,
+ });
+ if end {
+ break;
+ }
+ }
+ Some(ScanToken::Error(error)) => errors.push(LexError { error, pos }),
+ }
+ }
+ if pp.is_empty() {
+ return false;
+ }
+
+ let mut merge = if !Settings::global().macros.expand || macros.is_empty() {
+ take(&mut pp)
+ } else {
+ let mut merge = VecDeque::new();
+ while !pp.is_empty() {
+ self.expand_macro(macros, &mut pp, &mut merge);
+ }
+ merge
+ };
+
+ while let Ok(Some(result)) =
+ ScanToken::merge(|index| Ok(merge.get(index).map(|token| &token.token)))
+ {
+ match result {
+ MergeResult::Copy => self.lookahead.push_back(merge.pop_front().unwrap()),
+ MergeResult::Expand { n, token } => {
+ let first = &merge[0];
+ let last = &merge[n - 1];
+ self.lookahead.push_back(LexToken {
+ token,
+ pos: first.pos.start..last.pos.end,
+ macro_rep: match (&first.macro_rep, &last.macro_rep) {
+ (Some(a), Some(b)) if Arc::ptr_eq(&a.expansion, &b.expansion) => {
+ Some(MacroRepresentation {
+ expansion: a.expansion.clone(),
+ pos: *a.pos.start()..=*b.pos.end(),
+ })
+ }
+ _ => None,
+ },
+ });
+ merge.drain(..n);
+ }
+ }
+ }
+ true
+ }
+
+ fn expand_macro(
+ &self,
+ macros: &MacroSet,
+ src: &mut VecDeque<LexToken>,
+ dst: &mut VecDeque<LexToken>,
+ ) {
+ // Now pass tokens one-by-one to the macro expander.
+ let Some(mut parser) = Parser::new(macros, &src[0].token) else {
+ // Common case where there is no macro to expand.
+ dst.push_back(src.pop_front().unwrap());
+ return;
+ };
+ for ofs in 1.. {
+ let token = &src[ofs];
+ if parser.push(&token.token, &self.file.buffer[token.pos.clone()], &|e| {
+ println!("{e:?}")
+ }) == ParseStatus::Complete
+ {
+ break;
+ }
+ }
+ let call = parser.finish();
+ if call.len() == 0 {
+ // False alarm: no macro to expand after all.
+ dst.push_back(src.pop_front().unwrap());
+ return;
+ }
+
+ // Expand the tokens.
+ let c0 = &src[0];
+ let c1 = &src[call.len() - 1];
+ let mut expansion = Vec::new();
+ call.expand(
+ self.segmenter.syntax(),
+ self.file.token_location(c0..=c1),
+ &mut expansion,
+ |e| println!("{e:?}"),
+ );
+
+ if Settings::global().macros.print_expansions {
+ // XXX
+ }
+
+ // Append the macro expansion tokens to the lookahead.
+ let mut macro_rep = String::new();
+ let mut pos = Vec::with_capacity(expansion.len());
+ for [prefix, token] in macro_tokens_to_syntax(expansion.as_slice()) {
+ macro_rep.push_str(prefix);
+ let len = macro_rep.len();
+ pos.push(len..=len + token.len() - 1);
+ }
+ let macro_rep = Arc::new(macro_rep);
+ for (index, token) in expansion.into_iter().enumerate() {
+ let lt = LexToken {
+ token: token.token,
+ pos: c0.pos.start..c1.pos.end,
+ macro_rep: Some(MacroRepresentation {
+ expansion: Arc::clone(¯o_rep),
+ pos: pos[index].clone(),
+ }),
+ };
+ dst.push_back(lt);
+ }
+ src.drain(..call.len());
+ }
+}
+
+#[cfg(test)]
+mod new_lexer_tests {
+    use encoding_rs::UTF_8;
+
+    use crate::macros::MacroSet;
+
+    use super::{NewLexer, NewSource, Source, SourceFile};
+
+    /// Reads a small syntax file command by command, printing each command's
+    /// tokens.  (Output is inspected by eye; nothing is asserted.)
+    #[test]
+    fn test() {
+        let code = r#"DATA LIST LIST /A * B * X * Y * .
+BEGIN DATA.
+2 3 4 5
+END DATA.
+
+CROSSTABS VARIABLES X (1,7) Y (1,7) /TABLES X BY Y.
+"#;
+        let file = SourceFile::for_file_contents(
+            String::from(code),
+            Some(String::from("crosstabs.sps")),
+            UTF_8,
+        );
+        let mut source = NewSource::new_default(file);
+        while let Some(tokens) = source.read_command(&MacroSet::new()) {
+            println!("{tokens:?}");
+        }
+    }
+}
use crate::{
identifier::Identifier,
lex::{
- segment::Mode,
+ segment::Syntax,
token::{Punct, Token},
},
};
}
#[track_caller]
-fn check_scan(input: &str, mode: Mode, expected: &[ScanToken]) {
+fn check_scan(input: &str, mode: Syntax, expected: &[ScanToken]) {
let tokens = StringScanner::new(input, mode, false).collect::<Vec<_>>();
if &tokens != expected {
WXYZ. /* unterminated end of line comment
�. /* U+FFFD is not valid in an identifier
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("a").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("aB").unwrap())),
andx orx notx eqx gex gtx lex ltx nex allx byx tox withx
and. with.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Punct(Punct::And)),
ScanToken::Token(Token::Punct(Punct::Or)),
~&|=>=><=<~=<>(),-+*/[]**
% : ; ? _ ` { } ~
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Punct(Punct::Not)),
ScanToken::Token(Token::Punct(Punct::And)),
1.23e1 45.6E-1 78.9e+1 99.9E+01 11.2e-03
. 1e e1 1e+ 1e-
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Number(0.0)),
ScanToken::Token(Token::Number(1.0)),
-/**/1
-. -1e -e1 -1e+ -1e- -1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Number(-0.0)),
ScanToken::Token(Token::Number(-1.0)),
"�あいうえお"
"abc"+U"FFFD"+u'3048'+"xyz"
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::String(String::from("x"))),
ScanToken::Token(Token::String(String::from("y"))),
r#"#! /usr/bin/pspp
#! /usr/bin/pspp
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("#").unwrap())),
ScanToken::Token(Token::Punct(Punct::Bang)),
next command.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::EndCommand),
ScanToken::Token(Token::EndCommand),
second paragraph.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("DOCUMENT").unwrap())),
ScanToken::Token(Token::String(String::from("DOCUMENT one line."))),
/**/ lab not quoted here either
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("FIL").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("label").unwrap())),
end data
.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("begin").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("data").unwrap())),
end
repeat.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("do").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
inner command
end repeat
"#,
- Mode::Batch,
+ Syntax::Batch,
&[
ScanToken::Token(Token::Id(Identifier::new("do").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("repeat").unwrap())),
fourth command.
fifth command.
"#,
- Mode::Batch,
+ Syntax::Batch,
&[
ScanToken::Token(Token::Id(Identifier::new("first").unwrap())),
ScanToken::Token(Token::Id(Identifier::new("command").unwrap())),
identifier::Identifier,
lex::{
scan::ScanToken,
- segment::Mode,
+ segment::Syntax,
token::{Punct, Token},
},
};
var1 var2 var3
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1() var1 var2 var3
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1()
var1 var2 var3!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
check_scan(
r#"define !macro1()var1 var2 var3!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1()
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1(a(), b(), c())
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
)
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
content 2
!enddefine.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1.
data list /x 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
x.
data list /x 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
x.
data list /x 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
r#"define !macro1.
data list /x 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
content line 1
content line 2
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
ScanToken::Token(Token::Id(Identifier::new("define").unwrap())),
ScanToken::Token(Token::String(String::from("!macro1"))),
use super::command_name::{command_match, COMMAND_NAMES};
-/// Segmentation mode.
+/// Syntax variant.
///
-/// PSPP syntax is written in one of two modes which are broadly defined as
-/// follows:
+/// PSPP syntax is written in one of two syntax variants which are broadly
+/// defined as follows:
///
-/// - In interactive mode, commands end with a period at the end of the line
+/// - In interactive syntax, commands end with a period at the end of the line
/// or with a blank line.
///
-/// - In batch mode, the second and subsequent lines of a command are indented
+/// - In batch syntax, the second and subsequent lines of a command are indented
/// from the left margin.
///
-/// The segmenter can also try to automatically detect the mode in use, using a
-/// heuristic that is usually correct.
+/// The segmenter can also try to automatically detect the kind of syntax in
+/// use, using a heuristic that is usually correct.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
-pub enum Mode {
+pub enum Syntax {
/// Try to interpret input correctly regardless of whether it is written
- /// for interactive or batch mode.
+ /// for interactive or batch syntax.
#[default]
Auto,
- /// Interactive syntax mode.
+ /// Interactive syntax.
Interactive,
- /// Batch syntax mode.
+ /// Batch syntax.
Batch,
}
pub struct Segmenter {
state: (State, Substate),
nest: u8,
- mode: Mode,
+ syntax: Syntax,
}
#[derive(Copy, Clone, Debug)]
pub struct Incomplete;
impl Segmenter {
- /// Returns a segmenter with the given syntax `mode`.
+ /// Returns a segmenter with the given `syntax`.
///
/// If `is_snippet` is false, then the segmenter will parse as if it's being
/// given a whole file. This means, for example, that it will interpret `-`
/// given an isolated piece of syntax. This means that, for example, that
/// it will interpret `-` or `+` at the beginning of the syntax as an
/// operator token or (if followed by a digit) as part of a number.
- pub fn new(mode: Mode, is_snippet: bool) -> Self {
+ pub fn new(syntax: Syntax, is_snippet: bool) -> Self {
Self {
state: if is_snippet {
(State::General, Substate::empty())
} else {
(State::Shbang, Substate::empty())
},
- mode,
+ syntax,
nest: 0,
}
}
- pub fn mode(&self) -> Mode {
- self.mode
+ pub fn syntax(&self) -> Syntax {
+ self.syntax
}
fn start_of_line(&self) -> bool {
/// Returns the style of command prompt to display to an interactive user
/// for input in the current state.. The return value is most accurate in
- /// mode `Mode::Interactive` and at the beginning of a line (that is, if
- /// [`Segmenter::push`] consumed as much as possible of the input up to a
- /// new-line).
+    /// [Syntax::Interactive] syntax and at the beginning of a line (that
+ /// is, if [`Segmenter::push`] consumed as much as possible of the input up
+ /// to a new-line).
pub fn prompt(&self) -> PromptStyle {
match self.state.0 {
State::Shbang => PromptStyle::First,
self.push_rest(input, eof)
}
fn at_command_start(&self, input: &str, eof: bool) -> Result<bool, Incomplete> {
- match self.mode {
- Mode::Auto => detect_command_name(input, eof),
- Mode::Interactive => Ok(false),
- Mode::Batch => Ok(true),
+ match self.syntax {
+ Syntax::Auto => detect_command_name(input, eof),
+ Syntax::Interactive => Ok(false),
+ Syntax::Batch => Ok(true),
}
}
fn parse_start_of_line<'a>(
mut input: &'a str,
eof: bool,
) -> Result<(&'a str, &'a str), Incomplete> {
- let mut sub = Segmenter::new(self.mode, true);
+ let mut sub = Segmenter::new(self.syntax, true);
loop {
let Some((seg_len, seg_type)) = sub.push(input, eof)? else {
return Ok((input, input));
eof: bool,
) -> Result<Option<(&'a str, Segment)>, Incomplete> {
let mut sub = Segmenter {
- mode: self.mode,
+ syntax: self.syntax,
state: (State::General, self.state.1),
nest: 0,
};
use crate::prompt::PromptStyle;
-use super::{Mode, Segment, Segmenter};
+use super::{Syntax, Segment, Segmenter};
fn push_segment<'a>(
segmenter: &mut Segmenter,
fn _check_segmentation(
mut input: &str,
- mode: Mode,
+ mode: Syntax,
expect_segments: &[(Segment, &str)],
expect_prompts: &[PromptStyle],
one_byte: bool,
fn check_segmentation(
input: &str,
- mode: Mode,
+ mode: Syntax,
expect_segments: &[(Segment, &str)],
expect_prompts: &[PromptStyle],
) {
#[allow(dead_code)]
fn print_segmentation(mut input: &str) {
- let mut segmenter = Segmenter::new(Mode::Interactive, false);
+ let mut segmenter = Segmenter::new(Syntax::Interactive, false);
while let Some((seg_len, seg_type)) = segmenter.push(input, true).unwrap() {
let (token, rest) = input.split_at(seg_len);
print!("{seg_type:?} {token:?}");
.x 1y _z
!abc abc!
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Identifier, "a"),
(Segment::Spaces, " "),
WXYZ. /* unterminated end of line comment
WxYz./* unterminated end of line comment
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Identifier, "abcd."),
(Segment::Spaces, " "),
andx orx notx eqx gex gtx lex ltx nex allx byx tox withx
and. with.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Identifier, "and"),
(Segment::Spaces, " "),
~&|=>=><=<~=<>(),-+*/[]**!*
% : ; ? _ ` { } ~ !*
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Punct, "~"),
(Segment::Spaces, " "),
1.23e1 45.6E-1 78.9e+1 99.9E+01 11.2e-03
. 1e e1 1e+ 1e- 1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Number, "0"),
(Segment::Spaces, " "),
-/**/1
-. -1e -e1 -1e+ -1e- -1.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Spaces, " "),
(Segment::Number, "-0"),
+ /* also a punctuator on blank line
- 'new command'
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::QuotedString, "'x'"),
(Segment::Spaces, " "),
title my title.
#! /usr/bin/pspp
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Shbang, "#! /usr/bin/pspp"),
(Segment::Newline, "\n"),
next command.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::CommentCommand, "* Comment commands \"don't"),
(Segment::Newline, "\n"),
second paragraph.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::StartDocument, ""),
(Segment::Document, "DOCUMENT one line."),
/**/ lab not quoted here either
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "FIL"),
(Segment::Spaces, " "),
begin data 123.
not data
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "begin"),
(Segment::Spaces, " "),
inner command.
end repeat.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "do"),
(Segment::Spaces, " "),
.map(|_| PromptStyle::DoRepeat)
.chain([PromptStyle::First, PromptStyle::First, PromptStyle::First])
.collect();
- check_segmentation(&s, Mode::Interactive, &expect_output, &expect_prompts);
+ check_segmentation(&s, Syntax::Interactive, &expect_output, &expect_prompts);
}
#[test]
inner command
end repeat
"#,
- Mode::Batch,
+ Syntax::Batch,
&[
(Segment::Identifier, "do"),
(Segment::Spaces, " "),
mod define {
use crate::{
- lex::segment::{Mode, Segment},
+ lex::segment::{Syntax, Segment},
prompt::PromptStyle,
};
var1 var2 var3 "!enddefine"
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1() var1 var2 var3 /* !enddefine
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1()
var1 var2 var3!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
check_segmentation(
r#"define !macro1()var1 var2 var3!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1()
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1(a(), b(), c())
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
)
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
content 2
!enddefine.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1.
data list /x 1.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
x.
data list /x 1.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
x.
data list /x 1.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
r#"define !macro1.
data list /x 1.
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
content line 1
content line 2
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
check_segmentation(
r#"define !macro1()
"#,
- Mode::Interactive,
+ Syntax::Interactive,
&[
(Segment::Identifier, "define"),
(Segment::Spaces, " "),
fourth command.
fifth command.
"#,
- Mode::Batch,
+ Syntax::Batch,
&[
(Segment::Identifier, "first"),
(Segment::Spaces, " "),
fourth command.
fifth command.
"#,
- Mode::Auto,
+ Syntax::Auto,
&[
(Segment::Identifier, "command"),
(Segment::Newline, "\n"),