decent segmentation tests
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 8 Jul 2024 02:41:09 +0000 (19:41 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 8 Jul 2024 02:41:09 +0000 (19:41 -0700)
rust/src/lex/segment.rs

index ccede6c47306de61dcb37c038347cdfbd7d204d6..de75bde5682ba5be954a9b260f69f0ef45be55aa 100644 (file)
@@ -586,7 +586,6 @@ impl Segmenter {
                 Ok((rest, Type::Punct))
             }
             _ => {
-                println!("unexpected {c:?} {:?}", c.is_whitespace());
                 self.state.1 = Substate::empty();
                 Ok((rest, Type::UnexpectedChar))
             }
@@ -695,7 +694,6 @@ impl Segmenter {
             } else if id_match_n("DEFINE", identifier, 6) {
                 self.state.0 = State::Define1;
             } else if id_match("FILE", identifier) {
-                println!("next={:?}", self.next_id_in_command(rest, eof)?.0);
                 if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) {
                     self.state = (State::FileLabel1, Substate::empty());
                     return Ok((rest, Type::Identifier));
@@ -707,17 +705,13 @@ impl Segmenter {
                 }
             } else if id_match("BEGIN", identifier) {
                 let (next_id, rest2) = self.next_id_in_command(rest, eof)?;
-                println!("next_id={next_id:?}");
                 if id_match("DATA", next_id) {
-                    println!("{}:{}", file!(), line!());
                     let rest2 = skip_spaces_and_comments(rest2, eof)?;
-                    println!("{}:{} {rest2:?}", file!(), line!());
                     let rest2 = if let Some(s) = rest2.strip_prefix('.') {
                         skip_spaces_and_comments(s, eof)?
                     } else {
                         rest2
                     };
-                    println!("{}:{}", file!(), line!());
                     if is_end_of_line(rest2, eof)? {
                         let s = &input[..input.len() - rest2.len()];
                         self.state = (
@@ -728,10 +722,8 @@ impl Segmenter {
                             },
                             Substate::empty(),
                         );
-                        println!("{}:{}", file!(), line!());
                         return Ok((rest, Type::Identifier));
                     }
-                    println!("{}:{}", file!(), line!());
                 }
             }
         }
@@ -1119,7 +1111,7 @@ impl Segmenter {
                 // parsing.
                 self.state.0 = State::General;
             }
-            Type::Punct if rest.starts_with('(') => {
+            Type::Punct if input.starts_with('(') => {
                 self.state.0 = State::Define3;
                 self.nest = 1;
             }
@@ -1140,10 +1132,10 @@ impl Segmenter {
                 // parsing.
                 self.state.0 = State::General;
             }
-            Type::Punct if rest.starts_with('(') => {
+            Type::Punct if input.starts_with('(') => {
                 self.nest += 1;
             }
-            Type::Punct if rest.starts_with(')') => {
+            Type::Punct if input.starts_with(')') => {
                 self.nest -= 1;
                 if self.nest == 0 {
                     self.state = (State::Define4, Substate::empty());
@@ -1171,6 +1163,7 @@ impl Segmenter {
             }
         }
     }
+
     /// We are in the body of a macro definition, looking for additional lines
     /// of the body or `!ENDDEFINE`.
     ///
@@ -1187,17 +1180,20 @@ impl Segmenter {
         if let Some(end) = Self::find_enddefine(line) {
             // Macro ends at the !ENDDEFINE on this line.
             self.state = (State::General, Substate::empty());
-            let prefix = &input[..input.len() - end.len()];
+            let (prefix, rest) = input.split_at(line.len() - end.len());
             if prefix.is_empty() {
                 // Line starts with `!ENDDEFINE`.
                 self.push(input, eof)
-            } else if input.trim().is_empty() {
+            } else if prefix.trim_start().is_empty() {
                 // Line starts with spaces followed by `!ENDDEFINE`.
-                Ok((end, Type::Spaces))
+                Ok((rest, Type::Spaces))
             } else {
                 // Line starts with some content followed by `!ENDDEFINE`.
-                Ok((end, Type::MacroBody))
+                Ok((rest, Type::MacroBody))
             }
+        } else if line.is_empty() {
+            // Entirely blank line.
+            self.parse_define_6(input, eof)
         } else {
             // No `!ENDDEFINE`.  We have a full line of macro body.
             //
@@ -1207,7 +1203,7 @@ impl Segmenter {
             //
             // However, if the first line of the macro body is blank, we just
             // report it as spaces because it's not significant.
-            let type_ = if self.state.0 == State::Define4 && line.trim().is_empty() {
+            let type_ = if self.state.0 == State::Define4 && line.trim_start().is_empty() {
                 Type::Spaces
             } else {
                 Type::MacroBody
@@ -1328,7 +1324,8 @@ mod test {
         let mut segmenter = Segmenter::new(Mode::Auto, false);
         loop {
             let (rest, type_) = segmenter.push(input, true).unwrap();
-            let token = &input[..input.len() - rest.len()];
+            let len = input.len() - rest.len();
+            let token = &input[..len];
             println!("{type_:?} {token:?}");
             if type_ == Type::End {
                 break;
@@ -1543,9 +1540,10 @@ not data
         );
     }
 
-        #[test]
+    #[test]
     fn test_do_repeat() {
-        print_segmentation(r#"do repeat x=a b c
+        print_segmentation(
+            r#"do repeat x=a b c
           y=d e f.
   do repeat a=1 thru 5.
 another command.
@@ -1558,7 +1556,156 @@ do
   repeat #a=1.
   inner command.
 end repeat.
-"#);
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_simple() {
+        print_segmentation(
+            r#"define !macro1()
+var1 var2 var3 "!enddefine"
+!enddefine.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_no_newline_after_parentheses() {
+        print_segmentation(
+            r#"define !macro1() var1 var2 var3 /* !enddefine
+!enddefine.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_no_newline_before_enddefine() {
+        print_segmentation(
+            r#"define !macro1()
+var1 var2 var3!enddefine.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_all_on_one_line() {
+        print_segmentation(
+            r#"define !macro1()var1 var2 var3!enddefine.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_empty() {
+        print_segmentation(
+            r#"define !macro1()
+!enddefine.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_blank_lines() {
+        print_segmentation(
+            r#"define !macro1()
+
+
+!enddefine.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_arguments() {
+        print_segmentation(
+            r#"define !macro1(a(), b(), c())
+!enddefine.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_multiline_arguments() {
+        print_segmentation(
+            r#"define !macro1(
+  a(), b(
+  ),
+  c()
+)
+!enddefine.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_arguments_start_on_second_line() {
+        print_segmentation(
+            r#"define !macro1
+(x,y,z
+)
+content 1
+content 2
+!enddefine.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_early_end_of_command_1() {
+        print_segmentation(
+            r#"define !macro1.
+data list /x 1.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_early_end_of_command_2() {
+        print_segmentation(
+            r#"define !macro1
+x.
+data list /x 1.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_early_end_of_command_3() {
+        print_segmentation(
+            r#"define !macro1(.
+x.
+data list /x 1.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_early_end_of_command_4() {
+        // Notice the command terminator at the end of the `DEFINE` command,
+        // which should not be there and ends it early.
+        print_segmentation(
+            r#"define !macro1.
+data list /x 1.
+"#,
+        );
+    }
+
+    #[test]
+    fn test_define_missing_enddefine() {
+        print_segmentation(
+            r#"define !macro1()
+content line 1
+content line 2
+"#,
+        );
     }
 
+    #[test]
+    fn test_define_missing_enddefine_2() {
+        print_segmentation(
+            r#"define !macro1()
+"#,
+        );
+    }
 }