X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fsegment.c;h=346910898ce3898b4bec6c62563a1a6f8dfe10bd;hb=e86d3e8623564b379e6097a3df9e7232e8087160;hp=0d83257959f6b2d6818c01ff03c759b63eaea02a;hpb=95a5547f4e99d58f09395172448cb944ac3a23f0;p=pspp

diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c
index 0d83257959..346910898c 100644
--- a/src/language/lexer/segment.c
+++ b/src/language/lexer/segment.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2010, 2011, 2013, 2016 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -38,88 +38,112 @@ enum segmenter_state
     S_DOCUMENT_1,
     S_DOCUMENT_2,
     S_DOCUMENT_3,
-    S_FILE_LABEL,
+    S_FILE_LABEL_1,
+    S_FILE_LABEL_2,
+    S_FILE_LABEL_3,
     S_DO_REPEAT_1,
     S_DO_REPEAT_2,
     S_DO_REPEAT_3,
+    S_DEFINE_1,
+    S_DEFINE_2,
+    S_DEFINE_3,
+    S_DEFINE_4,
+    S_DEFINE_5,
     S_BEGIN_DATA_1,
     S_BEGIN_DATA_2,
     S_BEGIN_DATA_3,
     S_BEGIN_DATA_4,
-    S_TITLE_1,
-    S_TITLE_2
   };
 
 #define SS_START_OF_LINE (1u << 0)
 #define SS_START_OF_COMMAND (1u << 1)
 
 static int segmenter_detect_command_name__ (const char *input,
-                                            size_t n, int ofs);
+                                            size_t n, bool eof, int ofs);
 
 static int
-segmenter_u8_to_uc__ (ucs4_t *puc, const char *input_, size_t n)
+segmenter_u8_to_uc__ (ucs4_t *puc, const char *input_, size_t n, bool eof,
+                      size_t ofs)
 {
   const uint8_t *input = CHAR_CAST (const uint8_t *, input_);
   int mblen;
 
-  assert (n > 0);
+  assert (n > ofs);
+  /* Decode the character that starts OFS bytes into the N-byte INPUT_. */
+  input += ofs;
+  n -= ofs;
 
   mblen = u8_mbtoucr (puc, input, n);
-  return (mblen >= 0 ? mblen
-          : mblen == -2 ? -1
-          : u8_mbtouc (puc, input, n));
+  if (mblen >= 0)
+    return mblen;
+  else if (mblen != -2)
+    return u8_mbtouc (puc, input, n);
+  else if (eof)
+    {
+      *puc = 0xfffd;  /* U+FFFD, the Unicode replacement character. */
+      return n;  /* Consume every byte that is left. */
+    }
+  else
+    return -1;  /* -2 (presumably "incomplete") and more input may come. */
 }
 
 static int
 segmenter_parse_shbang__ (struct segmenter *s, const char *input, size_t n,
-                          enum segment_type *type)
+                          bool eof, enum segment_type *type)
 {
   if (input[0] == '#')
     {
-      if (n < 2)
-        return -1;
-      else if (input[1] == '!')
+      if (n >= 2)
         {
-          int ofs;
-
-          for (ofs = 2; ofs < n; ofs++)
-            if (input[ofs] == '\n')
-              {
-                if (input[ofs - 1] == '\r')
-                  ofs--;
-
-                s->state = S_GENERAL;
-                s->substate = SS_START_OF_COMMAND;
-                *type = SEG_SHBANG;
-                return ofs;
-              }
+          if (input[1] == '!')
+            {
+              for (int ofs = 2; ; ofs++)
+                {
+                  if (ofs >= n)
+                    {
+                      if (!eof)
+                        return -1;  /* No newline in the buffer yet. */
+                    }
+                  else if (input[ofs] == '\n')
+                    {
+                      if (input[ofs - 1] == '\r')
+                        ofs--;  /* Leave the CR of a CRLF out of the segment. */
+                    }
+                  else
+                    continue;  /* Still inside the #! line. */
 
-          return -1;
+                  s->state = S_GENERAL;
+                  s->substate = SS_START_OF_COMMAND;
+                  *type = SEG_SHBANG;
+                  return ofs;
+                }
+            }
         }
+      else if (!eof)
+        return -1;  /* Lone '#' and not at EOF: undecidable so far. */
     }
 
   s->state = S_GENERAL;
   s->substate = SS_START_OF_LINE | SS_START_OF_COMMAND;
-  return segmenter_push (s, input, n, type);
+  return segmenter_push (s, input, n, eof, type);
 }
 
 static int
 segmenter_parse_digraph__ (const char *seconds, struct segmenter *s,
-                           const char *input, size_t n,
+                           const char *input, size_t n, bool eof,
                            enum segment_type *type)
 {
   assert (s->state == S_GENERAL);
 
-  if (n < 2)
-    return -1;
-
   *type = SEG_PUNCT;
   s->substate = 0;
-  return input[1] != '\0' && strchr (seconds, input[1]) != NULL ? 2 : 1;
+  /* strchr() also matches the terminator, so rule out a NUL second byte. */
+  bool two = n >= 2 && input[1] != '\0' && strchr (seconds, input[1]) != NULL;
+  return n < 2 ? (eof ? 1 : -1) : two ? 2 : 1;
 }
 
 static int
-skip_comment (const char *input, size_t n, size_t ofs)
+skip_comment (const char *input, size_t n, bool eof, size_t ofs)
 {
   for (; ofs < n; ofs++)
     {
@@ -128,34 +152,34 @@ skip_comment (const char *input, size_t n, size_t ofs)
       else if (input[ofs] == '*')
         {
           if (ofs + 1 >= n)
-            return -1;
+            return eof ? ofs + 1 : -1;
           else if (input[ofs + 1] == '/')
             return ofs + 2;
         }
     }
-  return -1;
+  return eof ? ofs : -1;
 }
 
 static int
-skip_spaces_and_comments (const char *input, size_t n, int ofs)
+skip_spaces_and_comments (const char *input, size_t n, bool eof, int ofs)
 {
   while (ofs < n)
     {
       ucs4_t uc;
       int mblen;
 
-      mblen = segmenter_u8_to_uc__ (&uc, input + ofs, n - ofs);
+      mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs);
       if (mblen < 0)
         return -1;
 
       if (uc == '/')
         {
           if (ofs + 1 >= n)
-            return -1;
+            return eof ? ofs : -1;
           else if (input[ofs + 1] != '*')
             return ofs;
 
-          ofs = skip_comment (input, n, ofs + 2);
+          ofs = skip_comment (input, n, eof, ofs + 2);
           if (ofs < 0)
             return -1;
         }
@@ -165,18 +189,20 @@ skip_spaces_and_comments (const char *input, size_t n, int ofs)
         return ofs;
     }
 
-  return -1;
+  return eof ? ofs : -1;
 }
 
 static int
-is_end_of_line (const char *input, size_t n, int ofs)
+is_end_of_line (const char *input, size_t n, bool eof, int ofs)
 {
-  if (input[ofs] == '\n')
+  if (ofs >= n)
+    return eof ? 1 : -1;
+  else if (input[ofs] == '\n')
     return 1;
   else if (input[ofs] == '\r')
     {
       if (ofs + 1 >= n)
-        return -1;
+        return eof ? 1 : -1;
       return input[ofs + 1] == '\n';
     }
   else
@@ -184,18 +210,33 @@ is_end_of_line (const char *input, size_t n, int ofs)
 }
 
 static int
-at_end_of_line (const char *input, size_t n, int ofs)
+at_end_of_line (const char *input, size_t n, bool eof, int ofs)
 {
-  ofs = skip_spaces_and_comments (input, n, ofs);
+  ofs = skip_spaces_and_comments (input, n, eof, ofs);  /* New OFS or -1. */
   if (ofs < 0)
     return -1;
 
-  return is_end_of_line (input, n, ofs);
+  return is_end_of_line (input, n, eof, ofs);  /* Tests what is at OFS. */
 }
 
+static bool
+is_all_spaces (const char *input_, size_t n)
+{
+  const uint8_t *input = CHAR_CAST (const uint8_t *, input_);
+  /* Decode one character per pass; the first non-space ends the scan. */
+  int mblen;
+  for (int ofs = 0; ofs < n; ofs += mblen)
+    {
+      ucs4_t uc;
+      mblen = u8_mbtouc (&uc, input + ofs, n - ofs);
+      if (!lex_uc_is_space (uc))
+        return false;
+    }
+  return true;  /* Also reached when N is 0. */
+}
 
 static int
-segmenter_parse_newline__ (const char *input, size_t n,
+segmenter_parse_newline__ (const char *input, size_t n, bool eof,
                            enum segment_type *type)
 {
   int ofs;
@@ -205,7 +246,10 @@ segmenter_parse_newline__ (const char *input, size_t n,
   else
     {
       if (n < 2)
-        return -1;
+        {
+          assert (!eof);
+          return -1;
+        }
 
       assert (input[0] == '\r');
       assert (input[1] == '\n');
@@ -217,14 +261,14 @@ segmenter_parse_newline__ (const char *input, size_t n,
 }
 
 static int
-skip_spaces (const char *input, size_t n, size_t ofs)
+skip_spaces (const char *input, size_t n, bool eof, size_t ofs)
 {
   while (ofs < n)
     {
       ucs4_t uc;
       int mblen;
 
-      mblen = segmenter_u8_to_uc__ (&uc, input + ofs, n - ofs);
+      mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs);
       if (mblen < 0)
         return -1;
 
@@ -234,76 +278,97 @@ skip_spaces (const char *input, size_t n, size_t ofs)
       ofs += mblen;
     }
 
-  return -1;
+  return eof ? ofs : -1;
 }
 
 static int
-skip_digits (const char *input, size_t n, int ofs)
+skip_digits (const char *input, size_t n, bool eof, int ofs)
 {
   for (; ofs < n; ofs++)
     if (!c_isdigit (input[ofs]))
       return ofs;
-  return -1;
+  return eof ? ofs : -1;  /* Ran off the buffer: conclusive only at EOF. */
 }
 
 static int
 segmenter_parse_number__ (struct segmenter *s, const char *input, size_t n,
-                          enum segment_type *type)
+                          bool eof, enum segment_type *type, int ofs)
 {
-  int ofs;
-
   assert (s->state == S_GENERAL);
 
-  ofs = skip_digits (input, n, 0);
+  ofs = skip_digits (input, n, eof, ofs);
   if (ofs < 0)
     return -1;
 
+  if (ofs >= n)
+    {
+      if (!eof)
+        return -1;
+      goto number;  /* The digits run to the end of the input. */
+    }
   if (input[ofs] == '.')
     {
-      ofs = skip_digits (input, n, ofs + 1);
+      if (ofs + 1 >= n)
+        {
+          if (!eof)
+            return -1;
+          goto number;  /* '.' is the last byte: left out of the number. */
+        }
+      /* Skip the digits that follow the decimal point. */
+      ofs = skip_digits (input, n, eof, ofs + 1);
       if (ofs < 0)
         return -1;
+      else if (ofs >= n)
+        goto number;
     }
 
-  if (ofs >= n)
-    return -1;
   if (input[ofs] == 'e' || input[ofs] == 'E')
     {
       ofs++;
       if (ofs >= n)
-        return -1;
+        {
+          if (!eof)
+            return -1;
+          goto expected_exponent;  /* Input ends right after 'e'/'E'. */
+        }
 
       if (input[ofs] == '+' || input[ofs] == '-')
         {
           ofs++;
           if (ofs >= n)
-            return -1;
+            {
+              if (!eof)
+                return -1;
+              goto expected_exponent;  /* Input ends right after the sign. */
+            }
         }
 
       if (!c_isdigit (input[ofs]))
-        {
-          *type = SEG_EXPECTED_EXPONENT;
-          s->substate = 0;
-          return ofs;
-        }
+        goto expected_exponent;
 
-      ofs = skip_digits (input, n, ofs);
+      ofs = skip_digits (input, n, eof, ofs);
       if (ofs < 0)
         return -1;
     }
 
   if (input[ofs - 1] == '.')
     {
-      int eol = at_end_of_line (input, n, ofs);
+      int eol = at_end_of_line (input, n, eof, ofs);
       if (eol < 0)
         return -1;
       else if (eol)
         ofs--;
     }
 
+number:
   *type = SEG_NUMBER;
   s->substate = 0;
   return ofs;
+
+expected_exponent:
+  *type = SEG_EXPECTED_EXPONENT;
+  s->substate = 0;
+  return ofs;
 }
 
 static bool
@@ -344,7 +409,7 @@ is_reserved_word (const char *s, int n)
 
 static int
 segmenter_parse_comment_1__ (struct segmenter *s,
-                             const char *input, size_t n,
+                             const char *input, size_t n, bool eof,
                              enum segment_type *type)
 {
   int endcmd;
@@ -357,7 +422,7 @@ segmenter_parse_comment_1__ (struct segmenter *s,
       ucs4_t uc;
       int mblen;
 
-      mblen = segmenter_u8_to_uc__ (&uc, input + ofs, n - ofs);
+      mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs);
       if (mblen < 0)
         return -1;
 
@@ -370,7 +435,6 @@ segmenter_parse_comment_1__ (struct segmenter *s,
         case '\n':
           if (ofs > 1 && input[ofs - 1] == '\r')
             ofs--;
-
           if (endcmd == -2)
             {
               /* Blank line ends comment command. */
@@ -404,50 +468,66 @@ segmenter_parse_comment_1__ (struct segmenter *s,
 
       ofs += mblen;
     }
+
+  if (eof)
+    {
+      /* End of file. */
+      s->state = S_GENERAL;
+      s->substate = SS_START_OF_COMMAND;
+      *type = SEG_SEPARATE_COMMANDS;
+      return ofs;
+    }
+
   return -1;
 }
 
 static int
-segmenter_parse_comment_2__ (struct segmenter *s, const char *input, size_t n,
-                             enum segment_type *type)
+segmenter_parse_comment_2__ (struct segmenter *s, const char *input,
+                             size_t n, bool eof, enum segment_type *type)
 {
-  int new_cmd;
-  ucs4_t uc;
-  int mblen;
-  int ofs;
-
-  ofs = segmenter_parse_newline__ (input, n, type);
-  if (ofs < 0 || ofs >= n)
-    return -1;
-
-  mblen = segmenter_u8_to_uc__ (&uc, input + ofs, n - ofs);
-  if (mblen < 0)
+  int ofs = segmenter_parse_newline__ (input, n, eof, type);
+  if (ofs < 0)
     return -1;
 
-  if (uc == '+' || uc == '-' || uc == '.')
-    new_cmd = true;
-  else if (!lex_uc_is_space (uc))
-    switch (s->mode)
-      {
-      case SEG_MODE_INTERACTIVE:
-        new_cmd = false;
-        break;
+  int new_cmd;
+  if (ofs >= n)
+    {
+      if (!eof)
+        return -1;
+      new_cmd = false;
+    }
+  else
+    {
+      ucs4_t uc;
+      int mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs);
+      if (mblen < 0)
+        return -1;
 
-      case SEG_MODE_BATCH:
+      if (uc == '+' || uc == '-' || uc == '.')
         new_cmd = true;
-        break;
+      else if (!lex_uc_is_space (uc))
+        switch (s->mode)
+          {
+          case SEG_MODE_INTERACTIVE:
+            new_cmd = false;
+            break;
 
-      case SEG_MODE_AUTO:
-        new_cmd = segmenter_detect_command_name__ (input, n, ofs);
-        if (new_cmd < 0)
-          return -1;
-        break;
+          case SEG_MODE_BATCH:
+            new_cmd = true;
+            break;
 
-      default:
-        NOT_REACHED ();
-      }
-  else
-    new_cmd = false;
+          case SEG_MODE_AUTO:
+            new_cmd = segmenter_detect_command_name__ (input, n, eof, ofs);
+            if (new_cmd < 0)
+              return -1;
+            break;
+
+          default:
+            NOT_REACHED ();
+          }
+      else
+        new_cmd = false;
+    }
 
   if (new_cmd)
     {
@@ -461,7 +541,7 @@ segmenter_parse_comment_2__ (struct segmenter *s, const char *input, size_t n,
 
 static int
 segmenter_parse_document_1__ (struct segmenter *s, const char *input, size_t n,
-                              enum segment_type *type)
+                              bool eof, enum segment_type *type)
 {
   bool end_cmd;
   int ofs;
@@ -473,7 +553,7 @@ segmenter_parse_document_1__ (struct segmenter *s, const char *input, size_t n,
       ucs4_t uc;
       int mblen;
 
-      mblen = segmenter_u8_to_uc__ (&uc, input + ofs, n - ofs);
+      mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs);
       if (mblen < 0)
         return -1;
 
@@ -499,16 +579,22 @@ segmenter_parse_document_1__ (struct segmenter *s, const char *input, size_t n,
 
       ofs += mblen;
     }
+  if (eof)
+    {
+      *type = SEG_DOCUMENT;
+      s->state = S_DOCUMENT_3;
+      return ofs;
+    }
   return -1;
 }
 
 static int
 segmenter_parse_document_2__ (struct segmenter *s, const char *input, size_t n,
-                              enum segment_type *type)
+                              bool eof, enum segment_type *type)
 {
   int ofs;
 
-  ofs = segmenter_parse_newline__ (input, n, type);
+  ofs = segmenter_parse_newline__ (input, n, eof, type);
   if (ofs < 0)
     return -1;
 
@@ -526,22 +612,27 @@ segmenter_parse_document_3__ (struct segmenter *s, enum segment_type *type)
 }
 
 static int
-segmenter_unquoted (const char *input, size_t n, int ofs)
+segmenter_unquoted (const char *input, size_t n, bool eof, int ofs)
 
 {
-  char c;
-
-  ofs = skip_spaces_and_comments (input, n, ofs);
+  ofs = skip_spaces_and_comments (input, n, eof, ofs);
   if (ofs < 0)
     return -1;
-
-  c = input[ofs];
-  return c != '\'' && c != '"' && c != '\n' && c != '\0';
+  else if (ofs < n)
+    {
+      char c = input[ofs];  /* First byte after spaces and comments. */
+      return c != '\'' && c != '"' && c != '\n';
+    }
+  else
+    {
+      assert (eof);  /* OFS reaching N is only expected at EOF. */
+      return 0;  /* Nothing follows, so there is no unquoted text. */
+    }
 }
 
 static int
 next_id_in_command (const struct segmenter *s, const char *input, size_t n,
-                    int ofs, char id[], size_t id_size)
+                    bool eof, int ofs, char id[], size_t id_size)
 {
   struct segmenter sub;
 
@@ -555,7 +646,7 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n,
       enum segment_type type;
       int retval;
 
-      retval = segmenter_push (&sub, input + ofs, n - ofs, &type);
+      retval = segmenter_push (&sub, input + ofs, n - ofs, eof, &type);
       if (retval < 0)
         {
           id[0] = '\0';
@@ -589,6 +680,9 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n,
         case SEG_COMMENT_COMMAND:
         case SEG_DO_REPEAT_COMMAND:
         case SEG_INLINE_DATA:
+        case SEG_MACRO_ID:
+        case SEG_MACRO_NAME:
+        case SEG_MACRO_BODY:
         case SEG_START_DOCUMENT:
         case SEG_DOCUMENT:
         case SEG_START_COMMAND:
@@ -597,25 +691,23 @@ next_id_in_command (const struct segmenter *s, const char *input, size_t n,
         case SEG_END:
         case SEG_EXPECTED_QUOTE:
         case SEG_EXPECTED_EXPONENT:
-        case SEG_UNEXPECTED_DOT:
         case SEG_UNEXPECTED_CHAR:
           id[0] = '\0';
           return ofs + retval;
-
-        case SEG_N_TYPES:
-          NOT_REACHED ();
         }
       ofs += retval;
     }
 }
 
+/* Called when INPUT begins with a character that can start off an ID token. */
 static int
 segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n,
-                      enum segment_type *type)
+                      bool eof, enum segment_type *type)
 {
   ucs4_t uc;
   int ofs;
 
+  assert (n > 0);
   assert (s->state == S_GENERAL);
 
   ofs = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, input), n);
@@ -624,9 +716,13 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n,
       int mblen;
 
       if (ofs >= n)
-        return -1;
+        {
+          if (eof)
+            break;
+          return -1;
+        }
 
-      mblen = segmenter_u8_to_uc__ (&uc, input + ofs, n - ofs);
+      mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs);
       if (mblen < 0)
         return -1;
       else if (!lex_uc_is_idn (uc))
@@ -637,17 +733,16 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n,
 
   if (input[ofs - 1] == '.')
     {
-      int eol = at_end_of_line (input, n, ofs);
+      int eol = at_end_of_line (input, n, eof, ofs);
       if (eol < 0)
         return -1;
       else if (eol)
         ofs--;
     }
 
-  if (is_reserved_word (input, ofs))
-    *type = SEG_RESERVED_WORD;
-  else
-    *type = SEG_IDENTIFIER;
+  *type = (is_reserved_word (input, ofs) ? SEG_RESERVED_WORD
+           : input[0] == '!' ? SEG_MACRO_ID
+           : SEG_IDENTIFIER);
 
   if (s->substate & SS_START_OF_COMMAND)
     {
@@ -656,7 +751,7 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n,
       if (lex_id_match_n (ss_cstr ("COMMENT"), word, 4))
         {
           s->state = S_COMMENT_1;
-          return segmenter_parse_comment_1__ (s, input, n, type);
+          return segmenter_parse_comment_1__ (s, input, n, eof, type);
         }
       else if (lex_id_match (ss_cstr ("DOCUMENT"), word))
         {
@@ -664,27 +759,20 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n,
           *type = SEG_START_DOCUMENT;
           return 0;
         }
-      else if (lex_id_match (ss_cstr ("TITLE"), word)
-               || lex_id_match (ss_cstr ("SUBTITLE"), word))
+      else if (lex_id_match_n (ss_cstr ("DEFINE"), word, 6))
         {
-          int result = segmenter_unquoted (input, n, ofs);
-          if (result < 0)
-            return -1;
-          else if (result)
-            {
-              s->state = S_TITLE_1;
-              return ofs;
-            }
+          s->state = S_DEFINE_1;
+          return ofs;
         }
       else if (lex_id_match (ss_cstr ("FILE"), word))
         {
           char id[16];
 
-          if (next_id_in_command (s, input, n, ofs, id, sizeof id) < 0)
+          if (next_id_in_command (s, input, n, eof, ofs, id, sizeof id) < 0)
             return -1;
           else if (lex_id_match (ss_cstr ("LABEL"), ss_cstr (id)))
             {
-              s->state = S_FILE_LABEL;
+              s->state = S_FILE_LABEL_1;
               s->substate = 0;
               return ofs;
             }
@@ -693,7 +781,7 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n,
         {
           char id[16];
 
-          if (next_id_in_command (s, input, n, ofs, id, sizeof id) < 0)
+          if (next_id_in_command (s, input, n, eof, ofs, id, sizeof id) < 0)
             return -1;
           else if (lex_id_match (ss_cstr ("REPEAT"), ss_cstr (id)))
             {
@@ -707,25 +795,27 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n,
           char id[16];
           int ofs2;
 
-          ofs2 = next_id_in_command (s, input, n, ofs, id, sizeof id);
+          ofs2 = next_id_in_command (s, input, n, eof, ofs, id, sizeof id);
           if (ofs2 < 0)
             return -1;
           else if (lex_id_match (ss_cstr ("DATA"), ss_cstr (id)))
             {
               int eol;
 
-              ofs2 = skip_spaces_and_comments (input, n, ofs2);
+              ofs2 = skip_spaces_and_comments (input, n, eof, ofs2);
               if (ofs2 < 0)
                 return -1;
 
-              if (input[ofs2] == '.')
+              if (ofs2 >= n)
+                assert (eof);
+              else if (input[ofs2] == '.')
                 {
-                  ofs2 = skip_spaces_and_comments (input, n, ofs2 + 1);
+                  ofs2 = skip_spaces_and_comments (input, n, eof, ofs2 + 1);
                   if (ofs2 < 0)
                     return -1;
                 }
 
-              eol = is_end_of_line (input, n, ofs2);
+              eol = is_end_of_line (input, n, eof, ofs2);
               if (eol < 0)
                 return -1;
               else if (eol)
@@ -748,7 +838,8 @@ segmenter_parse_id__ (struct segmenter *s, const char *input, size_t n,
 static int
 segmenter_parse_string__ (enum segment_type string_type,
                           int ofs, struct segmenter *s,
-                          const char *input, size_t n, enum segment_type *type)
+                          const char *input, size_t n, bool eof,
+                          enum segment_type *type)
 {
   int quote = input[ofs];
 
@@ -757,46 +848,57 @@ segmenter_parse_string__ (enum segment_type string_type,
     if (input[ofs] == quote)
       {
         ofs++;
-        if (ofs >= n)
-          return -1;
-        else if (input[ofs] == quote)
-          ofs++;
-        else
+        if (ofs < n)
           {
-            *type = string_type;
-            s->substate = 0;
-            return ofs;
+            if (input[ofs] == quote)
+              {
+                ofs++;
+                continue;
+              }
           }
-      }
-    else if (input[ofs] == '\n' || input[ofs] == '\0')
-      {
-        *type = SEG_EXPECTED_QUOTE;
+        else if (!eof)
+          return -1;
+
+        *type = string_type;
         s->substate = 0;
         return ofs;
       }
+    else if (input[ofs] == '\n')
+      goto expected_quote;
     else
       ofs++;
 
+  if (eof)
+    goto expected_quote;
+
   return -1;
+
+expected_quote:
+  *type = SEG_EXPECTED_QUOTE;
+  s->substate = 0;
+  return ofs;
 }
 
 static int
 segmenter_maybe_parse_string__ (enum segment_type string_type,
                                 struct segmenter *s,
-                                const char *input, size_t n,
+                                const char *input, size_t n, bool eof,
                                 enum segment_type *type)
 {
   if (n < 2)
-    return -1;
+    {
+      if (!eof)
+        return -1;  /* Second byte not available yet. */
+    }
   else if (input[1] == '\'' || input[1] == '"')
-    return segmenter_parse_string__ (string_type, 1, s, input, n, type);
-  else
-    return segmenter_parse_id__ (s, input, n, type);
+    return segmenter_parse_string__ (string_type, 1, s, input, n, eof, type);
+  /* Single byte at EOF, or no quote in second place: parse as an ID. */
+  return segmenter_parse_id__ (s, input, n, eof, type);
 }
 
 static int
 segmenter_parse_mid_command__ (struct segmenter *s,
-                               const char *input, size_t n,
+                               const char *input, size_t n, bool eof,
                                enum segment_type *type)
 {
   ucs4_t uc;
@@ -806,7 +908,7 @@ segmenter_parse_mid_command__ (struct segmenter *s,
   assert (s->state == S_GENERAL);
   assert (!(s->substate & SS_START_OF_LINE));
 
-  mblen = segmenter_u8_to_uc__ (&uc, input, n);
+  mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, 0);
   if (mblen < 0)
     return -1;
 
@@ -818,26 +920,44 @@ segmenter_parse_mid_command__ (struct segmenter *s,
       return 1;
 
     case '/':
-      if (n == 1)
-        return -1;
+      if (n < 2)
+        {
+          if (!eof)
+            return -1;
+        }
       else if (input[1] == '*')
         {
-          ofs = skip_comment (input, n, 2);
+          ofs = skip_comment (input, n, eof, 2);
           if (ofs < 0)
             return -1;
 
           *type = SEG_COMMENT;
           return ofs;
         }
-      else
+
+      s->substate = 0;
+      *type = SEG_PUNCT;
+      return 1;
+
+    case '-':
+      ofs = skip_spaces (input, n, eof, 1);
+      if (ofs < 0)
+        return -1;
+      else if (ofs < n && c_isdigit (input[ofs]))
+        return segmenter_parse_number__ (s, input, n, eof, type, ofs);
+      else if (ofs < n && input[ofs] == '.')
         {
-          s->substate = 0;
-          *type = SEG_PUNCT;
-          return 1;
+          if (ofs + 1 >= n)
+            {
+              if (!eof)
+                return -1;
+            }
+          else if (c_isdigit (input[ofs + 1]))
+            return segmenter_parse_number__ (s, input, n, eof, type, ofs);
         }
-
-    case '(': case ')': case ',': case '=': case '-':
-    case '[': case ']': case '&': case '|': case '+':
+      /* Fall through. */
+    case '(': case ')': case '{': case ',': case '=': case ';': case ':':
+    case '[': case ']': case '}': case '&': case '|': case '+':
       *type = SEG_PUNCT;
       s->substate = 0;
       return 1;
@@ -847,66 +967,82 @@ segmenter_parse_mid_command__ (struct segmenter *s,
         {
           /* '*' at the beginning of a command begins a comment. */
           s->state = S_COMMENT_1;
-          return segmenter_parse_comment_1__ (s, input, n, type);
+          return segmenter_parse_comment_1__ (s, input, n, eof, type);
         }
       else
-        return segmenter_parse_digraph__ ("*", s, input, n, type);
+        return segmenter_parse_digraph__ ("*", s, input, n, eof, type);
 
     case '<':
-      return segmenter_parse_digraph__ ("=>", s, input, n, type);
+      return segmenter_parse_digraph__ ("=>", s, input, n, eof, type);
 
     case '>':
-      return segmenter_parse_digraph__ ("=", s, input, n, type);
+      return segmenter_parse_digraph__ ("=", s, input, n, eof, type);
 
     case '~':
-      return segmenter_parse_digraph__ ("=", s, input, n, type);
+      return segmenter_parse_digraph__ ("=", s, input, n, eof, type);
 
     case '.':
       if (n < 2)
-        return -1;
-      else if (c_isdigit (input[1]))
-        return segmenter_parse_number__ (s, input, n, type);
-      else
         {
-          int eol = at_end_of_line (input, n, 1);
-          if (eol < 0)
+          if (!eof)
             return -1;
+        }
+      else if (c_isdigit (input[1]))
+        return segmenter_parse_number__ (s, input, n, eof, type, 0);
 
-          if (eol)
-            {
-              *type = SEG_END_COMMAND;
-              s->substate = SS_START_OF_COMMAND;
-            }
-          else
-            *type = SEG_UNEXPECTED_DOT;
-          return 1;
+      int eol = at_end_of_line (input, n, eof, 1);
+      if (eol < 0)
+        return -1;
+
+      if (eol)
+        {
+          *type = SEG_END_COMMAND;
+          s->substate = SS_START_OF_COMMAND;
         }
-      NOT_REACHED ();
+      else
+        *type = SEG_PUNCT;
+      return 1;
 
     case '0': case '1': case '2': case '3': case '4':
     case '5': case '6': case '7': case '8': case '9':
-      return segmenter_parse_number__ (s, input, n, type);
+      return segmenter_parse_number__ (s, input, n, eof, type, 0);
 
     case 'u': case 'U':
       return segmenter_maybe_parse_string__ (SEG_UNICODE_STRING,
-                                           s, input, n, type);
+                                             s, input, n, eof, type);
 
     case 'x': case 'X':
       return segmenter_maybe_parse_string__ (SEG_HEX_STRING,
-                                             s, input, n, type);
+                                             s, input, n, eof, type);
 
     case '\'': case '"':
       return segmenter_parse_string__ (SEG_QUOTED_STRING, 0,
-                                       s, input, n, type);
+                                       s, input, n, eof, type);
+
+    case '!':
+      if (n < 2)
+        {
+          if (!eof)
+            return -1;
+          *type = SEG_PUNCT;
+          return 1;
+        }
+      else if (input[1] == '*')
+        {
+          *type = SEG_MACRO_ID;
+          return 2;
+        }
+      else
+        return segmenter_parse_id__ (s, input, n, eof, type);
 
     default:
       if (lex_uc_is_space (uc))
         {
-          ofs = skip_spaces (input, n, mblen);
+          ofs = skip_spaces (input, n, eof, mblen);
           if (ofs < 0)
             return -1;
 
-          if (input[ofs - 1] == '\r' && input[ofs] == '\n')
+          if (ofs < n && input[ofs - 1] == '\r' && input[ofs] == '\n')
             {
               if (ofs == 1)
                 {
@@ -921,7 +1057,13 @@ segmenter_parse_mid_command__ (struct segmenter *s,
           return ofs;
         }
       else if (lex_uc_is_id1 (uc))
-        return segmenter_parse_id__ (s, input, n, type);
+        return segmenter_parse_id__ (s, input, n, eof, type);
+      else if (uc > 32 && uc < 127 && uc != '\\' && uc != '^')
+        {
+          *type = SEG_PUNCT;
+          s->substate = 0;
+          return 1;
+        }
       else
         {
           *type = SEG_UNEXPECTED_CHAR;
@@ -982,7 +1124,8 @@ segmenter_get_command_name_candidates (unsigned char first)
 }
 
 static int
-segmenter_detect_command_name__ (const char *input, size_t n, int ofs)
+segmenter_detect_command_name__ (const char *input, size_t n, bool eof,
+                                 int ofs)
 {
   const char **commands;
 
@@ -995,9 +1138,13 @@ segmenter_detect_command_name__ (const char *input, size_t n, int ofs)
       int mblen;
 
       if (ofs >= n)
-        return -1;
+        {
+          if (eof)
+            break;
+          return -1;
+        }
 
-      mblen = segmenter_u8_to_uc__ (&uc, input + ofs, n - ofs);
+      mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs);
       if (mblen < 0)
         return -1;
 
@@ -1007,6 +1154,9 @@ segmenter_detect_command_name__ (const char *input, size_t n, int ofs)
 
       ofs += mblen;
     }
+  if (!ofs)
+    return 0;
+
   if (input[ofs - 1] == '.')
     ofs--;
 
@@ -1027,15 +1177,16 @@ segmenter_detect_command_name__ (const char *input, size_t n, int ofs)
 }
 
 static int
-is_start_of_string__ (const char *input, size_t n, int ofs)
+is_start_of_string__ (const char *input, size_t n, bool eof, int ofs)
 {
-  int c;
+  if (ofs >= n)
+    return eof ? 0 : -1;
 
-  c = input[ofs];
+  int c = input[ofs];
   if (c == 'x' || c == 'X' || c == 'u' || c == 'U')
     {
       if (ofs + 1 >= n)
-        return -1;
+        return eof ? 0 : -1;
 
       return input[ofs + 1] == '\'' || input[ofs + 1] == '"';
     }
@@ -1045,7 +1196,7 @@ is_start_of_string__ (const char *input, size_t n, int ofs)
 
 static int
 segmenter_parse_start_of_line__ (struct segmenter *s,
-                                 const char *input, size_t n,
+                                 const char *input, size_t n, bool eof,
                                  enum segment_type *type)
 {
   ucs4_t uc;
@@ -1055,19 +1206,19 @@ segmenter_parse_start_of_line__ (struct segmenter *s,
   assert (s->state == S_GENERAL);
   assert (s->substate & SS_START_OF_LINE);
 
-  mblen = segmenter_u8_to_uc__ (&uc, input, n);
+  mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, 0);
   if (mblen < 0)
     return -1;
 
   switch (uc)
     {
     case '+':
-      ofs = skip_spaces_and_comments (input, n, 1);
+      ofs = skip_spaces_and_comments (input, n, eof, 1);
       if (ofs < 0)
         return -1;
       else
         {
-          int is_string = is_start_of_string__ (input, n, ofs);
+          int is_string = is_start_of_string__ (input, n, eof, ofs);
           if (is_string < 0)
             return -1;
           else if (is_string)
@@ -1089,7 +1240,7 @@ segmenter_parse_start_of_line__ (struct segmenter *s,
     default:
       if (lex_uc_is_space (uc))
         {
-          int eol = at_end_of_line (input, n, 0);
+          int eol = at_end_of_line (input, n, eof, 0);
           if (eol < 0)
             return -1;
           else if (eol)
@@ -1105,7 +1256,7 @@ segmenter_parse_start_of_line__ (struct segmenter *s,
         break;
       else if (s->mode == SEG_MODE_AUTO)
         {
-          int cmd = segmenter_detect_command_name__ (input, n, 0);
+          int cmd = segmenter_detect_command_name__ (input, n, eof, 0);
           if (cmd < 0)
             return -1;
           else if (cmd == 0)
@@ -1120,20 +1271,20 @@ segmenter_parse_start_of_line__ (struct segmenter *s,
     }
 
   s->substate = SS_START_OF_COMMAND;
-  return segmenter_parse_mid_command__ (s, input, n, type);
+  return segmenter_parse_mid_command__ (s, input, n, eof, type);
 }
 
 static int
-segmenter_parse_file_label__ (struct segmenter *s,
-                              const char *input, size_t n,
-                              enum segment_type *type)
+segmenter_parse_file_label_1__ (struct segmenter *s,
+                                const char *input, size_t n, bool eof,
+                                enum segment_type *type)
 {
   struct segmenter sub;
   int ofs;
 
   sub = *s;
   sub.state = S_GENERAL;
-  ofs = segmenter_push (&sub, input, n, type);
+  ofs = segmenter_push (&sub, input, n, eof, type);
 
   if (ofs < 0)
     return -1;
@@ -1143,13 +1294,13 @@ segmenter_parse_file_label__ (struct segmenter *s,
 
       assert (lex_id_match (ss_cstr ("LABEL"),
                             ss_buffer ((char *) input, ofs)));
-      result = segmenter_unquoted (input, n, ofs);
+      result = segmenter_unquoted (input, n, eof, ofs);
       if (result < 0)
         return -1;
       else
         {
           if (result)
-            s->state = S_TITLE_1;
+            s->state = S_FILE_LABEL_2;
           else
             *s = sub;
           return ofs;
@@ -1162,9 +1313,74 @@ segmenter_parse_file_label__ (struct segmenter *s,
     }
 }
 
+static int
+segmenter_parse_file_label_2__ (struct segmenter *s,
+                                const char *input, size_t n, bool eof,
+                                enum segment_type *type)
+{
+  int ofs;
+
+  ofs = skip_spaces (input, n, eof, 0);
+  if (ofs < 0)
+    return -1;
+  s->state = S_FILE_LABEL_3;
+  *type = SEG_SPACES;
+  return ofs;
+}
+
+static int
+segmenter_parse_file_label_3__ (struct segmenter *s,
+                                const char *input, size_t n, bool eof,
+                                enum segment_type *type)
+{
+  int endcmd;
+  int ofs;
+
+  endcmd = -1;
+  ofs = 0;
+  while (ofs < n)
+    {
+      ucs4_t uc;
+      int mblen;
+
+      mblen = segmenter_u8_to_uc__ (&uc, input, n, eof, ofs);
+      if (mblen < 0)
+        return -1;
+
+      switch (uc)
+        {
+        case '\n':
+          goto end_of_line;
+
+        case '.':
+          endcmd = ofs;
+          break;
+
+        default:
+          if (!lex_uc_is_space (uc))
+            endcmd = -1;
+          break;
+        }
+
+      ofs += mblen;
+    }
+
+  if (eof)
+    {
+    end_of_line:
+      s->state = S_GENERAL;
+      s->substate = 0;
+      *type = SEG_UNQUOTED_STRING;
+      return endcmd >= 0 ? endcmd : ofs;
+    }
+
+  return -1;
+}
+
 static int
 segmenter_subparse (struct segmenter *s,
-                    const char *input, size_t n, enum segment_type *type)
+                    const char *input, size_t n, bool eof,
+                    enum segment_type *type)
 {
   struct segmenter sub;
   int ofs;
@@ -1172,24 +1388,31 @@ segmenter_subparse (struct segmenter *s,
   sub.mode = s->mode;
   sub.state = S_GENERAL;
   sub.substate = s->substate;
-  ofs = segmenter_push (&sub, input, n, type);
+  ofs = segmenter_push (&sub, input, n, eof, type);
   s->substate = sub.substate;
   return ofs;
 }
 
+/* We are segmenting a DO REPEAT command, currently reading the syntax that
+   defines the stand-in variables (the head) before the lines of syntax to be
+   repeated (the body). */
 static int
 segmenter_parse_do_repeat_1__ (struct segmenter *s,
-                               const char *input, size_t n,
+                               const char *input, size_t n, bool eof,
                                enum segment_type *type)
 {
-  int ofs = segmenter_subparse (s, input, n, type);
+  int ofs = segmenter_subparse (s, input, n, eof, type);
   if (ofs < 0)
     return -1;
 
-  if (*type == SEG_START_COMMAND || *type == SEG_SEPARATE_COMMANDS)
-    s->state = S_DO_REPEAT_2;
-  else if (*type == SEG_END_COMMAND)
+  if (*type == SEG_SEPARATE_COMMANDS)
+    {
+      /* We reached a blank line that separates the head from the body. */
+      s->state = S_DO_REPEAT_2;
+    }
+  else if (*type == SEG_END_COMMAND || *type == SEG_START_COMMAND)
     {
+      /* We reached the body. */
       s->state = S_DO_REPEAT_3;
       s->substate = 1;
     }
@@ -1197,17 +1420,20 @@ segmenter_parse_do_repeat_1__ (struct segmenter *s,
   return ofs;
 }
 
+/* We are segmenting a DO REPEAT command, currently reading a blank line that
+   separates the head from the body. */
 static int
 segmenter_parse_do_repeat_2__ (struct segmenter *s,
-                               const char *input, size_t n,
+                               const char *input, size_t n, bool eof,
                                enum segment_type *type)
 {
-  int ofs = segmenter_subparse (s, input, n, type);
+  int ofs = segmenter_subparse (s, input, n, eof, type);
   if (ofs < 0)
     return -1;
 
   if (*type == SEG_NEWLINE)
     {
+      /* We reached the body. */
       s->state = S_DO_REPEAT_3;
       s->substate = 1;
     }
@@ -1217,7 +1443,7 @@ segmenter_parse_do_repeat_2__ (struct segmenter *s,
 
 static bool
 check_repeat_command (struct segmenter *s,
-                      const char *input, size_t n)
+                      const char *input, size_t n, bool eof)
 {
   int direction;
   char id[16];
@@ -1227,7 +1453,7 @@ check_repeat_command (struct segmenter *s,
   if (input[ofs] == '+' || input[ofs] == '-')
     ofs++;
 
-  ofs = next_id_in_command (s, input, n, ofs, id, sizeof id);
+  ofs = next_id_in_command (s, input, n, eof, ofs, id, sizeof id);
   if (ofs < 0)
     return false;
   else if (lex_id_match (ss_cstr ("DO"), ss_cstr (id)))
@@ -1237,7 +1463,7 @@ check_repeat_command (struct segmenter *s,
   else
     return true;
 
-  ofs = next_id_in_command (s, input, n, ofs, id, sizeof id);
+  ofs = next_id_in_command (s, input, n, eof, ofs, id, sizeof id);
   if (ofs < 0)
     return false;
 
@@ -1247,43 +1473,48 @@ check_repeat_command (struct segmenter *s,
 }
 
 static int
-segmenter_parse_full_line__ (const char *input, size_t n,
+segmenter_parse_full_line__ (const char *input, size_t n, bool eof,
                              enum segment_type *type)
 {
   const char *newline = memchr (input, '\n', n);
+  if (!newline)
+    return eof ? n : -1;
 
-  if (newline == NULL)
-    return -1;
-  else
+  ptrdiff_t ofs = newline - input;
+  if (ofs == 0 || (ofs == 1 && input[0] == '\r'))
     {
-      int ofs = newline - input;
-      if (ofs == 0 || (ofs == 1 && input[0] == '\r'))
-        {
-          *type = SEG_NEWLINE;
-          return ofs + 1;
-        }
-      else
-        return ofs - (input[ofs - 1] == '\r');
+      *type = SEG_NEWLINE;
+      return ofs + 1;
     }
+  else
+    return ofs - (input[ofs - 1] == '\r');
 }
 
+/* We are in the body of DO REPEAT, segmenting the lines of syntax that are to
+   be repeated.  Report each line of syntax as a single SEG_DO_REPEAT_COMMAND.
+
+   DO REPEAT can be nested, so we look for DO REPEAT...END REPEAT blocks inside
+   the lines we're segmenting.  s->substate counts the nesting level, starting
+   at 1. */
 static int
 segmenter_parse_do_repeat_3__ (struct segmenter *s,
-                               const char *input, size_t n,
+                               const char *input, size_t n, bool eof,
                                enum segment_type *type)
 {
   int ofs;
 
-  ofs = segmenter_parse_full_line__ (input, n, type);
-  if (ofs < 0 || input[ofs - 1] == '\n')
+  ofs = segmenter_parse_full_line__ (input, n, eof, type);
+  if (ofs < 0 || (ofs > 0 && input[ofs - 1] == '\n'))
     return ofs;
-  else if (!check_repeat_command (s, input, n))
+  else if (!check_repeat_command (s, input, n, eof) && !eof)
     return -1;
   else if (s->substate == 0)
     {
+      /* Nesting level dropped to 0, so we've finished reading the DO REPEAT
+         body. */
       s->state = S_GENERAL;
       s->substate = SS_START_OF_COMMAND | SS_START_OF_LINE;
-      return segmenter_push (s, input, n, type);
+      return segmenter_push (s, input, n, eof, type);
     }
   else
     {
@@ -1292,12 +1523,211 @@ segmenter_parse_do_repeat_3__ (struct segmenter *s,
     }
 }
 
+/* We are segmenting a DEFINE command, which consists of:
+
+  - The DEFINE keyword.
+
+  - An identifier.  We transform this into SEG_MACRO_NAME instead of
+    SEG_IDENTIFIER or SEG_MACRO_ID because this identifier must never be
+    macro-expanded.
+
+  - Anything but "(".
+
+  - "(" followed by a sequence of tokens possibly including balanced parentheses
+    up to a final ")".
+
+  - A sequence of any number of lines, one string per line, ending with
+    "!ENDDEFINE".  The first line is usually blank (that is, a newline follows
+    the "(").  The last line usually just has "!ENDDEFINE." on it, but it can
+    start with other tokens.  The whole DEFINE...!ENDDEFINE can be on a single
+    line, even.
+   */
+static int
+segmenter_parse_define_1_2__ (struct segmenter *s,
+                              const char *input, size_t n, bool eof,
+                              enum segment_type *type)
+{
+  int ofs = segmenter_subparse (s, input, n, eof, type);
+  if (ofs < 0)
+    return -1;
+
+  if (s->state == S_DEFINE_1
+      && (*type == SEG_IDENTIFIER || *type == SEG_MACRO_ID))
+    {
+      *type = SEG_MACRO_NAME;
+      s->state = S_DEFINE_2;
+    }
+  else if (*type == SEG_SEPARATE_COMMANDS
+      || *type == SEG_END_COMMAND
+      || *type == SEG_START_COMMAND)
+    {
+      /* The DEFINE command is malformed because we reached its end without
+         ever hitting a "(" token.  Transition back to general parsing. */
+      s->state = S_GENERAL;
+      return ofs;
+    }
+  else if (*type == SEG_PUNCT && input[0] == '(')
+    {
+      s->state = S_DEFINE_3;
+      s->nest = 1;
+      return ofs;
+    }
+
+  return ofs;
+}
+
+static int
+segmenter_parse_define_3__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  int ofs = segmenter_subparse (s, input, n, eof, type);
+  if (ofs < 0)
+    return -1;
+
+  if (*type == SEG_SEPARATE_COMMANDS
+      || *type == SEG_END_COMMAND
+      || *type == SEG_START_COMMAND)
+    {
+      /* The DEFINE command is malformed because we reached its end before
+         closing the set of parentheses.  Transition back to general
+         parsing. */
+      s->state = S_GENERAL;
+      return ofs;
+    }
+  else if (*type == SEG_PUNCT && input[0] == '(')
+    {
+      s->nest++;
+      return ofs;
+    }
+  else if (*type == SEG_PUNCT && input[0] == ')')
+    {
+      s->nest--;
+      if (!s->nest)
+        {
+          s->state = S_DEFINE_4;
+          s->substate = 0;
+        }
+      return ofs;
+    }
+
+  return ofs;
+}
+
+static size_t
+find_enddefine (struct substring input)
+{
+  size_t n = input.length;
+  const struct substring enddefine = ss_cstr ("!ENDDEFINE");
+  for (int ofs = 0;;)
+    {
+      /* Skip !ENDDEFINE in comment. */
+      ofs = skip_spaces_and_comments (input.string, n, true, ofs);
+      if (ofs + enddefine.length > n)
+        return SIZE_MAX;
+
+      char c = input.string[ofs];
+      if (c == '!'
+               && ss_equals_case (ss_substr (input, ofs, enddefine.length),
+                                  enddefine))
+        return ofs;
+      else if (c == '\'' || c == '"')
+        {
+          /* Skip quoted !ENDDEFINE. */
+          ofs++;
+          for (;;)
+            {
+              if (ofs >= n)
+                return SIZE_MAX;
+              else if (input.string[ofs++] == c)
+                break;
+            }
+        }
+      else
+        ofs++;
+    }
+}
+
+/* We are in the body of a macro definition, looking for additional lines of
+   the body or !ENDDEFINE. */
+static int
+segmenter_parse_define_4__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  /* Gather a whole line. */
+  const char *newline = memchr (input, '\n', n);
+  int ofs = (newline ? newline - input - (newline > input && newline[-1] == '\r')
+             : eof ? n
+             : -1);
+  if (ofs < 0)
+    return -1;
+
+  /* Does the line contain !ENDDEFINE? */
+  size_t end = find_enddefine (ss_buffer (input, ofs));
+  if (end == SIZE_MAX)
+    {
+      /* No !ENDDEFINE.  We have a full line of macro body.
+
+         The line might be blank, whether completely empty or just spaces and
+         comments.  That's OK: we need to report blank lines because they can
+         have significance.
+
+         However, if the first line of the macro body (the same line as the
+         closing parenthesis in the argument definition) is blank, we just
+         report it as spaces because it's not significant. */
+      *type = (s->substate == 0 && is_all_spaces (input, ofs)
+               ? SEG_SPACES : SEG_MACRO_BODY);
+      s->state = S_DEFINE_5;
+      s->substate = 1;
+      return ofs;
+    }
+  else
+    {
+      /* Macro ends at the !ENDDEFINE on this line. */
+      s->state = S_GENERAL;
+      s->substate = 0;
+      if (!end)
+        {
+          /* Line starts with !ENDDEFINE. */
+          return segmenter_push (s, input, n, eof, type);
+        }
+      else
+        {
+          if (is_all_spaces (input, end))
+            {
+              /* Line starts with spaces followed by !ENDDEFINE. */
+              *type = SEG_SPACES;
+            }
+          else
+            {
+              /* Line starts with some content followed by !ENDDEFINE. */
+              *type = SEG_MACRO_BODY;
+            }
+          return end;
+        }
+    }
+}
+
+static int
+segmenter_parse_define_5__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  int ofs = segmenter_parse_newline__ (input, n, eof, type);
+  if (ofs < 0)
+    return -1;
+
+  s->state = S_DEFINE_4;
+  return ofs;
+}
+
 static int
 segmenter_parse_begin_data_1__ (struct segmenter *s,
-                                const char *input, size_t n,
+                                const char *input, size_t n, bool eof,
                                 enum segment_type *type)
 {
-  int ofs = segmenter_subparse (s, input, n, type);
+  int ofs = segmenter_subparse (s, input, n, eof, type);
   if (ofs < 0)
     return -1;
 
@@ -1309,10 +1739,10 @@ segmenter_parse_begin_data_1__ (struct segmenter *s,
 
 static int
 segmenter_parse_begin_data_2__ (struct segmenter *s,
-                                const char *input, size_t n,
+                                const char *input, size_t n, bool eof,
                                 enum segment_type *type)
 {
-  int ofs = segmenter_subparse (s, input, n, type);
+  int ofs = segmenter_subparse (s, input, n, eof, type);
   if (ofs < 0)
     return -1;
 
@@ -1331,7 +1761,7 @@ is_end_data (const char *input, size_t n)
   int mblen;
   int ofs;
 
-  if (n < 3 || c_strncasecmp (input, "END", 3))
+  if (n < 4 || c_strncasecmp (input, "END", 3))
     return false;
 
   ofs = 3;
@@ -1364,19 +1794,19 @@ is_end_data (const char *input, size_t n)
 
 static int
 segmenter_parse_begin_data_3__ (struct segmenter *s,
-                                const char *input, size_t n,
+                                const char *input, size_t n, bool eof,
                                 enum segment_type *type)
 {
   int ofs;
 
-  ofs = segmenter_parse_full_line__ (input, n, type);
+  ofs = segmenter_parse_full_line__ (input, n, eof, type);
   if (ofs < 0)
     return -1;
   else if (is_end_data (input, ofs))
     {
       s->state = S_GENERAL;
       s->substate = SS_START_OF_COMMAND | SS_START_OF_LINE;
-      return segmenter_push (s, input, n, type);
+      return segmenter_push (s, input, n, eof, type);
     }
   else
     {
@@ -1388,12 +1818,12 @@ segmenter_parse_begin_data_3__ (struct segmenter *s,
 
 static int
 segmenter_parse_begin_data_4__ (struct segmenter *s,
-                                const char *input, size_t n,
+                                const char *input, size_t n, bool eof,
                                 enum segment_type *type)
 {
   int ofs;
 
-  ofs = segmenter_parse_newline__ (input, n, type);
+  ofs = segmenter_parse_newline__ (input, n, eof, type);
   if (ofs < 0)
     return -1;
 
@@ -1401,64 +1831,6 @@ segmenter_parse_begin_data_4__ (struct segmenter *s,
   return ofs;
 }
 
-static int
-segmenter_parse_title_1__ (struct segmenter *s,
-                           const char *input, size_t n,
-                           enum segment_type *type)
-{
-  int ofs;
-
-  ofs = skip_spaces (input, n, 0);
-  if (ofs < 0)
-    return -1;
-  s->state = S_TITLE_2;
-  *type = SEG_SPACES;
-  return ofs;
-}
-
-static int
-segmenter_parse_title_2__ (struct segmenter *s,
-                           const char *input, size_t n,
-                           enum segment_type *type)
-{
-  int endcmd;
-  int ofs;
-
-  endcmd = -1;
-  ofs = 0;
-  while (ofs < n)
-    {
-      ucs4_t uc;
-      int mblen;
-
-      mblen = segmenter_u8_to_uc__ (&uc, input + ofs, n - ofs);
-      if (mblen < 0)
-        return -1;
-
-      switch (uc)
-        {
-        case '\n':
-          s->state = S_GENERAL;
-          s->substate = 0;
-          *type = SEG_UNQUOTED_STRING;
-          return endcmd >= 0 ? endcmd : ofs;
-
-        case '.':
-          endcmd = ofs;
-          break;
-
-        default:
-          if (!lex_uc_is_space (uc))
-            endcmd = -1;
-          break;
-        }
-
-      ofs += mblen;
-    }
-
-  return -1;
-}
-
 /* Returns the name of segment TYPE as a string.  The caller must not modify
    or free the returned string.
 
@@ -1476,17 +1848,28 @@ segment_type_to_string (enum segment_type type)
     }
 }
 
-/* Initializes S as a segmenter with the given syntax MODE.
+/* Returns a segmenter with the given syntax MODE.
+
+   If IS_SNIPPET is false, then the segmenter will parse as if it's being given
+   a whole file.  This means, for example, that it will interpret - or + at the
+   beginning of the syntax as a separator between commands (since - or + at the
+   beginning of a line has this meaning).
+
+   If IS_SNIPPET is true, then the segmenter will parse as if it's being given
+   an isolated piece of syntax.  This means, for example, that it will
+   interpret - or + at the beginning of the syntax as an operator token or (if
+   followed by a digit) as part of a number.
 
    A segmenter does not contain any external references, so nothing needs to be
    done to destroy one.  For the same reason, segmenters may be copied with
    plain struct assignment (or memcpy). */
-void
-segmenter_init (struct segmenter *s, enum segmenter_mode mode)
+struct segmenter
+segmenter_init (enum segmenter_mode mode, bool is_snippet)
 {
-  s->state = S_SHBANG;
-  s->substate = 0;
-  s->mode = mode;
+  return (struct segmenter) {
+    .state = is_snippet ? S_GENERAL : S_SHBANG,
+    .mode = mode,
+  };
 }
 
 /* Returns the mode passed to segmenter_init() for S. */
@@ -1498,9 +1881,9 @@ segmenter_get_mode (const struct segmenter *s)
 
 /* Attempts to label a prefix of S's remaining input with a segment type.  The
    caller supplies the first N bytes of the remaining input as INPUT, which
-   must be a UTF-8 encoded string.  The end of the input stream must be
-   indicated by a null byte at the beginning of a line, that is, immediately
-   following a new-line (or as the first byte of the input stream).
+   must be a UTF-8 encoded string.  If EOF is true, then the N bytes supplied
+   are the entire remainder of the input; if EOF is false, then further input
+   is potentially available.
 
    The input may contain '\n' or '\r\n' line ends in any combination.
 
@@ -1510,12 +1893,15 @@ segmenter_get_mode (const struct segmenter *s)
    bytes as part of INPUT, because they have (figuratively) been consumed by
    the segmenter.
 
+   Segments can have zero length, including segment types SEG_END,
+   SEG_SEPARATE_COMMANDS, SEG_START_DOCUMENT, SEG_INLINE_DATA, and SEG_SPACES.
+
    Failure occurs only if the segment type of the N bytes in INPUT cannot yet
-   be determined.  In this case segmenter_push() returns -1.  The caller should
-   obtain more input and then call segmenter_push() again with a larger N and
-   repeat until the input is exhausted (which must be indicated as described
-   above) or until a valid segment is returned.  segmenter_push() will never
-   return -1 when the end of input is visible within INPUT.
+   be determined.  In this case segmenter_push() returns -1.  If more input is
+   available, the caller should obtain some more, then call again with a larger
+   N.  If this is not enough, the process might need to repeat again and again.
+   If input is exhausted, then the caller may call again setting EOF to true.
+   segmenter_push() will never return -1 when EOF is true.
 
    The caller must not, in a sequence of calls, supply contradictory input.
    That is, bytes provided as part of INPUT in one call, but not consumed, must
@@ -1523,63 +1909,74 @@ segmenter_get_mode (const struct segmenter *s)
    because segmenter_push() must often make decisions based on looking ahead
    beyond the bytes that it consumes. */
 int
-segmenter_push (struct segmenter *s, const char *input, size_t n,
+segmenter_push (struct segmenter *s, const char *input, size_t n, bool eof,
                 enum segment_type *type)
 {
-  if (n == 0)
-    return -1;
-
-  if (input[0] == '\0')
+  if (!n)
     {
-      *type = SEG_END;
-      return 1;
+      if (eof)
+        {
+          *type = SEG_END;
+          return 0;
+        }
+      else
+        return -1;
     }
 
   switch (s->state)
     {
     case S_SHBANG:
-      return segmenter_parse_shbang__ (s, input, n, type);
+      return segmenter_parse_shbang__ (s, input, n, eof, type);
 
     case S_GENERAL:
       return (s->substate & SS_START_OF_LINE
-              ? segmenter_parse_start_of_line__ (s, input, n, type)
-              : segmenter_parse_mid_command__ (s, input, n, type));
+              ? segmenter_parse_start_of_line__ (s, input, n, eof, type)
+              : segmenter_parse_mid_command__ (s, input, n, eof, type));
 
     case S_COMMENT_1:
-      return segmenter_parse_comment_1__ (s, input, n, type);
+      return segmenter_parse_comment_1__ (s, input, n, eof, type);
     case S_COMMENT_2:
-      return segmenter_parse_comment_2__ (s, input, n, type);
+      return segmenter_parse_comment_2__ (s, input, n, eof, type);
 
     case S_DOCUMENT_1:
-      return segmenter_parse_document_1__ (s, input, n, type);
+      return segmenter_parse_document_1__ (s, input, n, eof, type);
     case S_DOCUMENT_2:
-      return segmenter_parse_document_2__ (s, input, n, type);
+      return segmenter_parse_document_2__ (s, input, n, eof, type);
     case S_DOCUMENT_3:
       return segmenter_parse_document_3__ (s, type);
 
-    case S_FILE_LABEL:
-      return segmenter_parse_file_label__ (s, input, n, type);
+    case S_FILE_LABEL_1:
+      return segmenter_parse_file_label_1__ (s, input, n, eof, type);
+    case S_FILE_LABEL_2:
+      return segmenter_parse_file_label_2__ (s, input, n, eof, type);
+    case S_FILE_LABEL_3:
+      return segmenter_parse_file_label_3__ (s, input, n, eof, type);
 
     case S_DO_REPEAT_1:
-      return segmenter_parse_do_repeat_1__ (s, input, n, type);
+      return segmenter_parse_do_repeat_1__ (s, input, n, eof, type);
     case S_DO_REPEAT_2:
-      return segmenter_parse_do_repeat_2__ (s, input, n, type);
+      return segmenter_parse_do_repeat_2__ (s, input, n, eof, type);
     case S_DO_REPEAT_3:
-      return segmenter_parse_do_repeat_3__ (s, input, n, type);
+      return segmenter_parse_do_repeat_3__ (s, input, n, eof, type);
+
+    case S_DEFINE_1:
+    case S_DEFINE_2:
+      return segmenter_parse_define_1_2__ (s, input, n, eof, type);
+    case S_DEFINE_3:
+      return segmenter_parse_define_3__ (s, input, n, eof, type);
+    case S_DEFINE_4:
+      return segmenter_parse_define_4__ (s, input, n, eof, type);
+    case S_DEFINE_5:
+      return segmenter_parse_define_5__ (s, input, n, eof, type);
 
     case S_BEGIN_DATA_1:
-      return segmenter_parse_begin_data_1__ (s, input, n, type);
+      return segmenter_parse_begin_data_1__ (s, input, n, eof, type);
     case S_BEGIN_DATA_2:
-      return segmenter_parse_begin_data_2__ (s, input, n, type);
+      return segmenter_parse_begin_data_2__ (s, input, n, eof, type);
     case S_BEGIN_DATA_3:
-      return segmenter_parse_begin_data_3__ (s, input, n, type);
+      return segmenter_parse_begin_data_3__ (s, input, n, eof, type);
     case S_BEGIN_DATA_4:
-      return segmenter_parse_begin_data_4__ (s, input, n, type);
-
-    case S_TITLE_1:
-      return segmenter_parse_title_1__ (s, input, n, type);
-    case S_TITLE_2:
-      return segmenter_parse_title_2__ (s, input, n, type);
+      return segmenter_parse_begin_data_4__ (s, input, n, eof, type);
     }
 
   NOT_REACHED ();
@@ -1610,8 +2007,11 @@ segmenter_get_prompt (const struct segmenter *s)
     case S_DOCUMENT_3:
       return PROMPT_FIRST;
 
-    case S_FILE_LABEL:
+    case S_FILE_LABEL_1:
       return PROMPT_LATER;
+    case S_FILE_LABEL_2:
+    case S_FILE_LABEL_3:
+      return PROMPT_FIRST;
 
     case S_DO_REPEAT_1:
     case S_DO_REPEAT_2:
@@ -1619,6 +2019,14 @@ segmenter_get_prompt (const struct segmenter *s)
     case S_DO_REPEAT_3:
       return PROMPT_DO_REPEAT;
 
+    case S_DEFINE_1:
+    case S_DEFINE_2:
+    case S_DEFINE_3:
+      return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER;
+    case S_DEFINE_4:
+    case S_DEFINE_5:
+      return PROMPT_DEFINE;
+
     case S_BEGIN_DATA_1:
       return PROMPT_FIRST;
     case S_BEGIN_DATA_2:
@@ -1627,9 +2035,6 @@ segmenter_get_prompt (const struct segmenter *s)
     case S_BEGIN_DATA_4:
       return PROMPT_DATA;
 
-    case S_TITLE_1:
-    case S_TITLE_2:
-      return PROMPT_FIRST;
     }
 
   NOT_REACHED ();