X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Flexer.c;h=35a9afa381f038968fafb27c05a87d57e408b176;hb=74f7e168fc9a87ff45730452df3627636c5e2f77;hp=782c399f701d663d21cf8b1bb153bc1e2a04ae40;hpb=b454cceec7abb67de3225d63c9daf7b112ea4e0a;p=pspp

diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c
index 782c399f70..35a9afa381 100644
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -38,6 +38,7 @@
 #include "libpspp/cast.h"
 #include "libpspp/deque.h"
 #include "libpspp/i18n.h"
+#include "libpspp/intern.h"
 #include "libpspp/ll.h"
 #include "libpspp/message.h"
 #include "libpspp/misc.h"
@@ -68,7 +69,6 @@ struct lex_token
        call. */
     size_t token_pos;           /* Offset into src->buffer of token start. */
     size_t token_len;           /* Length of source for token in bytes. */
-    int first_line;             /* Line number at token_pos. */
 
     /* For a token obtained through macro expansion, this is just this token.
 
@@ -80,6 +80,21 @@ struct lex_token
     size_t *ref_cnt;        /* Number of lex_tokens that refer to macro_rep. */
   };
 
+static struct msg_point lex_token_start_point (const struct lex_source *,
+                                               const struct lex_token *);
+static struct msg_point lex_token_end_point (const struct lex_source *,
+                                             const struct lex_token *);
+
+static bool lex_ofs_at_phrase__ (struct lexer *, int ofs, const char *s,
+                                 size_t *n_matchedp);
+
+/* Source offset of the last byte in TOKEN. */
+static size_t
+lex_token_end (const struct lex_token *token)
+{
+  return token->token_pos + MAX (token->token_len, 1) - 1;
+}
+
 static void
 lex_token_destroy (struct lex_token *t)
 {
@@ -206,6 +221,14 @@ lex_stage_shift (struct lex_stage *dst, struct lex_stage *src, size_t n)
 struct lex_source
   {
     struct ll ll;               /* In lexer's list of sources. */
+
+    /* Reference count:
+
+       - One for struct lexer.
+
+       - One for each struct msg_location that references this source. */
+    size_t n_refs;
+
     struct lex_reader *reader;
     struct lexer *lexer;
     struct segmenter segmenter;
@@ -220,7 +243,10 @@ struct lex_source
     size_t journal_pos;         /* First byte not yet output to journal. */
     size_t seg_pos;             /* First byte not yet scanned as token. */
 
-    int n_newlines;             /* Number of new-lines up to seg_pos. */
+    /* Offset into 'buffer' of starts of lines. */
+    size_t *lines;
+    size_t n_lines, allocated_lines;
+
     bool suppress_next_newline;
 
     /* Tokens.
@@ -250,7 +276,6 @@ struct lex_source
 
 static struct lex_source *lex_source_create (struct lexer *,
                                              struct lex_reader *);
-static void lex_source_destroy (struct lex_source *);
 
 /* Lexer. */
 struct lexer
@@ -260,17 +285,18 @@ struct lexer
   };
 
 static struct lex_source *lex_source__ (const struct lexer *);
-static char *lex_source_get_syntax__ (const struct lex_source *,
-                                      int n0, int n1);
+static char *lex_source_syntax__ (const struct lex_source *,
+                                  int ofs0, int ofs1);
 static const struct lex_token *lex_next__ (const struct lexer *, int n);
 static void lex_source_push_endcmd__ (struct lex_source *);
 static void lex_source_push_parse (struct lex_source *, struct lex_token *);
 static void lex_source_clear_parse (struct lex_source *);
 
 static bool lex_source_get_parse (struct lex_source *);
-static void lex_source_error_valist (struct lex_source *, int n0, int n1,
-                                     const char *format, va_list)
-   PRINTF_FORMAT (4, 0);
+static void lex_source_msg_valist (struct lex_source *, enum msg_class,
+                                   int ofs0, int ofs1,
+                                   const char *format, va_list)
+   PRINTF_FORMAT (5, 0);
 static const struct lex_token *lex_source_next__ (const struct lex_source *,
                                                   int n);
 
@@ -319,7 +345,10 @@ lex_destroy (struct lexer *lexer)
       struct lex_source *source, *next;
 
       ll_for_each_safe (source, next, struct lex_source, ll, &lexer->sources)
-        lex_source_destroy (source);
+        {
+          ll_remove (&source->ll);
+          lex_source_unref (source);
+        }
       macro_set_destroy (lexer->macros);
       free (lexer);
     }
@@ -374,7 +403,8 @@ lex_get (struct lexer *lexer)
   while (src->parse_ofs == src->n_parse)
     if (!lex_source_get_parse (src))
       {
-        lex_source_destroy (src);
+        ll_remove (&src->ll);
+        lex_source_unref (src);
         src = lex_source__ (lexer);
         if (src == NULL)
           return;
@@ -399,27 +429,77 @@ lex_error (struct lexer *lexer, const char *format, ...)
   va_list args;
 
   va_start (args, format);
-  lex_next_error_valist (lexer, 0, 0, format, args);
+  lex_ofs_msg_valist (lexer, SE, lex_ofs (lexer), lex_ofs (lexer),
+                      format, args);
   va_end (args);
 }
 
-/* Prints a syntax error message containing the current token and
-   given message MESSAGE (if non-null). */
+/* Prints a syntax error message for the span of tokens N0 through N1,
+   inclusive, from the current token in LEXER, adding message MESSAGE (if
+   non-null). */
 void
-lex_error_valist (struct lexer *lexer, const char *format, va_list args)
+lex_next_error (struct lexer *lexer, int n0, int n1, const char *format, ...)
 {
-  lex_next_error_valist (lexer, 0, 0, format, args);
+  va_list args;
+
+  va_start (args, format);
+  int ofs = lex_ofs (lexer);
+  lex_ofs_msg_valist (lexer, SE, n0 + ofs, n1 + ofs, format, args);
+  va_end (args);
 }
 
-/* Prints a syntax error message containing the current token and
-   given message MESSAGE (if non-null). */
+/* Prints a syntax error message for the span of tokens with offsets OFS0
+   through OFS1, inclusive, within the current command in LEXER, adding message
+   MESSAGE (if non-null). */
 void
-lex_next_error (struct lexer *lexer, int n0, int n1, const char *format, ...)
+lex_ofs_error (struct lexer *lexer, int ofs0, int ofs1, const char *format, ...)
+{
+  va_list args;
+
+  va_start (args, format);
+  lex_ofs_msg_valist (lexer, SE, ofs0, ofs1, format, args);
+  va_end (args);
+}
+
+/* Prints a message of the given CLASS containing the current token and given
+   message MESSAGE (if non-null). */
+void
+lex_msg (struct lexer *lexer, enum msg_class class, const char *format, ...)
+{
+  va_list args;
+
+  va_start (args, format);
+  lex_ofs_msg_valist (lexer, class, lex_ofs (lexer), lex_ofs (lexer),
+                      format, args);
+  va_end (args);
+}
+
+/* Prints a message of the given CLASS for the span of tokens N0 through N1,
+   inclusive, from the current token in LEXER, adding message MESSAGE (if
+   non-null). */
+void
+lex_next_msg (struct lexer *lexer, enum msg_class class, int n0, int n1,
+              const char *format, ...)
 {
   va_list args;
 
   va_start (args, format);
-  lex_next_error_valist (lexer, n0, n1, format, args);
+  int ofs = lex_ofs (lexer);
+  lex_ofs_msg_valist (lexer, class, n0 + ofs, n1 + ofs, format, args);
+  va_end (args);
+}
+
+/* Prints a message of the given CLASS for the span of tokens with offsets OFS0
+   through OFS1, inclusive, within the current command in LEXER, adding message
+   MESSAGE (if non-null). */
+void
+lex_ofs_msg (struct lexer *lexer, enum msg_class class, int ofs0, int ofs1,
+             const char *format, ...)
+{
+  va_list args;
+
+  va_start (args, format);
+  lex_ofs_msg_valist (lexer, class, ofs0, ofs1, format, args);
   va_end (args);
 }
 
@@ -440,18 +520,22 @@ void
 void
 lex_error_expecting_valist (struct lexer *lexer, va_list args)
 {
-  enum { MAX_OPTIONS = 9 };
-  const char *options[MAX_OPTIONS];
-  int n = 0;
-  while (n < MAX_OPTIONS)
+  const char **options = NULL;
+  size_t allocated = 0;
+  size_t n = 0;
+
+  for (;;)
     {
       const char *option = va_arg (args, const char *);
       if (!option)
         break;
 
+      if (n >= allocated)
+        options = x2nrealloc (options, &allocated, sizeof *options);
       options[n++] = option;
     }
   lex_error_expecting_array (lexer, options, n);
+  free (options);
 }
 
 void
@@ -464,62 +548,82 @@ lex_error_expecting_array (struct lexer *lexer, const char **options, size_t n)
       break;
 
     case 1:
-      lex_error (lexer, _("expecting %s"), options[0]);
+      lex_error (lexer, _("Syntax error expecting %s."), options[0]);
       break;
 
     case 2:
-      lex_error (lexer, _("expecting %s or %s"), options[0], options[1]);
+      lex_error (lexer, _("Syntax error expecting %s or %s."),
+                 options[0], options[1]);
       break;
 
     case 3:
-      lex_error (lexer, _("expecting %s, %s, or %s"), options[0], options[1],
-                 options[2]);
+      lex_error (lexer, _("Syntax error expecting %s, %s, or %s."),
+                 options[0], options[1], options[2]);
       break;
 
     case 4:
-      lex_error (lexer, _("expecting %s, %s, %s, or %s"),
+      lex_error (lexer, _("Syntax error expecting %s, %s, %s, or %s."),
                  options[0], options[1], options[2], options[3]);
       break;
 
     case 5:
-      lex_error (lexer, _("expecting %s, %s, %s, %s, or %s"),
+      lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, or %s."),
                  options[0], options[1], options[2], options[3], options[4]);
       break;
 
     case 6:
-      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, or %s"),
+      lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, %s, or %s."),
                  options[0], options[1], options[2], options[3], options[4],
                  options[5]);
       break;
 
     case 7:
-      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, or %s"),
+      lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, %s, %s, "
+                          "or %s."),
                  options[0], options[1], options[2], options[3], options[4],
                  options[5], options[6]);
       break;
 
     case 8:
-      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, %s, or %s"),
+      lex_error (lexer, _("Syntax error expecting %s, %s, %s, %s, %s, %s, %s, "
+                          "or %s."),
                  options[0], options[1], options[2], options[3], options[4],
                  options[5], options[6], options[7]);
       break;
 
     default:
-      lex_error (lexer, NULL);
+      {
+        struct string s = DS_EMPTY_INITIALIZER;
+        for (size_t i = 0; i < n; i++)
+          {
+            if (i > 0)
+              ds_put_cstr (&s, ", ");
+            ds_put_cstr (&s, options[i]);
+          }
+        lex_error (lexer, _("Syntax error expecting one of the following: %s."),
+                   ds_cstr (&s));
+        ds_destroy (&s);
+      }
+      break;
     }
 }
 
 /* Reports an error to the effect that subcommand SBC may only be specified
-   once.
-
-   This function does not take a lexer as an argument or use lex_error(),
-   because the result would ordinarily just be redundant: "Syntax error at
-   SUBCOMMAND: Subcommand SUBCOMMAND may only be specified once.", which does
-   not help the user find the error. */
+   once. */
 void
-lex_sbc_only_once (const char *sbc)
+lex_sbc_only_once (struct lexer *lexer, const char *sbc)
 {
-  msg (SE, _("Subcommand %s may only be specified once."), sbc);
+  int ofs = lex_ofs (lexer) - 1;
+  if (lex_ofs_token (lexer, ofs)->type == T_EQUALS)
+    ofs--;
+
+  /* lex_ofs_at_phrase__() handles subcommand names that are keywords, such as
+     BY. */
+  if (lex_ofs_at_phrase__ (lexer, ofs, sbc, NULL))
+    lex_ofs_error (lexer, ofs, ofs,
+                   _("Subcommand %s may only be specified once."), sbc);
+  else
+    msg (SE, _("Subcommand %s may only be specified once."), sbc);
 }
 
 /* Reports an error to the effect that subcommand SBC is missing.
@@ -529,9 +633,10 @@ lex_sbc_only_once (const char *sbc)
    command has been parsed, and so lex_error() would always report "Syntax
    error at end of command", which does not help the user find the error. */
 void
-lex_sbc_missing (const char *sbc)
+lex_sbc_missing (struct lexer *lexer, const char *sbc)
 {
-  msg (SE, _("Required subcommand %s was not specified."), sbc);
+  lex_ofs_error (lexer, 0, lex_max_ofs (lexer),
+                 _("Required subcommand %s was not specified."), sbc);
 }
 
 /* Reports an error to the effect that specification SPEC may only be specified
@@ -539,7 +644,7 @@ lex_sbc_missing (const char *sbc)
 void
 lex_spec_only_once (struct lexer *lexer, const char *sbc, const char *spec)
 {
-  lex_error (lexer, _("%s may only be specified once within subcommand %s"),
+  lex_error (lexer, _("%s may only be specified once within subcommand %s."),
              spec, sbc);
 }
 
@@ -548,36 +653,18 @@ lex_spec_only_once (struct lexer *lexer, const char *sbc, const char *spec)
 void
 lex_spec_missing (struct lexer *lexer, const char *sbc, const char *spec)
 {
-  lex_error (lexer, _("Required %s specification missing from %s subcommand"),
-             sbc, spec);
+  lex_error (lexer, _("Required %s specification missing from %s subcommand."),
+             spec, sbc);
 }
 
-/* Prints a syntax error message containing the current token and
-   given message MESSAGE (if non-null). */
+/* Prints a message of the given CLASS for the span of tokens with offsets OFS0
+   through OFS1, inclusive, within the current command in LEXER, adding message
+   MESSAGE (if non-null) with the given ARGS. */
 void
-lex_next_error_valist (struct lexer *lexer, int n0, int n1,
-                       const char *format, va_list args)
+lex_ofs_msg_valist (struct lexer *lexer, enum msg_class class,
+                    int ofs0, int ofs1, const char *format, va_list args)
 {
-  struct lex_source *src = lex_source__ (lexer);
-
-  if (src != NULL)
-    lex_source_error_valist (src, n0, n1, format, args);
-  else
-    {
-      struct string s;
-
-      ds_init_empty (&s);
-      ds_put_format (&s, _("Syntax error at end of input"));
-      if (format != NULL)
-        {
-          ds_put_cstr (&s, ": ");
-          ds_put_vformat (&s, format, args);
-        }
-      if (ds_last (&s) != '.')
-        ds_put_byte (&s, '.');
-      msg (SE, "%s", ds_cstr (&s));
-      ds_destroy (&s);
-    }
+  lex_source_msg_valist (lex_source__ (lexer), class, ofs0, ofs1, format, args);
 }
 
 /* Checks that we're at end of command.
@@ -589,7 +676,7 @@ lex_end_of_command (struct lexer *lexer)
 {
   if (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_STOP)
     {
-      lex_error (lexer, _("expecting end of command"));
+      lex_error (lexer, _("Syntax error expecting end of command."));
       return CMD_FAILURE;
     }
   else
@@ -791,7 +878,7 @@ lex_force_string (struct lexer *lexer)
     return true;
   else
     {
-      lex_error (lexer, _("expecting string"));
+      lex_error (lexer, _("Syntax error expecting string."));
       return false;
     }
 }
@@ -820,7 +907,7 @@ lex_force_int (struct lexer *lexer)
     return true;
   else
     {
-      lex_error (lexer, _("expecting integer"));
+      lex_error (lexer, _("Syntax error expecting integer."));
       return false;
     }
 }
@@ -847,23 +934,25 @@ lex_force_int_range (struct lexer *lexer, const char *name, long min, long max)
       /* Weird, maybe a bug in the caller.  Just report that we needed an
          integer. */
       if (name)
-        lex_error (lexer, _("Integer expected for %s."), name);
+        lex_error (lexer, _("Syntax error expecting integer for %s."), name);
       else
-        lex_error (lexer, _("Integer expected."));
+        lex_error (lexer, _("Syntax error expecting integer."));
     }
   else if (min == max)
     {
       if (name)
-        lex_error (lexer, _("Expected %ld for %s."), min, name);
+        lex_error (lexer, _("Syntax error expecting %ld for %s."), min, name);
       else
-        lex_error (lexer, _("Expected %ld."), min);
+        lex_error (lexer, _("Syntax error expecting %ld."), min);
     }
   else if (min + 1 == max)
     {
       if (name)
-        lex_error (lexer, _("Expected %ld or %ld for %s."), min, min + 1, name);
+        lex_error (lexer, _("Syntax error expecting %ld or %ld for %s."),
+                   min, min + 1, name);
       else
-        lex_error (lexer, _("Expected %ld or %ld."), min, min + 1);
+        lex_error (lexer, _("Syntax error expecting %ld or %ld."),
+                   min, min + 1);
     }
   else
     {
@@ -874,10 +963,12 @@ lex_force_int_range (struct lexer *lexer, const char *name, long min, long max)
         {
           if (name)
             lex_error (lexer,
-                       _("Expected integer between %ld and %ld for %s."),
+                       _("Syntax error expecting integer "
+                         "between %ld and %ld for %s."),
                        min, max, name);
           else
-            lex_error (lexer, _("Expected integer between %ld and %ld."),
+            lex_error (lexer, _("Syntax error expecting integer "
+                                "between %ld and %ld."),
                        min, max);
         }
       else if (report_lower_bound)
@@ -885,44 +976,53 @@ lex_force_int_range (struct lexer *lexer, const char *name, long min, long max)
           if (min == 0)
             {
               if (name)
-                lex_error (lexer, _("Expected non-negative integer for %s."),
+                lex_error (lexer, _("Syntax error expecting "
+                                    "non-negative integer for %s."),
                            name);
               else
-                lex_error (lexer, _("Expected non-negative integer."));
+                lex_error (lexer, _("Syntax error expecting "
+                                    "non-negative integer."));
             }
           else if (min == 1)
             {
               if (name)
-                lex_error (lexer, _("Expected positive integer for %s."),
+                lex_error (lexer, _("Syntax error expecting "
+                                    "positive integer for %s."),
                            name);
               else
-                lex_error (lexer, _("Expected positive integer."));
+                lex_error (lexer, _("Syntax error expecting "
+                                    "positive integer."));
             }
           else
             {
               if (name)
-                lex_error (lexer, _("Expected integer %ld or greater for %s."),
+                lex_error (lexer, _("Syntax error expecting "
+                                    "integer %ld or greater for %s."),
                            min, name);
               else
-                lex_error (lexer, _("Expected integer %ld or greater."), min);
+                lex_error (lexer, _("Syntax error expecting "
+                                    "integer %ld or greater."), min);
             }
         }
       else if (report_upper_bound)
         {
           if (name)
             lex_error (lexer,
-                       _("Expected integer less than or equal to %ld for %s."),
+                       _("Syntax error expecting integer less than or equal "
+                         "to %ld for %s."),
                        max, name);
           else
-            lex_error (lexer, _("Expected integer less than or equal to %ld."),
+            lex_error (lexer, _("Syntax error expecting integer less than or "
+                                "equal to %ld."),
                        max);
         }
       else
         {
           if (name)
-            lex_error (lexer, _("Integer expected for %s."), name);
+            lex_error (lexer, _("Syntax error expecting integer for %s."),
+                       name);
           else
-            lex_error (lexer, _("Integer expected."));
+            lex_error (lexer, _("Syntax error expecting integer."));
         }
     }
   return false;
@@ -936,7 +1036,262 @@ lex_force_num (struct lexer *lexer)
   if (lex_is_number (lexer))
     return true;
 
-  lex_error (lexer, _("expecting number"));
+  lex_error (lexer, _("Syntax error expecting number."));
+  return false;
+}
+
+/* If the current token is a number in the closed range [MIN,MAX], does
+   nothing and returns true.  Otherwise, reports an error and returns false.
+   If NAME is nonnull, then it is used in the error message. */
+bool
+lex_force_num_range_closed (struct lexer *lexer, const char *name,
+                            double min, double max)
+{
+  bool is_number = lex_is_number (lexer);
+  bool too_small = is_number && lex_number (lexer) < min;
+  bool too_big = is_number && lex_number (lexer) > max;
+  if (is_number && !too_small && !too_big)
+    return true;
+
+  if (min > max)
+    {
+      /* Weird, maybe a bug in the caller.  Just report that we needed a
+         number. */
+      if (name)
+        lex_error (lexer, _("Syntax error expecting number for %s."), name);
+      else
+        lex_error (lexer, _("Syntax error expecting number."));
+    }
+  else if (min == max)
+    {
+      if (name)
+        lex_error (lexer, _("Syntax error expecting number %g for %s."),
+                   min, name);
+      else
+        lex_error (lexer, _("Syntax error expecting number %g."), min);
+    }
+  else
+    {
+      bool report_lower_bound = min > -DBL_MAX || too_small;
+      bool report_upper_bound = max < DBL_MAX || too_big;
+
+      if (report_lower_bound && report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer,
+                       _("Syntax error expecting number "
+                         "between %g and %g for %s."),
+                       min, max, name);
+          else
+            lex_error (lexer, _("Syntax error expecting number "
+                                "between %g and %g."),
+                       min, max);
+        }
+      else if (report_lower_bound)
+        {
+          if (min == 0)
+            {
+              if (name)
+                lex_error (lexer, _("Syntax error expecting "
+                                    "non-negative number for %s."),
+                           name);
+              else
+                lex_error (lexer, _("Syntax error expecting "
+                                    "non-negative number."));
+            }
+          else
+            {
+              if (name)
+                lex_error (lexer, _("Syntax error expecting number "
+                                    "%g or greater for %s."),
+                           min, name);
+              else
+                lex_error (lexer, _("Syntax error expecting number "
+                                    "%g or greater."), min);
+            }
+        }
+      else if (report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer,
+                       _("Syntax error expecting number "
+                         "less than or equal to %g for %s."),
+                       max, name);
+          else
+            lex_error (lexer, _("Syntax error expecting number "
+                                "less than or equal to %g."),
+                       max);
+        }
+      else
+        {
+          if (name)
+            lex_error (lexer, _("Syntax error expecting number for %s."), name);
+          else
+            lex_error (lexer, _("Syntax error expecting number."));
+        }
+    }
+  return false;
+}
+
+/* If the current token is a number in the half-open range [MIN,MAX), does
+   nothing and returns true.  Otherwise, reports an error and returns false.
+   If NAME is nonnull, then it is used in the error message. */
+bool
+lex_force_num_range_halfopen (struct lexer *lexer, const char *name,
+                              double min, double max)
+{
+  bool is_number = lex_is_number (lexer);
+  bool too_small = is_number && lex_number (lexer) < min;
+  bool too_big = is_number && lex_number (lexer) >= max;
+  if (is_number && !too_small && !too_big)
+    return true;
+
+  if (min >= max)
+    {
+      /* Weird, maybe a bug in the caller.  Just report that we needed a
+         number. */
+      if (name)
+        lex_error (lexer, _("Syntax error expecting number for %s."), name);
+      else
+        lex_error (lexer, _("Syntax error expecting number."));
+    }
+  else
+    {
+      bool report_lower_bound = min > -DBL_MAX || too_small;
+      bool report_upper_bound = max < DBL_MAX || too_big;
+
+      if (report_lower_bound && report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer, _("Syntax error expecting number "
+                                "in [%g,%g) for %s."),
+                       min, max, name);
+          else
+            lex_error (lexer, _("Syntax error expecting number in [%g,%g)."),
+                       min, max);
+        }
+      else if (report_lower_bound)
+        {
+          if (min == 0)
+            {
+              if (name)
+                lex_error (lexer, _("Syntax error expecting "
+                                    "non-negative number for %s."),
+                           name);
+              else
+                lex_error (lexer, _("Syntax error expecting "
+                                    "non-negative number."));
+            }
+          else
+            {
+              if (name)
+                lex_error (lexer, _("Syntax error expecting "
+                                    "number %g or greater for %s."),
+                           min, name);
+              else
+                lex_error (lexer, _("Syntax error expecting "
+                                    "number %g or greater."), min);
+            }
+        }
+      else if (report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer,
+                       _("Syntax error expecting "
+                         "number less than %g for %s."), max, name);
+          else
+            lex_error (lexer, _("Syntax error expecting "
+                                "number less than %g."), max);
+        }
+      else
+        {
+          if (name)
+            lex_error (lexer, _("Syntax error expecting number for %s."), name);
+          else
+            lex_error (lexer, _("Syntax error expecting number."));
+        }
+    }
+  return false;
+}
+
+/* If the current token is a number in the open range (MIN,MAX), does
+   nothing and returns true.  Otherwise, reports an error and returns false.
+   If NAME is nonnull, then it is used in the error message. */
+bool
+lex_force_num_range_open (struct lexer *lexer, const char *name,
+                          double min, double max)
+{
+  bool is_number = lex_is_number (lexer);
+  bool too_small = is_number && lex_number (lexer) <= min;
+  bool too_big = is_number && lex_number (lexer) >= max;
+  if (is_number && !too_small && !too_big)
+    return true;
+
+  if (min >= max)
+    {
+      /* Weird, maybe a bug in the caller.  Just report that we needed a
+         number. */
+      if (name)
+        lex_error (lexer, _("Syntax error expecting number for %s."), name);
+      else
+        lex_error (lexer, _("Syntax error expecting number."));
+    }
+  else
+    {
+      bool report_lower_bound = min > -DBL_MAX || too_small;
+      bool report_upper_bound = max < DBL_MAX || too_big;
+
+      if (report_lower_bound && report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer, _("Syntax error expecting number "
+                                "in (%g,%g) for %s."),
+                       min, max, name);
+          else
+            lex_error (lexer, _("Syntax error expecting number "
+                                "in (%g,%g)."), min, max);
+        }
+      else if (report_lower_bound)
+        {
+          if (min == 0)
+            {
+              if (name)
+                lex_error (lexer, _("Syntax error expecting "
+                                    "positive number for %s."), name);
+              else
+                lex_error (lexer, _("Syntax error expecting "
+                                    "positive number."));
+            }
+          else
+            {
+              if (name)
+                lex_error (lexer, _("Syntax error expecting number "
+                                    "greater than %g for %s."),
+                           min, name);
+              else
+                lex_error (lexer, _("Syntax error expecting number "
+                                    "greater than %g."), min);
+            }
+        }
+      else if (report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer, _("Syntax error expecting number "
+                                "less than %g for %s."),
+                       max, name);
+          else
+            lex_error (lexer, _("Syntax error expecting number "
+                                "less than %g."), max);
+        }
+      else
+        {
+          if (name)
+            lex_error (lexer, _("Syntax error expecting number "
+                                "for %s."), name);
+          else
+            lex_error (lexer, _("Syntax error expecting number."));
+        }
+    }
   return false;
 }
 
@@ -948,7 +1303,7 @@ lex_force_id (struct lexer *lexer)
   if (lex_token (lexer) == T_ID)
     return true;
 
-  lex_error (lexer, _("expecting identifier"));
+  lex_error (lexer, _("Syntax error expecting identifier."));
   return false;
 }
 
@@ -1024,23 +1379,18 @@ lex_next__ (const struct lexer *lexer_, int n)
 }
 
 static const struct lex_token *
-lex_source_next__ (const struct lex_source *src_, int n)
+lex_source_ofs__ (const struct lex_source *src_, int ofs)
 {
   struct lex_source *src = CONST_CAST (struct lex_source *, src_);
 
-  if (n < 0)
+  if (ofs < 0)
     {
-      if (-n <= src->parse_ofs)
-        return src->parse[src->parse_ofs - (-n)];
-      else
-        {
-          static const struct lex_token endcmd_token
-            = { .token = { .type = T_ENDCMD } };
-          return &endcmd_token;
-        }
+      static const struct lex_token endcmd_token
+        = { .token = { .type = T_ENDCMD } };
+      return &endcmd_token;
     }
 
-  while (src->n_parse - src->parse_ofs <= n)
+  while (ofs >= src->n_parse)
     {
       if (src->n_parse > 0)
         {
@@ -1052,7 +1402,13 @@ lex_source_next__ (const struct lex_source *src_, int n)
       lex_source_get_parse (src);
     }
 
-  return src->parse[src->parse_ofs + n];
+  return src->parse[ofs];
+}
+
+static const struct lex_token *
+lex_source_next__ (const struct lex_source *src, int n)
+{
+  return lex_source_ofs__ (src, n + src->parse_ofs);
 }
 
 /* Returns the "struct token" of the token N after the current one in LEXER.
@@ -1113,17 +1469,137 @@ lex_next_tokss (const struct lexer *lexer, int n)
   return lex_next (lexer, n)->string;
 }
 
+/* Returns the index of the current token within the command that LEXER is
+   parsing: 0 for the command's first token, 1 for its second, and so on.
+   Save the result to refer back to this token later through the lex_ofs_*()
+   functions.  Returns 0 if LEXER has no source. */
+int
+lex_ofs (const struct lexer *lexer)
+{
+  const struct lex_source *source = lex_source__ (lexer);
+  return source != NULL ? source->parse_ofs : 0;
+}
+
+/* Returns the offset of the T_ENDCMD or T_STOP token that ends the current
+   command in LEXER, or 0 if LEXER has no source. */
+int
+lex_max_ofs (const struct lexer *lexer)
+{
+  struct lex_source *source = lex_source__ (lexer);
+  if (source == NULL)
+    return 0;
+
+  /* Start at the last token already in the parse queue, if any, and scan
+     forward; lex_source_ofs__() reads more tokens as needed. */
+  for (int i = MAX (1, source->n_parse) - 1; ; i++)
+    {
+      enum token_type t = lex_source_ofs__ (source, i)->token.type;
+      if (t == T_ENDCMD || t == T_STOP)
+        return i;
+    }
+}
+
+/* Returns the token at offset OFS within the command that LEXER is currently
+   parsing, or a T_STOP token if LEXER has no source.  lex_ofs() reports the
+   offset of the current token. */
+const struct token *
+lex_ofs_token (const struct lexer *lexer_, int ofs)
+{
+  struct lexer *lexer = CONST_CAST (struct lexer *, lexer_);
+  struct lex_source *src = lex_source__ (lexer);
+  if (src == NULL)
+    {
+      static const struct token stop_token = { .type = T_STOP };
+      return &stop_token;
+    }
+
+  return &lex_source_ofs__ (src, ofs)->token;
+}
+
+/* Creates and returns a struct msg_location covering the tokens at offsets
+   OFS0 through OFS1, inclusive, of the command that LEXER is parsing.
+   lex_ofs() explains token offsets.
+
+   The returned object belongs to the caller, who must eventually free it. */
+struct msg_location *
+lex_ofs_location (const struct lexer *lexer, int ofs0, int ofs1)
+{
+  const int cur = lex_ofs (lexer);      /* Converts absolute to relative. */
+  return lex_get_location (lexer, ofs0 - cur, ofs1 - cur);
+}
+
+/* Returns the msg_point of the first character of the token at offset OFS in
+   the command that LEXER is parsing.  OFS is absolute: 0 designates the
+   command's first token, 1 its second, and so on, regardless of which token
+   is current.
+
+   Returns a zeroed msg_point if LEXER has no source.
+ */
+struct msg_point
+lex_ofs_start_point (const struct lexer *lexer, int ofs)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (src == NULL)
+    return (struct msg_point) { 0, 0 };
+  return lex_token_start_point (src, lex_source_ofs__ (src, ofs));
+}
+
+/* Returns the msg_point of the last character, inclusive, of the token at
+   offset OFS in the command that LEXER is parsing.  OFS is absolute: 0
+   designates the command's first token, 1 its second, and so on, regardless
+   of which token is current.
+
+   Returns a zeroed msg_point if LEXER has no source.
+
+   Usually a token lies wholly within one line of syntax, so that the start
+   and end points for a given offset have the same line number.  There are
+   two exceptions.  A T_STRING token can be assembled from several segments
+   on adjacent lines that are joined by "+" punctuators.  A T_NEG_NUM token
+   can consist of a "-" on one line followed by the number on the next
+   line.
+ */
+struct msg_point
+lex_ofs_end_point (const struct lexer *lexer, int ofs)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  if (src == NULL)
+    return (struct msg_point) { 0, 0 };
+  return lex_token_end_point (src, lex_source_ofs__ (src, ofs));
+}
+
 /* Returns the text of the syntax in tokens N0 ahead of the current one,
    through N1 ahead of the current one, inclusive.  (For example, if N0 and N1
-   are both zero, this requests the syntax for the current token.)  The caller
-   must eventually free the returned string (with free()).  The syntax is
-   encoded in UTF-8 and in the original form supplied to the lexer so that, for
-   example, it may include comments, spaces, and new-lines if it spans multiple
-   tokens.  Macro expansion, however, has already been performed. */
+   are both zero, this requests the syntax for the current token.)  Returns an
+   empty string if LEXER has no source.
+
+   The caller owns the returned string and must eventually free() it.  The
+   syntax is UTF-8, in the form originally supplied to the lexer, so that a
+   span of multiple tokens may include comments, spaces, and new-lines.  Macro
+   expansion, however, has already been performed. */
 char *
 lex_next_representation (const struct lexer *lexer, int n0, int n1)
 {
-  return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1);
+  const struct lex_source *src = lex_source__ (lexer);
+  if (src == NULL)
+    return xstrdup ("");
+  return lex_source_syntax__ (src, n0 + src->parse_ofs, n1 + src->parse_ofs);
 }
+
+
+/* Returns the syntax text for the tokens at offsets OFS0 through OFS1,
+   inclusive, in the current command.  (If OFS0 and OFS1 are both zero, for
+   example, the result is the syntax for the command's first token.)  Returns
+   an empty string if LEXER has no source.
+
+   The caller owns the returned string and must eventually free() it.  The
+   syntax is UTF-8, in the form originally supplied to the lexer, so that a
+   span of multiple tokens may include comments, spaces, and new-lines.  Macro
+   expansion, however, has already been performed. */
+char *
+lex_ofs_representation (const struct lexer *lexer, int ofs0, int ofs1)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  return src != NULL ? lex_source_syntax__ (src, ofs0, ofs1) : xstrdup ("");
 }
 
 /* Returns true if the token N ahead of the current one was produced by macro
@@ -1159,22 +1635,31 @@ lex_tokens_match (const struct token *actual, const struct token *expected)
     }
 }
 
-static size_t
-lex_at_phrase__ (struct lexer *lexer, const char *s)
+static bool
+lex_ofs_at_phrase__ (struct lexer *lexer, int ofs, const char *s,
+                     size_t *n_matchedp)
 {
   struct string_lexer slex;
   struct token token;
 
-  size_t i = 0;
+  size_t n_matched = 0;         /* Tokens of S matched so far. */
+  bool all_matched = true;      /* Set to false at the first mismatch. */
   string_lexer_init (&slex, s, strlen (s), SEG_MODE_INTERACTIVE, true);
   while (string_lexer_next (&slex, &token))
     {
-      bool match = lex_tokens_match (lex_next (lexer, i++), &token);
+      bool match = lex_tokens_match (lex_ofs_token (lexer, ofs + n_matched),
+                                     &token);
       token_uninit (&token);
       if (!match)
-        return 0;
+        {
+          all_matched = false;
+          break;
+        }
+      n_matched++;
     }
-  return i;
+  if (n_matchedp)               /* The count is optional. */
+    *n_matchedp = n_matched;
+  return all_matched;
 }
 
 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
@@ -1186,7 +1671,7 @@ lex_at_phrase__ (struct lexer *lexer, const char *s)
 bool
 lex_at_phrase (struct lexer *lexer, const char *s)
 {
-  return lex_at_phrase__ (lexer, s) > 0;
+  return lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, NULL); /* No count. */
 }
 
 /* If LEXER is positioned at the sequence of tokens that may be parsed from S,
@@ -1198,61 +1683,83 @@ lex_at_phrase (struct lexer *lexer, const char *s)
 bool
 lex_match_phrase (struct lexer *lexer, const char *s)
 {
-  size_t n = lex_at_phrase__ (lexer, s);
-  if (n > 0)
-    lex_get_n (lexer, n);
-  return n > 0;
+  size_t n_tokens;
+  bool at_phrase = lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_tokens);
+  if (at_phrase)
+    lex_get_n (lexer, n_tokens);
+  return at_phrase;
 }
 
-static int
-count_newlines (char *s, size_t length)
-{
-  int n_newlines = 0;
-  char *newline;
-
-  while ((newline = memchr (s, '\n', length)) != NULL)
-    {
-      n_newlines++;
-      length -= (newline + 1) - s;
-      s = newline + 1;
-    }
+/* If the tokens at LEXER's current position are those that S parses into,
+   skips them and returns true.  Otherwise, reports an error and returns false.
 
-  return n_newlines;
+   S may be any sequence of tokens, such as "KRUSKAL-WALLIS", "2SLS", or
+   "END INPUT PROGRAM".  Identifiers may be abbreviated to their first three
+   letters. */
+bool
+lex_force_match_phrase (struct lexer *lexer, const char *s)
+{
+  size_t n_matched;
+  bool matched = lex_ofs_at_phrase__ (lexer, lex_ofs (lexer), s, &n_matched);
+  if (!matched)
+    lex_next_error (lexer, 0, n_matched, _("Syntax error expecting `%s'."), s);
+  else
+    lex_get_n (lexer, n_matched);
+  return matched;
 }
 
+/* Returns the 1-based number of the line in SRC that contains byte OFFSET,
+   found by binary search over SRC's array of line start offsets. */
 static int
-lex_token_get_last_line_number (const struct lex_source *src,
-                                const struct lex_token *token)
+lex_source_ofs_to_line_number (const struct lex_source *src, size_t offset)
 {
-  if (token->first_line == 0)
-    return 0;
-  else
+  size_t lo = 0;                /* Line LO + 1 starts at or before OFFSET. */
+  size_t hi = src->n_lines;     /* Line HI + 1, if any, starts after OFFSET. */
+  for (;;)
     {
-      char *token_str = &src->buffer[token->token_pos];
-      return token->first_line + count_newlines (token_str, token->token_len) + 1;
+      /* When the bounds are adjacent, OFFSET lies in line HI (== LO + 1).
+         With n_lines <= 1 this returns n_lines without touching lines[]. */
+      if (hi <= lo + 1)
+        return hi;
+      size_t mid = lo + (hi - lo) / 2;
+      if (offset >= src->lines[mid])
+        lo = mid;
+      else
+        hi = mid;
     }
 }
 
+/* Returns the 1-based column of byte OFFSET in SRC: 1 plus the result of
+   utf8_count_columns() on the text between its line's start and OFFSET. */
 static int
-lex_token_get_column__ (const struct lex_source *src, size_t offset)
+lex_source_ofs_to_column_number (const struct lex_source *src, size_t offset)
 {
   const char *newline = memrchr (src->buffer, '\n', offset);
   size_t line_ofs = newline ? newline - src->buffer + 1 : 0;
   return utf8_count_columns (&src->buffer[line_ofs], offset - line_ofs) + 1;
 }
 
-static int
-lex_token_get_first_column (const struct lex_source *src,
-                            const struct lex_token *token)
+static struct msg_point
+lex_source_ofs_to_point__ (const struct lex_source *src, size_t offset)
 {
-  return lex_token_get_column__ (src, token->token_pos);
+  /* Translate byte OFFSET within SRC into a 1-based line and column. */
+  int line = lex_source_ofs_to_line_number (src, offset);
+  int column = lex_source_ofs_to_column_number (src, offset);
+  return (struct msg_point) { .line = line, .column = column };
 }
 
-static int
-lex_token_get_last_column (const struct lex_source *src,
-                           const struct lex_token *token)
+static struct msg_point         /* Point of TOKEN's first byte in SRC. */
+lex_token_start_point (const struct lex_source *src,
+                       const struct lex_token *token)
+{
+  return lex_source_ofs_to_point__ (src, token->token_pos);
+}
+
+static struct msg_point         /* Point of TOKEN's last byte (inclusive). */
+lex_token_end_point (const struct lex_source *src,
+                     const struct lex_token *token)
 {
-  return lex_token_get_column__ (src, token->token_pos + token->token_len);
+  return lex_source_ofs_to_point__ (src, lex_token_end (token));
 }
 
 static struct msg_location
@@ -1261,11 +1768,10 @@ lex_token_location (const struct lex_source *src,
                     const struct lex_token *t1)
 {
   return (struct msg_location) {
-    .file_name = src->reader->file_name,
-    .first_line = t0->first_line,
-    .last_line = lex_token_get_last_line_number (src, t1),
-    .first_column = lex_token_get_first_column (src, t0),
-    .last_column = lex_token_get_last_column (src, t1),
+    .file_name = intern_new_if_nonnull (src->reader->file_name),
+    .start = lex_token_start_point (src, t0),
+    .end = lex_token_end_point (src, t1),
+    .src = CONST_CAST (struct lex_source *, src),
   };
 }
 
@@ -1279,67 +1785,11 @@ lex_token_location_rw (const struct lex_source *src,
 }
 
 static struct msg_location *
-lex_source_get_location (const struct lex_source *src, int n0, int n1)
+lex_source_get_location (const struct lex_source *src, int ofs0, int ofs1)
 {
   return lex_token_location_rw (src,
-                                lex_source_next__ (src, n0),
-                                lex_source_next__ (src, n1));
-}
-
-/* Returns the 1-based line number of the start of the syntax that represents
-   the token N after the current one in LEXER.  Returns 0 for a T_STOP token or
-   if the token is drawn from a source that does not have line numbers. */
-int
-lex_get_first_line_number (const struct lexer *lexer, int n)
-{
-  const struct lex_source *src = lex_source__ (lexer);
-  return src ? lex_source_next__ (src, n)->first_line : 0;
-}
-
-/* Returns the 1-based line number of the end of the syntax that represents the
-   token N after the current one in LEXER, plus 1.  Returns 0 for a T_STOP
-   token or if the token is drawn from a source that does not have line
-   numbers.
-
-   Most of the time, a single token is wholly within a single line of syntax,
-   but there are two exceptions: a T_STRING token can be made up of multiple
-   segments on adjacent lines connected with "+" punctuators, and a T_NEG_NUM
-   token can consist of a "-" on one line followed by the number on the next.
- */
-int
-lex_get_last_line_number (const struct lexer *lexer, int n)
-{
-  const struct lex_source *src = lex_source__ (lexer);
-  return src ? lex_token_get_last_line_number (src,
-                                               lex_source_next__ (src, n)) : 0;
-}
-
-/* Returns the 1-based column number of the start of the syntax that represents
-   the token N after the current one in LEXER.  Returns 0 for a T_STOP
-   token.
-
-   Column numbers are measured according to the width of characters as shown in
-   a typical fixed-width font, in which CJK characters have width 2 and
-   combining characters have width 0.  */
-int
-lex_get_first_column (const struct lexer *lexer, int n)
-{
-  const struct lex_source *src = lex_source__ (lexer);
-  return src ? lex_token_get_first_column (src, lex_source_next__ (src, n)) : 0;
-}
-
-/* Returns the 1-based column number of the end of the syntax that represents
-   the token N after the current one in LEXER, plus 1.  Returns 0 for a T_STOP
-   token.
-
-   Column numbers are measured according to the width of characters as shown in
-   a typical fixed-width font, in which CJK characters have width 2 and
-   combining characters have width 0.  */
-int
-lex_get_last_column (const struct lexer *lexer, int n)
-{
-  const struct lex_source *src = lex_source__ (lexer);
-  return src ? lex_token_get_last_column (src, lex_source_next__ (src, n)) : 0;
+                                lex_source_ofs__ (src, ofs0), /* First. */
+                                lex_source_ofs__ (src, ofs1)); /* Last. */
 }
 
 /* Returns the name of the syntax file from which the current command is drawn.
@@ -1361,26 +1811,15 @@ lex_get_file_name (const struct lexer *lexer)
    must eventually free the location (with msg_location_destroy()). */
 struct msg_location *
 lex_get_location (const struct lexer *lexer, int n0, int n1)
-{
-  struct msg_location *loc = lex_get_lines (lexer, n0, n1);
-  loc->first_column = lex_get_first_column (lexer, n0);
-  loc->last_column = lex_get_last_column (lexer, n1);
-  return loc;
-}
-
-/* Returns a newly allocated msg_location for the syntax that represents tokens
-   with 0-based offsets N0...N1, inclusive, from the current token.  The
-   location only covers the tokens' lines, not the columns.  The caller must
-   eventually free the location (with msg_location_destroy()). */
-struct msg_location *
-lex_get_lines (const struct lexer *lexer, int n0, int n1)
 {
   struct msg_location *loc = xmalloc (sizeof *loc);
   *loc = (struct msg_location) {
-    .file_name = xstrdup_if_nonnull (lex_get_file_name (lexer)),
-    .first_line = lex_get_first_line_number (lexer, n0),
-    .last_line = lex_get_last_line_number (lexer, n1),
+    .file_name = intern_new_if_nonnull (lex_get_file_name (lexer)),
+    .start = lex_ofs_start_point (lexer, n0 + lex_ofs (lexer)),
+    .end = lex_ofs_end_point (lexer, n1 + lex_ofs (lexer)),
+    .src = lex_source__ (lexer),        /* May be null. */
   };
+  lex_source_ref (loc->src);            /* LOC holds its own reference. */
   return loc;
 }
 
@@ -1434,7 +1873,7 @@ lex_interactive_reset (struct lexer *lexer)
     {
       src->length = 0;
       src->journal_pos = src->seg_pos = 0;
-      src->n_newlines = 0;
+      src->n_lines = 0;
       src->suppress_next_newline = false;
       src->segmenter = segmenter_init (segmenter_get_mode (&src->segmenter),
                                        false);
@@ -1460,16 +1899,21 @@ void
 lex_discard_noninteractive (struct lexer *lexer)
 {
   struct lex_source *src = lex_source__ (lexer);
-
   if (src != NULL)
     {
+      if (src->reader->error == LEX_ERROR_IGNORE)
+        return;
+
       lex_stage_clear (&src->pp);
       lex_stage_clear (&src->merge);
       lex_source_clear_parse (src);
 
       for (; src != NULL && src->reader->error != LEX_ERROR_TERMINAL;
            src = lex_source__ (lexer))
-        lex_source_destroy (src);
+        {
+          ll_remove (&src->ll);
+          lex_source_unref (src);
+        }
     }
 }
 
@@ -1514,32 +1958,39 @@ lex_source__ (const struct lexer *lexer)
           : ll_data (ll_head (&lexer->sources), struct lex_source, ll));
 }
 
-/* Returns the text of the syntax in SRC for tokens N0 ahead of the current
-   one, through N1 ahead of the current one, inclusive.  (For example, if N0
-   and N1 are both zero, this requests the syntax for the current token.)  The
-   caller must eventually free the returned string (with free()).  The syntax
-   is encoded in UTF-8 and in the original form supplied to the lexer so that,
-   for example, it may include comments, spaces, and new-lines if it spans
-   multiple tokens.  Macro expansion, however, has already been performed. */
+const struct lex_source *
+lex_source (const struct lexer *lexer)
+{
+  return lex_source__ (lexer);  /* Same source, exposed as const. */
+}
+
+/* Returns the text of the syntax in SRC for tokens with offsets OFS0 through
+   OFS1 in the current command, inclusive.  (For example, if OFS0 and OFS1 are
+   both zero, this requests the syntax for the first token in the current
+   command.)  The caller must eventually free the returned string (with
+   free()).  The syntax is encoded in UTF-8 and in the original form supplied
+   to the lexer so that, for example, it may include comments, spaces, and
+   new-lines if it spans multiple tokens.  Macro expansion, however, has
+   already been performed. */
 static char *
-lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
+lex_source_syntax__ (const struct lex_source *src, int ofs0, int ofs1)
 {
   struct string s = DS_EMPTY_INITIALIZER;
-  for (size_t i = n0; i <= n1; )
+  for (size_t i = ofs0; i <= ofs1; )
     {
       /* Find [I,J) as the longest sequence of tokens not produced by macro
          expansion, or otherwise the longest sequence expanded from a single
          macro call. */
-      const struct lex_token *first = lex_source_next__ (src, i);
+      const struct lex_token *first = lex_source_ofs__ (src, i);
       size_t j;
-      for (j = i + 1; j <= n1; j++)
+      for (j = i + 1; j <= ofs1; j++)
         {
-          const struct lex_token *cur = lex_source_next__ (src, j);
+          const struct lex_token *cur = lex_source_ofs__ (src, j);
           if ((first->macro_rep != NULL) != (cur->macro_rep != NULL)
               || first->macro_rep != cur->macro_rep)
             break;
         }
-      const struct lex_token *last = lex_source_next__ (src, j - 1);
+      const struct lex_token *last = lex_source_ofs__ (src, j - 1);
 
       /* Now add the syntax for this sequence of tokens to SRC. */
       if (!ds_is_empty (&s))
@@ -1564,10 +2015,10 @@ lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
 }
 
 static bool
-lex_source_contains_macro_call (struct lex_source *src, int n0, int n1)
+lex_source_contains_macro_call (struct lex_source *src, int ofs0, int ofs1)
 {
-  for (size_t i = n0; i <= n1; i++)
-    if (lex_source_next__ (src, i)->macro_rep)
+  for (int i = ofs0; i <= ofs1; i++)           /* OFS1 is inclusive. */
+    if (lex_source_ofs__ (src, i)->macro_rep)  /* Came from a macro. */
       return true;
   return false;
 }
@@ -1582,13 +2033,13 @@ lex_source_contains_macro_call (struct lex_source *src, int n0, int n1)
 
    The caller must not modify or free the returned string. */
 static struct substring
-lex_source_get_macro_call (struct lex_source *src, int n0, int n1)
+lex_source_get_macro_call (struct lex_source *src, int ofs0, int ofs1)
 {
-  if (!lex_source_contains_macro_call (src, n0, n1))
+  if (!lex_source_contains_macro_call (src, ofs0, ofs1))
     return ss_empty ();
 
-  const struct lex_token *token0 = lex_source_next__ (src, n0);
-  const struct lex_token *token1 = lex_source_next__ (src, MAX (n0, n1));
+  const struct lex_token *token0 = lex_source_ofs__ (src, ofs0);
+  const struct lex_token *token1 = lex_source_ofs__ (src, MAX (ofs0, ofs1));
   size_t start = token0->token_pos;
   size_t end = token1->token_pos + token1->token_len;
 
@@ -1596,63 +2047,39 @@ lex_source_get_macro_call (struct lex_source *src, int n0, int n1)
 }
 
 static void
-lex_source_error_valist (struct lex_source *src, int n0, int n1,
-                         const char *format, va_list args)
+lex_source_msg_valist (struct lex_source *src, enum msg_class class,
+                       int ofs0, int ofs1, const char *format, va_list args)
 {
-  const struct lex_token *token;
-  struct string s;
-
-  ds_init_empty (&s);
+  struct string s = DS_EMPTY_INITIALIZER;
 
-  token = lex_source_next__ (src, n0);
-  if (token->token.type == T_ENDCMD)
-    ds_put_cstr (&s, _("Syntax error at end of command"));
-  else
+  if (src)
     {
-      /* Get the syntax that caused the error. */
-      char *raw_syntax = lex_source_get_syntax__ (src, n0, n1);
-      char syntax[64];
-      str_ellipsize (ss_cstr (raw_syntax), syntax, sizeof syntax);
-      free (raw_syntax);
-
       /* Get the macro call(s) that expanded to the syntax that caused the
          error. */
       char call[64];
-      str_ellipsize (lex_source_get_macro_call (src, n0, n1),
+      str_ellipsize (lex_source_get_macro_call (src, ofs0, ofs1),
                      call, sizeof call);
-
-      if (syntax[0])
-        {
-          if (call[0])
-            ds_put_format (&s,
-                           _("Syntax error at `%s' (in expansion of `%s')"),
-                           syntax, call);
-          else
-            ds_put_format (&s, _("Syntax error at `%s'"), syntax);
-        }
-      else
-        {
-          if (call[0])
-            ds_put_format (&s, _("Syntax error in syntax expanded from `%s'"),
-                           call);
-          else
-            ds_put_cstr (&s, _("Syntax error"));
-        }
+      if (call[0])
+        ds_put_format (&s, _("In syntax expanded from `%s'"), call);
     }
+  else
+    ds_put_cstr (&s, _("At end of input"));
 
+  if (!ds_is_empty (&s))
+    ds_put_cstr (&s, ": ");
   if (format)
-    {
-      ds_put_cstr (&s, ": ");
-      ds_put_vformat (&s, format, args);
-    }
+    ds_put_vformat (&s, format, args);
+  else
+    ds_put_cstr (&s, _("Syntax error."));
+
   if (ds_last (&s) != '.')
     ds_put_byte (&s, '.');
 
   struct msg *m = xmalloc (sizeof *m);
   *m = (struct msg) {
-    .category = MSG_C_SYNTAX,
-    .severity = MSG_S_ERROR,
-    .location = lex_source_get_location (src, n0, n1),
+    .category = msg_class_to_category (class),
+    .severity = msg_class_to_severity (class),
+    .location = src ? lex_source_get_location (src, ofs0, ofs1) : NULL,
     .text = ds_steal_cstr (&s),
   };
   msg_emit (m);
@@ -1666,8 +2093,7 @@ lex_get_error (struct lex_source *src, const struct lex_token *token)
                  syntax, sizeof syntax);
 
   struct string s = DS_EMPTY_INITIALIZER;
-  ds_put_format (&s, _("Syntax error at `%s'"), syntax);
-  ds_put_format (&s, ": %s", token->token.string.string);
+  ds_put_cstr (&s, token->token.string.string);
 
   struct msg *m = xmalloc (sizeof *m);
   *m = (struct msg) {
@@ -1693,10 +2119,6 @@ lex_source_try_get_pp (struct lex_source *src)
   token->macro_rep = NULL;
   token->ref_cnt = NULL;
   token->token_pos = src->seg_pos;
-  if (src->reader->line_number > 0)
-    token->first_line = src->reader->line_number + src->n_newlines;
-  else
-    token->first_line = 0;
 
   /* Extract a segment. */
   const char *segment;
@@ -1720,7 +2142,12 @@ lex_source_try_get_pp (struct lex_source *src)
   token->token_len = seg_len;
   src->seg_pos += seg_len;
   if (seg_type == SEG_NEWLINE)
-    src->n_newlines++;
+    {
+      if (src->n_lines >= src->allocated_lines)
+        src->lines = x2nrealloc (src->lines, &src->allocated_lines,
+                                 sizeof *src->lines);
+      src->lines[src->n_lines++] = src->seg_pos;
+    }
 
   /* Get a token from the segment. */
   enum tokenize_result result = token_from_segment (
@@ -1838,11 +2265,9 @@ lex_source_try_get_merge (const struct lex_source *src_)
         }
 
       const struct lex_token *t = lex_stage_nth (&src->pp, ofs);
-      size_t start = t->token_pos;
-      size_t end = t->token_pos + t->token_len;
       const struct macro_token mt = {
         .token = t->token,
-        .syntax = ss_buffer (&src->buffer[start], end - start),
+        .syntax = ss_buffer (&src->buffer[t->token_pos], t->token_len),
       };
       const struct msg_location loc = lex_token_location (src, t, t);
       n_call = macro_call_add (mc, &mt, &loc);
@@ -1891,7 +2316,6 @@ lex_source_try_get_merge (const struct lex_source *src_)
             .token = expansion.mts[i].token,
             .token_pos = c0->token_pos,
             .token_len = (c1->token_pos + c1->token_len) - c0->token_pos,
-            .first_line = c0->first_line,
             .macro_rep = macro_rep,
             .ofs = ofs[i],
             .len = len[i],
@@ -1967,7 +2391,6 @@ lex_source_get_parse (struct lex_source *src)
             .token = out,
             .token_pos = first->token_pos,
             .token_len = (last->token_pos - first->token_pos) + last->token_len,
-            .first_line = first->first_line,
 
             /* This works well if all the tokens were not expanded from macros,
                or if they came from the same macro expansion.  It just gives up
@@ -2018,11 +2441,19 @@ lex_source_clear_parse (struct lex_source *src)
 static struct lex_source *
 lex_source_create (struct lexer *lexer, struct lex_reader *reader)
 {
+  size_t allocated_lines = 4;
+  size_t *lines = xmalloc (allocated_lines * sizeof *lines);
+  *lines = 0;
+
   struct lex_source *src = xmalloc (sizeof *src);
   *src = (struct lex_source) {
+    .n_refs = 1,
     .reader = reader,
     .segmenter = segmenter_init (reader->syntax, false),
     .lexer = lexer,
+    .lines = lines,
+    .n_lines = 1,
+    .allocated_lines = allocated_lines,
   };
 
   lex_source_push_endcmd__ (src);
@@ -2030,9 +2461,43 @@ lex_source_create (struct lexer *lexer, struct lex_reader *reader)
   return src;
 }
 
-static void
-lex_source_destroy (struct lex_source *src)
+void
+lex_set_message_handler (struct lexer *lexer,
+                         void (*output_msg) (const struct msg *,
+                                             struct lexer *))
+{
+  struct msg_handler handler = {        /* Stores LEXER as the handler's AUX. */
+    .aux = lexer,
+    .output_msg = (void (*)(const struct msg *, void *)) output_msg,
+    .lex_source_ref = lex_source_ref,
+    .lex_source_unref = lex_source_unref,
+    .lex_source_get_line = lex_source_get_line,
+  };
+  msg_set_handler (&handler);
+}
+
+struct lex_source *
+lex_source_ref (const struct lex_source *src_)
 {
+  /* Adds a reference to SRC_, which may be null, and returns it non-const. */
+  struct lex_source *src = CONST_CAST (struct lex_source *, src_);
+  if (src == NULL)
+    return NULL;
+  assert (src->n_refs > 0);
+  src->n_refs++;
+  return src;
 }
+
+void
+lex_source_unref (struct lex_source *src)
+{
+  if (!src)
+    return;
+
+  assert (src->n_refs > 0);
+  if (--src->n_refs > 0)
+    return;
+
   char *file_name = src->reader->file_name;
   char *encoding = src->reader->encoding;
   if (src->reader->class->destroy != NULL)
@@ -2040,11 +2505,11 @@ lex_source_destroy (struct lex_source *src)
   free (file_name);
   free (encoding);
   free (src->buffer);
+  free (src->lines);
   lex_stage_uninit (&src->pp);
   lex_stage_uninit (&src->merge);
   lex_source_clear_parse (src);
   free (src->parse);
-  ll_remove (&src->ll);
   free (src);
 }
 
@@ -2166,9 +2631,7 @@ lex_reader_for_substring_nocopy (struct substring s, const char *encoding)
 struct lex_reader *
 lex_reader_for_string (const char *s, const char *encoding)
 {
-  struct substring ss;
-  ss_alloc_substring (&ss, ss_cstr (s));
-  return lex_reader_for_substring_nocopy (ss, encoding);
+  return lex_reader_for_substring_nocopy (ss_clone (ss_cstr (s)), encoding);
 }
 
 /* Formats FORMAT as a printf()-like format string and creates and returns a
@@ -2220,3 +2683,21 @@ static struct lex_reader_class lex_string_reader_class =
     lex_string_read,
     lex_string_close
   };
+
+/* Returns the text of 1-based LINE in SRC, or an empty substring if LINE is
+   out of range.  A line other than the last extends to the next line's start. */
+struct substring
+lex_source_get_line (const struct lex_source *src, int line)
+{
+  if (line < 1 || line > src->n_lines)
+    return ss_empty ();
+
+  size_t start = src->lines[line - 1];
+  if (line < src->n_lines)
+    return ss_buffer (&src->buffer[start], src->lines[line] - start);
+
+  /* Last line: it runs to the next new-line, if any, else to end of data. */
+  const char *nl = memchr (src->buffer + start, '\n', src->length - start);
+  size_t end = nl ? nl - src->buffer : src->length;
+  return ss_buffer (&src->buffer[start], end - start);
+}