lexer: New function lex_ofs_representation().

author Ben Pfaff <blp@cs.stanford.edu>
Sun, 13 Feb 2022 19:07:22 +0000 (11:07 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 13 Feb 2022 19:07:22 +0000 (11:07 -0800)
diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c

index 908556a7eeaab39472de12da5a0434b2ad03a6c5..1fadf53b3079e9758bc4e13a5fc8bc36f1d0a046 100644 (file)
--- a/src/language/lexer/lexer.c
+++ b/src/language/lexer/lexer.c
@@ -282,8 +282,8 @@ struct lexer
    };
  
  static struct lex_source *lex_source__ (const struct lexer *);
-static char *lex_source_get_syntax__ (const struct lex_source *,
-                                      int n0, int n1);
+static char *lex_source_syntax__ (const struct lex_source *,
+                                  int ofs0, int ofs1);
  static const struct lex_token *lex_next__ (const struct lexer *, int n);
  static void lex_source_push_endcmd__ (struct lex_source *);
  static void lex_source_push_parse (struct lex_source *, struct lex_token *);
@@ -1451,15 +1451,37 @@ lex_ofs_end_point (const struct lexer *lexer, int ofs)
  
  /* Returns the text of the syntax in tokens N0 ahead of the current one,
     through N1 ahead of the current one, inclusive.  (For example, if N0 and N1
-   are both zero, this requests the syntax for the current token.)  The caller
-   must eventually free the returned string (with free()).  The syntax is
-   encoded in UTF-8 and in the original form supplied to the lexer so that, for
-   example, it may include comments, spaces, and new-lines if it spans multiple
-   tokens.  Macro expansion, however, has already been performed. */
+   are both zero, this requests the syntax for the current token.)
+
+   The caller must eventually free the returned string (with free()).  The
+   syntax is encoded in UTF-8 and in the original form supplied to the lexer so
+   that, for example, it may include comments, spaces, and new-lines if it
+   spans multiple tokens.  Macro expansion, however, has already been
+   performed. */
  char *
  lex_next_representation (const struct lexer *lexer, int n0, int n1)
  {
-  return lex_source_get_syntax__ (lex_source__ (lexer), n0, n1);
+  const struct lex_source *src = lex_source__ (lexer);
+  return (src
+          ? lex_source_syntax__ (src, n0 + src->parse_ofs, n1 + src->parse_ofs)
+          : xstrdup (""));
+}
+
+
+/* Returns the text of the syntax in tokens with offsets OFS0 to OFS1,
+   inclusive.  (For example, if OFS0 and OFS1 are both zero, this requests the
+   syntax for the first token in the current command.)
+
+   The caller must eventually free the returned string (with free()).  The
+   syntax is encoded in UTF-8 and in the original form supplied to the lexer so
+   that, for example, it may include comments, spaces, and new-lines if it
+   spans multiple tokens.  Macro expansion, however, has already been
+   performed. */
+char *
+lex_ofs_representation (const struct lexer *lexer, int ofs0, int ofs1)
+{
+  const struct lex_source *src = lex_source__ (lexer);
+  return src ? lex_source_syntax__ (src, ofs0, ofs1) : xstrdup ("");
  }
  
  /* Returns true if the token N ahead of the current one was produced by macro
@@ -1787,32 +1809,33 @@ lex_source__ (const struct lexer *lexer)
            : ll_data (ll_head (&lexer->sources), struct lex_source, ll));
  }
  
-/* Returns the text of the syntax in SRC for tokens N0 ahead of the current
-   one, through N1 ahead of the current one, inclusive.  (For example, if N0
-   and N1 are both zero, this requests the syntax for the current token.)  The
-   caller must eventually free the returned string (with free()).  The syntax
-   is encoded in UTF-8 and in the original form supplied to the lexer so that,
-   for example, it may include comments, spaces, and new-lines if it spans
-   multiple tokens.  Macro expansion, however, has already been performed. */
+/* Returns the text of the syntax in SRC for tokens with offsets OFS0 through
+   OFS1 in the current command, inclusive.  (For example, if OFS0 and OFS1 are
+   both zero, this requests the syntax for the first token in the current
+   command.)  The caller must eventually free the returned string (with
+   free()).  The syntax is encoded in UTF-8 and in the original form supplied
+   to the lexer so that, for example, it may include comments, spaces, and
+   new-lines if it spans multiple tokens.  Macro expansion, however, has
+   already been performed. */
  static char *
-lex_source_get_syntax__ (const struct lex_source *src, int n0, int n1)
+lex_source_syntax__ (const struct lex_source *src, int ofs0, int ofs1)
  {
    struct string s = DS_EMPTY_INITIALIZER;
-  for (size_t i = n0; i <= n1; )
+  for (size_t i = ofs0; i <= ofs1; )
      {
        /* Find [I,J) as the longest sequence of tokens not produced by macro
           expansion, or otherwise the longest sequence expanded from a single
           macro call. */
-      const struct lex_token *first = lex_source_next__ (src, i);
+      const struct lex_token *first = lex_source_ofs__ (src, i);
        size_t j;
-      for (j = i + 1; j <= n1; j++)
+      for (j = i + 1; j <= ofs1; j++)
          {
-          const struct lex_token *cur = lex_source_next__ (src, j);
+          const struct lex_token *cur = lex_source_ofs__ (src, j);
            if ((first->macro_rep != NULL) != (cur->macro_rep != NULL)
                || first->macro_rep != cur->macro_rep)
              break;
          }
-      const struct lex_token *last = lex_source_next__ (src, j - 1);
+      const struct lex_token *last = lex_source_ofs__ (src, j - 1);
  
        /* Now add the syntax for this sequence of tokens to SRC. */
        if (!ds_is_empty (&s))
@@ -1883,7 +1906,8 @@ lex_source_error_valist (struct lex_source *src, int n0, int n1,
    else
      {
        /* Get the syntax that caused the error. */
-      char *raw_syntax = lex_source_get_syntax__ (src, n0, n1);
+      char *raw_syntax = lex_source_syntax__ (src, n0 + src->parse_ofs,
+                                              n1 + src->parse_ofs);
        char syntax[64];
        str_ellipsize (ss_cstr (raw_syntax), syntax, sizeof syntax);
        free (raw_syntax);
diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h

index 764da74b19db207100e9935606c817f97eb67047..bb9d6a954db209d1c787512b4e9be32550809e25 100644 (file)
--- a/src/language/lexer/lexer.h
+++ b/src/language/lexer/lexer.h
@@ -166,6 +166,7 @@ struct msg_point lex_ofs_end_point (const struct lexer *, int ofs);
  
  /* Token representation. */
  char *lex_next_representation (const struct lexer *, int n0, int n1);
+char *lex_ofs_representation (const struct lexer *, int ofs0, int ofs1);
  bool lex_next_is_from_macro (const struct lexer *, int n);
  
  /* Current position. */
diff --git a/src/language/stats/matrix.c b/src/language/stats/matrix.c

index 9bbc37f9a23e4c09ef72f052b46365b369648943..b1537cd92e01d9e810118b7730fd33ed82fded50 100644 (file)
--- a/src/language/stats/matrix.c
+++ b/src/language/stats/matrix.c
@@ -5367,25 +5367,12 @@ matrix_print_parse (struct matrix_state *s)
  
    if (lex_token (s->lexer) != T_SLASH && lex_token (s->lexer) != T_ENDCMD)
      {
-      size_t depth = 0;
-      for (size_t i = 0; ; i++)
-        {
-          enum token_type t = lex_next_token (s->lexer, i);
-          if (t == T_LPAREN || t == T_LBRACK || t == T_LCURLY)
-            depth++;
-          else if ((t == T_RPAREN || t == T_RBRACK || t == T_RCURLY) && depth)
-            depth--;
-          else if ((t == T_SLASH && !depth) || t == T_ENDCMD || t == T_STOP)
-            {
-              if (i > 0)
-                cmd->print.title = lex_next_representation (s->lexer, 0, i - 1);
-              break;
-            }
-        }
-
+      int start_ofs = lex_ofs (s->lexer);
        cmd->print.expression = matrix_parse_exp (s);
        if (!cmd->print.expression)
          goto error;
+      cmd->print.title = lex_ofs_representation (s->lexer, start_ofs,
+                                                 lex_ofs (s->lexer) - 1);
      }
  
    while (lex_match (s->lexer, T_SLASH))
diff --git a/src/language/utilities/title.c b/src/language/utilities/title.c

index 323bdf3fa51e79d35c375e8fc6a4dde22431a979..8ad6a4acb30b5723ea27e1ad0fc698f667c0b758 100644 (file)
--- a/src/language/utilities/title.c
+++ b/src/language/utilities/title.c
@@ -44,28 +44,18 @@ parse_title (struct lexer *lexer, void (*set_title) (const char *))
        set_title (lex_tokcstr (lexer));
        lex_get (lexer);
      }
-  else if (lex_token (lexer) == T_ENDCMD)
-    {
-      /* This would be a bad special case below because n-1 would be
-         SIZE_MAX. */
-      set_title ("");
-    }
    else
      {
-      /* Count the tokens in the title. */
-      size_t n = 0;
-      while (lex_next (lexer, n)->type != T_ENDCMD)
-        n++;
+      int start_ofs = lex_ofs (lexer);
+      while (lex_token (lexer) != T_ENDCMD)
+        lex_get (lexer);
  
        /* Get the raw representation of all the tokens, including any space
           between them, and use it as the title. */
-      char *title = lex_next_representation (lexer, 0, n - 1);
+      char *title = lex_ofs_representation (lexer, start_ofs,
+                                            lex_ofs (lexer) - 1);
        set_title (title);
        free (title);
-
-      /* Skip past the tokens. */
-      for (size_t i = 0; i < n; i++)
-        lex_get (lexer);
      }
    return CMD_SUCCESS;
  }
author	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 13 Feb 2022 19:07:22 +0000 (11:07 -0800)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 13 Feb 2022 19:07:22 +0000 (11:07 -0800)
src/language/lexer/lexer.c		patch \| blob \| history
src/language/lexer/lexer.h		patch \| blob \| history
src/language/stats/matrix.c		patch \| blob \| history
src/language/utilities/title.c		patch \| blob \| history