message: Intern file names in msg_location to make them cheaper to copy.
[pspp] / src / language / control / define.c
index 8f3bfa23aa0976d2cc1ba5d2b254bfb7c5dca446..f33e885925a807b67071d1832b2de8442f3c7845 100644 (file)
@@ -23,6 +23,7 @@
 #include "language/lexer/macro.h"
 #include "language/lexer/scan.h"
 #include "language/lexer/token.h"
+#include "libpspp/intern.h"
 #include "libpspp/message.h"
 
 #include "gl/xalloc.h"
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
 
-static bool
-force_macro_id (struct lexer *lexer)
-{
-  return lex_token (lexer) == T_MACRO_ID || lex_force_id (lexer);
-}
-
 static bool
 match_macro_id (struct lexer *lexer, const char *keyword)
 {
@@ -77,20 +72,49 @@ parse_quoted_token (struct lexer *lexer, struct token *token)
   return true;
 }
 
+/* Checks whether an argument type has already been given, tracking that
+   state in *SAW_ARG_TYPE.
+
+   If *SAW_ARG_TYPE is false, sets it to true and returns true.  Otherwise,
+   reports an error through LEXER and returns false, leaving *SAW_ARG_TYPE
+   unchanged.
+
+   Note that, despite the name, a true return means that this was NOT a
+   duplicate. */
+static bool
+dup_arg_type (struct lexer *lexer, bool *saw_arg_type)
+{
+  if (*saw_arg_type)
+    {
+      lex_error (lexer, _("Only one of !TOKENS, !CHAREND, !ENCLOSE, or "
+                          "!CMDEND is allowed."));
+      return false;
+    }
+  else
+    {
+      *saw_arg_type = true;
+      return true;
+    }
+}
+
 int
 cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
 {
-  if (!force_macro_id (lexer))
-    return CMD_FAILURE;
+  /* Parse macro name.
+
+     The macro name is a T_STRING token, even though it's an identifier,
+     because that's the way that the segmenter prevents it from getting
+     macro-expanded. */
+  if (lex_token (lexer) != T_STRING)
+    {
+      lex_error (lexer, _("expecting identifier"));
+      return CMD_FAILURE;
+    }
+  const char *name = lex_tokcstr (lexer);
+  if (!id_is_plausible (name + (name[0] == '!'), false))
+    {
+      lex_error (lexer, _("expecting identifier"));
+      return CMD_FAILURE;
+    }
 
-  /* Parse macro name. */
   struct macro *m = xmalloc (sizeof *m);
   *m = (struct macro) {
-    .name = ss_xstrdup (lex_tokss (lexer)),
+    .name = xstrdup (name),
     .location = xmalloc (sizeof *m->location),
   };
   *m->location = (struct msg_location) {
-    .file_name = xstrdup_if_nonnull (lex_get_file_name (lexer)),
+    .file_name = intern_new_if_nonnull (lex_get_file_name (lexer)),
     .first_line = lex_get_first_line_number (lexer, 0),
   };
   lex_get (lexer);
@@ -144,79 +168,99 @@ cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
           p->positional = false;
           p->name = xasprintf ("!%s", lex_tokcstr (lexer));
           lex_get (lexer);
-
-          if (!lex_force_match (lexer, T_EQUALS))
-            goto error;
         }
+      lex_match (lexer, T_EQUALS);
 
-      /* Parse default value. */
-      if (match_macro_id (lexer, "!DEFAULT"))
+      bool saw_default = false;
+      bool saw_arg_type = false;
+      for (;;)
         {
-          if (!lex_force_match (lexer, T_LPAREN))
-            goto error;
-
-          /* XXX Should this handle balanced inner parentheses? */
-          while (!lex_match (lexer, T_RPAREN))
+          if (match_macro_id (lexer, "!DEFAULT"))
             {
-              if (lex_token (lexer) == T_ENDCMD)
+              if (saw_default)
                 {
-                  lex_error_expecting (lexer, ")");
+                  lex_error (lexer,
+                             _("!DEFAULT is allowed only once per argument."));
                   goto error;
                 }
-              char *syntax = lex_next_representation (lexer, 0, 0);
-              const struct macro_token mt = {
-                .token = *lex_next (lexer, 0),
-                .syntax = ss_cstr (syntax),
-              };
-              macro_tokens_add (&p->def, &mt);
-              free (syntax);
+              saw_default = true;
+
+              if (!lex_force_match (lexer, T_LPAREN))
+                goto error;
 
+              /* XXX Should this handle balanced inner parentheses? */
+              while (!lex_match (lexer, T_RPAREN))
+                {
+                  if (lex_token (lexer) == T_ENDCMD)
+                    {
+                      lex_error_expecting (lexer, ")");
+                      goto error;
+                    }
+                  char *syntax = lex_next_representation (lexer, 0, 0);
+                  const struct macro_token mt = {
+                    .token = *lex_next (lexer, 0),
+                    .syntax = ss_cstr (syntax),
+                  };
+                  macro_tokens_add (&p->def, &mt);
+                  free (syntax);
+
+                  lex_get (lexer);
+                }
+            }
+          else if (match_macro_id (lexer, "!NOEXPAND"))
+            p->expand_arg = false;
+          else if (match_macro_id (lexer, "!TOKENS"))
+            {
+              if (!dup_arg_type (lexer, &saw_arg_type)
+                  || !lex_force_match (lexer, T_LPAREN)
+                  || !lex_force_int_range (lexer, "!TOKENS", 1, INT_MAX))
+                goto error;
+              p->arg_type = ARG_N_TOKENS;
+              p->n_tokens = lex_integer (lexer);
               lex_get (lexer);
+              if (!lex_force_match (lexer, T_RPAREN))
+                goto error;
             }
-        }
+          else if (match_macro_id (lexer, "!CHAREND"))
+            {
+              if (!dup_arg_type (lexer, &saw_arg_type))
+                goto error;
 
-      if (match_macro_id (lexer, "!NOEXPAND"))
-        p->expand_arg = false;
+              p->arg_type = ARG_CHAREND;
 
-      if (match_macro_id (lexer, "!TOKENS"))
-        {
-          if (!lex_force_match (lexer, T_LPAREN)
-              || !lex_force_int_range (lexer, "!TOKENS", 1, INT_MAX))
-            goto error;
-          p->arg_type = ARG_N_TOKENS;
-          p->n_tokens = lex_integer (lexer);
-          lex_get (lexer);
-          if (!lex_force_match (lexer, T_RPAREN))
-            goto error;
-        }
-      else if (match_macro_id (lexer, "!CHAREND"))
-        {
-          p->arg_type = ARG_CHAREND;
-          p->charend = (struct token) { .type = T_STOP };
+              if (!lex_force_match (lexer, T_LPAREN)
+                  || !parse_quoted_token (lexer, &p->end)
+                  || !lex_force_match (lexer, T_RPAREN))
+                goto error;
+            }
+          else if (match_macro_id (lexer, "!ENCLOSE"))
+            {
+              if (!dup_arg_type (lexer, &saw_arg_type))
+                goto error;
 
-          if (!lex_force_match (lexer, T_LPAREN)
-              || !parse_quoted_token (lexer, &p->charend)
-              || !lex_force_match (lexer, T_RPAREN))
-            goto error;
-        }
-      else if (match_macro_id (lexer, "!ENCLOSE"))
-        {
-          p->arg_type = ARG_ENCLOSE;
-          p->enclose[0] = p->enclose[1] = (struct token) { .type = T_STOP };
-
-          if (!lex_force_match (lexer, T_LPAREN)
-              || !parse_quoted_token (lexer, &p->enclose[0])
-              || !lex_force_match (lexer, T_COMMA)
-              || !parse_quoted_token (lexer, &p->enclose[1])
-              || !lex_force_match (lexer, T_RPAREN))
-            goto error;
+              p->arg_type = ARG_ENCLOSE;
+
+              if (!lex_force_match (lexer, T_LPAREN)
+                  || !parse_quoted_token (lexer, &p->start)
+                  || !lex_force_match (lexer, T_COMMA)
+                  || !parse_quoted_token (lexer, &p->end)
+                  || !lex_force_match (lexer, T_RPAREN))
+                goto error;
+            }
+          else if (match_macro_id (lexer, "!CMDEND"))
+            {
+              if (!dup_arg_type (lexer, &saw_arg_type))
+                goto error;
+
+              p->arg_type = ARG_CMDEND;
+            }
+          else
+            break;
         }
-      else if (match_macro_id (lexer, "!CMDEND"))
-        p->arg_type = ARG_CMDEND;
-      else
+      if (!saw_arg_type)
         {
-          lex_error_expecting (lexer, "!TOKENS", "!CHAREND",
-                               "!ENCLOSE", "!CMDEND");
+          lex_error_expecting (lexer, "!TOKENS", "!CHAREND", "!ENCLOSE",
+                               "!CMDEND");
           goto error;
         }