Be more careful about checking variable names.

author Ben Pfaff <blp@cs.stanford.edu>

Sat, 29 Apr 2023 21:36:56 +0000 (14:36 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Sat, 29 Apr 2023 21:36:56 +0000 (14:36 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 29 Apr 2023 21:36:56 +0000 (14:36 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 29 Apr 2023 21:36:56 +0000 (14:36 -0700)
diff --git a/src/data/dictionary.c b/src/data/dictionary.c

index 5c8d6e8dc313f156c664db1417b9b4cf9f7a398a..10768ce9216f391a47e7f86b681608efad6f3d22 100644 (file)
--- a/src/data/dictionary.c
+++ b/src/data/dictionary.c
@@ -81,6 +81,10 @@ struct dictionary
      struct varset **varsets;    /* Variable sets. */
      size_t n_varsets;           /* Number of variable sets. */
  
+    /* Number of VAR### names created by dict_make_unique_var_name(), or
+       less. */
+    unsigned long int n_unique_names;
+
      /* Whether variable names must be valid identifiers.  Normally, this is
         true, but sometimes a dictionary is prepared for external use
         (e.g. output to a CSV file) where names don't have to be valid. */
@@ -176,14 +180,15 @@ dict_get_encoding (const struct dictionary *d)
  }
  
  /* Checks whether UTF-8 string ID is an acceptable identifier in DICT's
-   encoding.  Returns true if it is, otherwise an error message that the caller
-   must free(). */
+   encoding for a variable in the classes in CLASSES.  Returns true if it is,
+   otherwise an error message that the caller must free(). */
  char * WARN_UNUSED_RESULT
-dict_id_is_valid__ (const struct dictionary *dict, const char *id)
+dict_id_is_valid__ (const struct dictionary *dict, const char *id,
+                    enum dict_class classes)
  {
    if (!dict->names_must_be_ids)
      return NULL;
-  return id_is_valid__ (id, dict->encoding);
+  return id_is_valid__ (id, dict->encoding, classes);
  }
  
  static bool
@@ -199,11 +204,12 @@ error_to_bool (char *error)
  }
  
  /* Returns true if UTF-8 string ID is an acceptable identifier in DICT's
-   encoding, false otherwise. */
+   encoding for a variable in the classes in CLASSES, false otherwise. */
  bool
-dict_id_is_valid (const struct dictionary *dict, const char *id)
+dict_id_is_valid (const struct dictionary *dict, const char *id,
+                  enum dict_class classes)
  {
-  return error_to_bool (dict_id_is_valid__ (dict, id));
+  return error_to_bool (dict_id_is_valid__ (dict, id, classes));
  }
  
  void
@@ -460,6 +466,8 @@ dict_clear_split_vars (struct dictionary *d)
  static void
  dict_delete_var__ (struct dictionary *d, struct variable *v, bool skip_callbacks)
  {
+  d->n_unique_names = 0;
+
    int dict_index = var_get_dict_index (v);
  
    assert (dict_contains_var (d, v));
@@ -621,9 +629,7 @@ dict_clear__ (struct dictionary *d, bool skip_callbacks)
    /* FIXME?  Should we really clear case_limit, label, documents?
       Others are necessarily cleared by deleting all the variables.*/
    while (d->n_vars > 0)
-    {
-      dict_delete_var__ (d, d->vars[d->n_vars - 1].var, skip_callbacks);
-    }
+    dict_delete_var__ (d, d->vars[d->n_vars - 1].var, skip_callbacks);
  
    free (d->vars);
    d->vars = NULL;
@@ -861,6 +867,25 @@ dict_clone_var_as_assert (struct dictionary *d, const struct variable *old_var,
    return add_var (d, new_var);
  }
  
+/* Creates and returns a new variable in DICT with the given WIDTH.  Uses HINT
+   as the variable name, if it is nonnull, not already in use in DICT, and a
+   valid name for a DC_ORDINARY variable; otherwise, chooses a unique name with
+   HINT as a hint. */
+struct variable *
+dict_create_var_with_unique_name (struct dictionary *dict, const char *hint,
+                                  int width)
+{
+  const char *name = (hint
+                      && dict_id_is_valid (dict, hint, DC_ORDINARY)
+                      && !dict_lookup_var (dict, hint)
+                      ? hint
+                      : dict_make_unique_var_name (dict, hint));
+  struct variable *var = dict_create_var_assert (dict, name, width);
+  if (name != hint)
+    free (CONST_CAST (char *, name));
+  return var;
+}
+
  /* Returns the variable named NAME in D, or a null pointer if no
     variable has that name. */
  struct variable *
@@ -960,11 +985,13 @@ dict_reorder_vars (struct dictionary *d,
    reindex_vars (d, 0, d->n_vars, false);
  }
  
-/* Changes the name of variable V that is currently in a dictionary to
+/* Changes the name of variable V that is currently in dictionary D to
     NEW_NAME. */
  static void
-rename_var (struct variable *v, const char *new_name)
+rename_var (struct dictionary *d, struct variable *v, const char *new_name)
  {
+  d->n_unique_names = 0;
+
    struct vardict_info *vardict = var_get_vardict (v);
    var_clear_vardict (v);
    var_set_name (v, new_name);
@@ -985,7 +1012,7 @@ dict_try_rename_var (struct dictionary *d, struct variable *v,
  
    struct variable *old = var_clone (v);
    unindex_var (d, var_get_vardict (v));
-  rename_var (v, new_name);
+  rename_var (d, v, new_name);
    reindex_var (d, var_get_vardict (v), false);
  
    if (settings_get_algorithm () == ENHANCED)
@@ -1040,7 +1067,7 @@ dict_rename_vars (struct dictionary *d,
    for (i = 0; i < count; i++)
      {
        unindex_var (d, var_get_vardict (vars[i]));
-      rename_var (vars[i], new_names[i]);
+      rename_var (d, vars[i], new_names[i]);
      }
  
    /* Add the renamed variables back into the name hash,
@@ -1061,7 +1088,7 @@ dict_rename_vars (struct dictionary *d,
  
            for (i = 0; i < count; i++)
              {
-              rename_var (vars[i], old_names[i]);
+              rename_var (d, vars[i], old_names[i]);
                reindex_var (d, var_get_vardict (vars[i]), false);
              }
  
@@ -1116,7 +1143,7 @@ make_hinted_name (const struct dictionary *dict, const char *hint)
        mblen = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, hint + ofs),
                           hint_len - ofs);
        if (rp == root
-          ? lex_uc_is_id1 (uc) && uc != '$'
+          ? lex_uc_is_id1 (uc) && uc != '$' && uc != '#' && uc != '@'
            : lex_uc_is_idn (uc))
          {
            if (dropped)
@@ -1163,47 +1190,33 @@ make_hinted_name (const struct dictionary *dict, const char *hint)
  }
  
  static char *
-make_numeric_name (const struct dictionary *dict, unsigned long int *num_start)
+make_numeric_name (struct dictionary *dict)
  {
-  unsigned long int number;
-
-  for (number = num_start != NULL ? MAX (*num_start, 1) : 1;
-       number < ULONG_MAX;
-       number++)
+  while (dict->n_unique_names++ < ULONG_MAX)
      {
-      char name[3 + INT_STRLEN_BOUND (number) + 1];
-
-      sprintf (name, "VAR%03lu", number);
+      char *name = xasprintf ("VAR%03lu", dict->n_unique_names);
        if (dict_lookup_var (dict, name) == NULL)
-        {
-          if (num_start != NULL)
-            *num_start = number + 1;
-          return xstrdup (name);
-        }
+        return name;
+      free (name);
      }
  
    NOT_REACHED ();
  }
  
-
  /* Devises and returns a variable name unique within DICT.  The variable name
     is owned by the caller, which must free it with free() when it is no longer
-   needed.
+   needed.  The variable name will not begin with '$' or '#' or '@'.
  
     HINT, if it is non-null, is used as a suggestion that will be
     modified for suitability as a variable name and for
     uniqueness.
  
-   If HINT is null or entirely unsuitable, a name in the form
-   "VAR%03d" will be generated, where the smallest unused integer
-   value is used.  If NUM_START is non-null, then its value is
-   used as the minimum numeric value to check, and it is updated
-   to the next value to be checked.
-*/
+   If HINT is null or entirely unsuitable, uses a name in the form "VAR%03d",
+   using the smallest available integer. */
  char *
-dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
-                           unsigned long int *num_start)
+dict_make_unique_var_name (const struct dictionary *dict_, const char *hint)
  {
+  struct dictionary *dict = CONST_CAST (struct dictionary *, dict_);
    if (hint != NULL)
      {
        char *hinted_name = make_hinted_name (dict, hint);
@@ -1211,7 +1224,7 @@ dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
          return hinted_name;
      }
  
-  return make_numeric_name (dict, num_start);
+  return make_numeric_name (dict);
  }
  
  /* Returns whether variable names must be valid identifiers.  Normally, this is
diff --git a/src/data/dictionary.h b/src/data/dictionary.h

index efb587cf75af20da71685ee670be2cf49c5fe762..284d71ca95dd896c78f01e5136ed27f1d9a28919 100644 (file)
--- a/src/data/dictionary.h
+++ b/src/data/dictionary.h
@@ -64,6 +64,9 @@ struct variable *dict_clone_var_as (struct dictionary *,
  struct variable *dict_clone_var_as_assert (struct dictionary *,
                                             const struct variable *,
                                             const char *);
+struct variable *dict_create_var_with_unique_name (struct dictionary *,
+                                                   const char *hint,
+                                                   int width);
  
  /* Deleting variables. */
  void dict_delete_var (struct dictionary *, struct variable *);
@@ -86,8 +89,7 @@ void dict_rename_var (struct dictionary *, struct variable *, const char *);
  bool dict_rename_vars (struct dictionary *,
                         struct variable **, char **new_names,
                         size_t count, char **err_name);
-char *dict_make_unique_var_name (const struct dictionary *, const char *hint,
-                                 unsigned long int *num_start);
+char *dict_make_unique_var_name (const struct dictionary *, const char *hint);
  
  bool dict_get_names_must_be_ids (const struct dictionary *);
  void dict_set_names_must_be_ids (struct dictionary *, bool);
@@ -192,9 +194,11 @@ bool dict_has_attributes (const struct dictionary *);
  /* Data encoding. */
  const char *dict_get_encoding (const struct dictionary *d);
  
-char *dict_id_is_valid__ (const struct dictionary *, const char *id)
+char *dict_id_is_valid__ (const struct dictionary *, const char *id,
+                          enum dict_class)
    WARN_UNUSED_RESULT;
-bool dict_id_is_valid (const struct dictionary *, const char *id);
+bool dict_id_is_valid (const struct dictionary *, const char *id,
+                       enum dict_class);
  
  /* Functions to be called upon dictionary changes. */
  struct dict_callbacks
diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c

index a7f2d04aabc2fbbf2a18df052dccbfa31a524c6d..87d9db6a69deabbdd021a3113719b68a0c70b476 100644 (file)
--- a/src/data/gnumeric-reader.c
+++ b/src/data/gnumeric-reader.c
@@ -684,7 +684,6 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
    int type = 0;
    int x = 0;
    struct gnumeric_reader *r = NULL;
-  unsigned long int vstart = 0;
    int ret;
    casenumber n_cases = CASENUMBER_MAX;
    int i;
@@ -853,19 +852,17 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
  
    for (i = 0 ; i < n_var_specs ; ++i)
      {
-      char *name;
-
-      if ((var_spec[i].name == NULL) && (var_spec[i].first_value == NULL))
+      struct var_spec *vs = &var_spec[i];
+      if (vs->name == NULL && !vs->first_value)
         continue;
  
        /* Probably no data exists for this variable, so allocate a
          default width */
-      if (var_spec[i].width == -1)
-       var_spec[i].width = SPREADSHEET_DEFAULT_WIDTH;
+      if (vs->width == -1)
+       vs->width = SPREADSHEET_DEFAULT_WIDTH;
  
-      name = dict_make_unique_var_name (r->spreadsheet.dict, var_spec[i].name, &vstart);
-      dict_create_var (r->spreadsheet.dict, name, var_spec[i].width);
-      free (name);
+      dict_create_var_with_unique_name (r->spreadsheet.dict, vs->name,
+                                        vs->width);
      }
  
    /* Create the first case, and cache it */
diff --git a/src/data/identifier.h b/src/data/identifier.h

index 4a23dd7dd5db1079f47e9edab4d2e9a172e1ed96..8fc25cccbad4198326c77a05e1ee536d69805125 100644 (file)
--- a/src/data/identifier.h
+++ b/src/data/identifier.h
@@ -20,6 +20,7 @@
  #include <ctype.h>
  #include <stdbool.h>
  #include <unitypes.h>
+#include "data/dict-class.h"
  #include "libpspp/str.h"
  #include "gl/verify.h"
  
@@ -89,9 +90,10 @@ bool lex_is_keyword (enum token_type);
  /* Validating identifiers. */
  #define ID_MAX_LEN 64          /* Maximum length of identifier, in bytes. */
  
-bool id_is_valid (const char *id, const char *dict_encoding);
+bool id_is_valid (const char *id, const char *dict_encoding, enum dict_class);
  bool id_is_plausible (const char *id);
-char *id_is_valid__ (const char *id, const char *dict_encoding)
+char *id_is_valid__ (const char *id, const char *dict_encoding,
+                     enum dict_class)
    WARN_UNUSED_RESULT;
  char *id_is_plausible__ (const char *id) WARN_UNUSED_RESULT;
  
diff --git a/src/data/identifier2.c b/src/data/identifier2.c

index d21c6bba5b50eb59262eb10336b6f8b11f1273e6..53f98ac6d4adedcff212c7173674d37f3674ad45 100644 (file)
--- a/src/data/identifier2.c
+++ b/src/data/identifier2.c
@@ -25,6 +25,7 @@
  #include <string.h>
  #include <unistr.h>
  
+#include "libpspp/assertion.h"
  #include "libpspp/cast.h"
  #include "libpspp/i18n.h"
  #include "libpspp/message.h"
@@ -47,11 +48,15 @@ error_to_bool (char *error)
  }
  
  /* Checks whether if UTF-8 string ID is an acceptable identifier in encoding
-   DICT_ENCODING (UTF-8 if null).  Returns NULL if it is acceptable, otherwise
-   an error message that the caller must free(). */
+   DICT_ENCODING (UTF-8 if null) for a variable in one of the classes in
+   CLASSES.  Returns NULL if it is acceptable, otherwise an error message that
+   the caller must free(). */
  char * WARN_UNUSED_RESULT
-id_is_valid__ (const char *id, const char *dict_encoding)
+id_is_valid__ (const char *id, const char *dict_encoding,
+               enum dict_class classes)
  {
+  assert (classes && !(classes & ~DC_ALL));
+
    char *error = id_is_plausible__ (id);
    if (error)
      return error;
@@ -59,13 +64,55 @@ id_is_valid__ (const char *id, const char *dict_encoding)
    size_t dict_len;
    if (dict_encoding != NULL)
      {
-      /* XXX need to reject recoded strings that contain the fallback
-         character. */
-      dict_len = recode_string_len (dict_encoding, "UTF-8", id, -1);
+      struct substring out;
+      bool ok = !recode_pedantically (dict_encoding, "UTF-8", ss_cstr (id),
+                                      NULL, &out);
+      dict_len = ss_length (out);
+      ss_dealloc (&out);
+      if (!ok)
+        return xasprintf (_("Identifier `%s' is not valid in encoding `%s'."
+                            "used for this dictionary."), id, dict_encoding);
      }
    else
      dict_len = strlen (id);
  
+  enum dict_class c = dict_class_from_id (id);
+  if (!(classes & c))
+    {
+      switch (c)
+        {
+        case DC_ORDINARY:
+          switch ((int) classes)
+            {
+            case DC_SYSTEM:
+              return xasprintf (_("`%s' is not valid here because this "
+                                  "identifier must start with `$'."), id);
+
+            case DC_SCRATCH:
+              return xasprintf (_("`%s' is not valid here because this "
+                                  "identifier must start with `#'."), id);
+
+            case DC_SYSTEM | DC_SCRATCH:
+              return xasprintf (_("`%s' is not valid here because this "
+                                  "identifier must start with `$' or `#'."),
+                                id);
+
+            case DC_ORDINARY:
+            default:
+              NOT_REACHED ();
+            }
+          NOT_REACHED ();
+
+        case DC_SYSTEM:
+          return xasprintf (_("`%s' and other identifiers starting with `$' "
+                              "are not valid here."), id);
+
+        case DC_SCRATCH:
+          return xasprintf (_("`%s' and other identifiers starting with `#' "
+                              "are not valid here."), id);
+        }
+    }
+
    if (dict_len > ID_MAX_LEN)
      return xasprintf (_("Identifier `%s' exceeds %d-byte limit."),
                        id, ID_MAX_LEN);
@@ -74,11 +121,12 @@ id_is_valid__ (const char *id, const char *dict_encoding)
  }
  
  /* Returns true if UTF-8 string ID is an acceptable identifier in encoding
-   DICT_ENCODING (UTF-8 if null), false otherwise. */
+   DICT_ENCODING (UTF-8 if null) for variable in one of the classes in CLASSES,
+   false otherwise. */
  bool
-id_is_valid (const char *id, const char *dict_encoding)
+id_is_valid (const char *id, const char *dict_encoding, enum dict_class classes)
  {
-  return error_to_bool (id_is_valid__ (id, dict_encoding));
+  return error_to_bool (id_is_valid__ (id, dict_encoding, classes));
  }
  
  /* Checks whether UTF-8 string ID is an plausible identifier.  Returns NULL if
diff --git a/src/data/mrset.c b/src/data/mrset.c

index c5e6cd2ec2b80b8302e277cd7a6fbfd4c491eea1..c5e00439d55e755877e14257bde78079c9fa6a0f 100644 (file)
--- a/src/data/mrset.c
+++ b/src/data/mrset.c
@@ -73,16 +73,7 @@ mrset_destroy (struct mrset *mrset)
  char * WARN_UNUSED_RESULT
  mrset_is_valid_name__ (const char *name, const char *dict_encoding)
  {
-  char *error = id_is_valid__ (name, dict_encoding);
-  if (error)
-    return error;
-
-  if (name[0] != '$')
-    return xasprintf (_("%s is not a valid name for a multiple response "
-                        "set.  Multiple response set names must begin with "
-                        "`$'."), name);
-
-  return NULL;
+  return id_is_valid__ (name, dict_encoding, DC_SYSTEM);
  }
  
  static bool
diff --git a/src/data/ods-reader.c b/src/data/ods-reader.c

index c95272364580179316c34c076ec43a4e5c90cc60..f05d68272969e659e4cd77c8a16b2dbc2a2114ca 100644 (file)
--- a/src/data/ods-reader.c
+++ b/src/data/ods-reader.c
@@ -771,7 +771,6 @@ ods_make_reader (struct spreadsheet *spreadsheet,
  {
    intf ret = 0;
    xmlChar *type = NULL;
-  unsigned long int vstart = 0;
    casenumber n_cases = CASENUMBER_MAX;
    int i;
    struct var_spec *var_spec = NULL;
@@ -936,15 +935,11 @@ ods_make_reader (struct spreadsheet *spreadsheet,
  
    for (i = 0; i < n_var_specs ; ++i)
      {
-      struct fmt_spec fmt;
-      struct variable *var = NULL;
-      char *name = dict_make_unique_var_name (r->spreadsheet.dict, var_spec[i].name, &vstart);
-      int width  = xmv_to_width (&var_spec[i].firstval, opts->asw);
-      dict_create_var (r->spreadsheet.dict, name, width);
-      free (name);
-
-      var = dict_get_var (r->spreadsheet.dict, i);
+      int width = xmv_to_width (&var_spec[i].firstval, opts->asw);
+      struct variable *var = dict_create_var_with_unique_name (
+        r->spreadsheet.dict, var_spec[i].name, width);
  
+      struct fmt_spec fmt;
        if (0 == xmlStrcmp (var_spec[i].firstval.type, _xml("date")))
         {
           fmt.type = FMT_DATE;
@@ -953,7 +948,6 @@ ods_make_reader (struct spreadsheet *spreadsheet,
         }
        else
         fmt = fmt_default_for_width (width);
-
        var_set_both_formats (var, fmt);
      }
  
diff --git a/src/data/pc+-file-reader.c b/src/data/pc+-file-reader.c

index 73b9ea804b715d83332d4af132d0225884dc0e48..8c35724138ad2a8836e7e6db2bca9c7de2ae24de 100644 (file)
--- a/src/data/pc+-file-reader.c
+++ b/src/data/pc+-file-reader.c
@@ -843,7 +843,7 @@ parse_variable_records (struct pcp_reader *r, struct dictionary *dict,
            continue;
          }
  
-      if (!dict_id_is_valid (dict, name) || name[0] == '#')
+      if (!dict_id_is_valid (dict, name, DC_ORDINARY))
          {
            pcp_error (r, rec->pos, _("Invalid variable name `%s'."), name);
            return false;
@@ -852,12 +852,10 @@ parse_variable_records (struct pcp_reader *r, struct dictionary *dict,
        struct variable *var = dict_create_var (dict, name, rec->width);
        if (var == NULL)
          {
-          char *new_name = dict_make_unique_var_name (dict, NULL, NULL);
+          var = dict_create_var_with_unique_name (dict, name, rec->width);
            pcp_warn (r, rec->pos, _("Renaming variable with duplicate name "
                                     "`%s' to `%s'."),
-                    name, new_name);
-          var = dict_create_var_assert (dict, new_name, rec->width);
-          free (new_name);
+                    name, var_get_name (var));
          }
        if (rec->weight)
          {
diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c

index 2487ab2cb43f140d3fe6f9962b9a07afa4090d8e..50c4495ff8f11642a3b4f4c4ef5672e9334bfcb4 100644 (file)
--- a/src/data/por-file-reader.c
+++ b/src/data/por-file-reader.c
@@ -710,28 +710,13 @@ read_variables (struct pfm_reader *r, struct dictionary *dict)
        for (j = 0; j < 6; j++)
          fmt[j] = read_int (r);
  
-      if (!dict_id_is_valid (dict, name) || *name == '#' || *name == '$')
-        error (r, _("Invalid variable name `%s' in position %d."), name, i);
-      str_uppercase (name);
-
        if (width < 0 || width > 255)
         error (r, _("Bad width %d for variable %s."), width, name);
  
-      v = dict_create_var (dict, name, width);
-      if (v == NULL)
-        {
-          unsigned long int i;
-          for (i = 1; ; i++)
-            {
-              char *try_name = xasprintf ("%s_%lu", name, i);
-              v = dict_create_var (dict, try_name, width);
-              free (try_name);
-              if (v != NULL)
-                break;
-            }
-          warning (r, _("Duplicate variable name %s in position %d renamed "
-                        "to %s."), name, i, var_get_name (v));
-        }
+      v = dict_create_var_with_unique_name (dict, name, width);
+      if (utf8_strcasecmp (name, var_get_name (v)))
+        warning (r, _("Invalid or duplicate variable name %s in position %d "
+                      "renamed to %s."), name, i, var_get_name (v));
  
        print = convert_format (r, &fmt[0], v, &report_error);
        write = convert_format (r, &fmt[3], v, &report_error);
diff --git a/src/data/psql-reader.c b/src/data/psql-reader.c

index 8b83d018de0d9fe8de5a52889f6e896bf4f3b7ba..63d9cad3d1a9a46bb2eb6dbc6d820dc0d91d3118 100644 (file)
--- a/src/data/psql-reader.c
+++ b/src/data/psql-reader.c
@@ -180,13 +180,8 @@ static struct variable *
  create_var (struct psql_reader *r, struct fmt_spec fmt,
             int width, const char *suggested_name, int col)
  {
-  unsigned long int vx = 0;
-  struct variable *var;
-  char *name;
-
-  name = dict_make_unique_var_name (r->dict, suggested_name, &vx);
-  var = dict_create_var (r->dict, name, width);
-  free (name);
+  struct variable *var
+    = dict_create_var_with_unique_name (r->dict, suggested_name, width);
  
    var_set_both_formats (var, fmt);
  
diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c

index 491df7f996c349a853c3492d0a49ca575e717161..b0f60a00c28efa895fe48c0a4824f73efd900d83 100644 (file)
--- a/src/data/sys-file-reader.c
+++ b/src/data/sys-file-reader.c
@@ -1400,15 +1400,6 @@ parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
    info->product = ss_xstrdup (product);
  }
  
-static struct variable *
-add_var_with_generated_name (struct dictionary *dict, int width)
-{
-  char *name = dict_make_unique_var_name (dict, NULL, NULL);
-  struct variable *var = dict_create_var_assert (dict, name, width);
-  free (name);
-  return var;
-}
-
  /* Reads a variable (type 2) record from R and adds the
     corresponding variable to DICT.
     Also skips past additional variable records for long string
@@ -1423,12 +1414,10 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
  
    for (rec = var_recs; rec < &var_recs[n_var_recs];)
      {
-      size_t n_values;
-      char *name;
-      size_t i;
+      char *name = recode_string_pool ("UTF-8", dict_encoding,
+                                       rec->name, -1, r->pool);
  
-      name = recode_string_pool ("UTF-8", dict_encoding,
-                                 rec->name, -1, r->pool);
+      /* Names are right-padded with spaces. */
        name[strcspn (name, " ")] = '\0';
  
        if (rec->width < 0 || rec->width > 255)
@@ -1438,25 +1427,13 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
            return false;
          }
  
-      struct variable *var;
-      if (!dict_id_is_valid (dict, name) || name[0] == '$' || name[0] == '#')
-        {
-          var = add_var_with_generated_name (dict, rec->width);
-          sys_warn (r, rec->pos, _("Renaming variable with invalid name "
-                                   "`%s' to `%s'."), name, var_get_name (var));
-        }
-      else
-        {
-          var = dict_create_var (dict, name, rec->width);
-          if (var == NULL)
-            {
-              var = add_var_with_generated_name (dict, rec->width);
-              sys_warn (r, rec->pos, _("Renaming variable with duplicate name "
-                                       "`%s' to `%s'."),
-                        name, var_get_name (var));
-            }
-        }
+      struct variable *var = dict_create_var_with_unique_name (dict, name,
+                                                               rec->width);
        rec->var = var;
+      if (strcmp (var_get_name (var), name))
+        sys_warn (r, rec->pos, _("Renaming variable with invalid or duplicate "
+                                 "name `%s' to `%s'."),
+                  name, var_get_name (var));
  
        /* Set the short name the same as the long name (even if we renamed
           it). */
@@ -1500,14 +1477,14 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
                    ofs += 16;
                  }
  
-              for (i = 0; i < n_discrete; i++)
+              for (size_t i = 0; i < n_discrete; i++)
                  {
                    mv_add_num (&mv, parse_float (r, rec->missing, ofs));
                    ofs += 8;
                  }
              }
            else
-            for (i = 0; i < rec->missing_value_code; i++)
+            for (size_t i = 0; i < rec->missing_value_code; i++)
                mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
            var_set_missing_values (var, &mv);
          }
@@ -1520,8 +1497,8 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
  
        /* Account for values.
           Skip long string continuation records, if any. */
-      n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
-      for (i = 1; i < n_values; i++)
+      size_t n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
+      for (size_t i = 1; i < n_values; i++)
          if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
            {
              sys_error (r, rec->pos, _("Missing string continuation record."));
@@ -2056,8 +2033,7 @@ parse_long_var_name_map (struct sfm_reader *r,
    while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
      {
        /* Validate long name. */
-      if (!dict_id_is_valid (dict, long_name)
-          || long_name[0] == '$' || long_name[0] == '#')
+      if (!dict_id_is_valid (dict, long_name, DC_ORDINARY))
          {
            sys_warn (r, record->pos,
                      _("Long variable mapping from %s to invalid "
diff --git a/src/language/commands/attributes.c b/src/language/commands/attributes.c

index e646af3847ed45e8cc4ec58d98c583838e850562..deaadc18476ae44a97f09c5d318f184028ce7e25 100644 (file)
--- a/src/language/commands/attributes.c
+++ b/src/language/commands/attributes.c
@@ -88,7 +88,8 @@ parse_attribute_name (struct lexer *lexer, const char *dict_encoding,
  {
    if (!lex_force_id (lexer))
      return NULL;
-  char *error = id_is_valid__ (lex_tokcstr (lexer), dict_encoding);
+  char *error = id_is_valid__ (lex_tokcstr (lexer), dict_encoding,
+                               DC_ORDINARY | DC_SYSTEM | DC_SCRATCH);
    if (error)
      {
        lex_error (lexer, "%s", error);
diff --git a/src/language/commands/get-data.c b/src/language/commands/get-data.c

index 377e40c555554792a18ff12b35129a8843c70226..93c933ad6a6f576b7dd782392d8f9c8024edf313 100644 (file)
--- a/src/language/commands/get-data.c
+++ b/src/language/commands/get-data.c
@@ -511,7 +511,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds)
        if (!lex_force_id (lexer))
          goto error;
        name = xstrdup (lex_tokcstr (lexer));
-      char *error = dict_id_is_valid__ (dict, name);
+      char *error = dict_id_is_valid__ (dict, name, DC_ORDINARY);
        if (error)
          {
            lex_error (lexer, "%s", error);
diff --git a/src/language/commands/matrix.c b/src/language/commands/matrix.c

index 4dd70dcc84ca870b67c961ae4b9fb0f26877a0f6..bd6916a95144fd22891ebdd73006e54384c3bc3c 100644 (file)
--- a/src/language/commands/matrix.c
+++ b/src/language/commands/matrix.c
@@ -6170,7 +6170,7 @@ save_file_open (struct save_file *sf, gsl_matrix *m,
            for (size_t i = 0; i < nv.size; i++)
              {
                char *name = trimmed_string (gsl_vector_get (&nv, i));
-              char *error = dict_id_is_valid__ (dict, name);
+              char *error = dict_id_is_valid__ (dict, name, DC_ORDINARY);
                if (!error)
                  string_array_append_nocopy (&names, name);
                else
diff --git a/src/language/commands/vector.c b/src/language/commands/vector.c

index b9407cbb6a5c7959cfcc8bd668edbc983f022140..cece0afb38dc5210ca53de92d3a9186be94b11ce 100644 (file)
--- a/src/language/commands/vector.c
+++ b/src/language/commands/vector.c
@@ -58,7 +58,8 @@ cmd_vector (struct lexer *lexer, struct dataset *ds)
        size_t allocated_vectors = 0;
        while (lex_token (lexer) == T_ID)
         {
-          char *error = dict_id_is_valid__ (dict, lex_tokcstr (lexer));
+          char *error = dict_id_is_valid__ (dict, lex_tokcstr (lexer),
+                                            DC_ORDINARY | DC_SCRATCH);
            if (error)
              {
                lex_error (lexer, "%s", error);
@@ -176,7 +177,8 @@ cmd_vector (struct lexer *lexer, struct dataset *ds)
              for (size_t j = 0; j < n_vars; j++)
                {
                  char *name = xasprintf ("%s%zu", vectors[i], j + 1);
-                char *error = dict_id_is_valid__ (dict, name);
+                char *error = dict_id_is_valid__ (dict, name,
+                                                  DC_ORDINARY | DC_SCRATCH);
                  if (error)
                    {
                      lex_ofs_error (lexer, vectors_start, end_ofs, "%s", error);
diff --git a/src/language/lexer/variable-parser.c b/src/language/lexer/variable-parser.c

index 57f7042eca11cae9702a7f71a80aeea1774dffe4..02bfcc9dfd3734edaca35b03e5daed8baddfbd8a 100644 (file)
--- a/src/language/lexer/variable-parser.c
+++ b/src/language/lexer/variable-parser.c
@@ -429,14 +429,15 @@ fail:
  }
  
  char *
-parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d)
+parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d,
+                     enum dict_class classes)
  {
    if (!is_dict_name_token (lexer, d))
      {
        lex_error (lexer, ("Syntax error expecting variable name."));
        return NULL;
      }
-  char *error = dict_id_is_valid__ (d, lex_tokcstr (lexer));
+  char *error = dict_id_is_valid__ (d, lex_tokcstr (lexer), classes);
    if (error)
      {
        lex_error (lexer, "%s", error);
@@ -547,18 +548,15 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict,
        names = NULL;
      }
  
+  enum dict_class classes = (pv_opts & PV_NO_SCRATCH
+                             ? DC_ORDINARY
+                             : DC_ORDINARY | DC_SCRATCH);
    do
      {
        int start_ofs = lex_ofs (lexer);
-      name1 = parse_DATA_LIST_var (lexer, dict);
+      name1 = parse_DATA_LIST_var (lexer, dict, classes);
        if (!name1)
          goto exit;
-      if (dict_class_from_id (name1) == DC_SCRATCH && pv_opts & PV_NO_SCRATCH)
-       {
-         lex_ofs_error (lexer, start_ofs, start_ofs,
-                         _("Scratch variables not allowed here."));
-         goto exit;
-       }
        if (lex_match (lexer, T_TO))
         {
           unsigned long int num1, num2;
@@ -566,7 +564,7 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict,
            int root_len1, root_len2;
            unsigned long int number;
  
-          name2 = parse_DATA_LIST_var (lexer, dict);
+          name2 = parse_DATA_LIST_var (lexer, dict, classes);
            if (!name2)
              goto exit;
            int end_ofs = lex_ofs (lexer) - 1;
diff --git a/src/language/lexer/variable-parser.h b/src/language/lexer/variable-parser.h

index 8fd4824671e8062aa1e33fea6b3453bf79cea6e9..abc92825aa418da791613873a61f5757cbd094fc 100644 (file)
--- a/src/language/lexer/variable-parser.h
+++ b/src/language/lexer/variable-parser.h
@@ -19,6 +19,7 @@
  
  #include <stdbool.h>
  #include <stddef.h>
+#include "data/dict-class.h"
  
  struct pool;
  struct dictionary;
@@ -60,7 +61,8 @@ bool parse_variables_pool (struct lexer *, struct pool *, const struct dictionar
  bool parse_var_set_vars (struct lexer *, const struct var_set *, struct variable ***, size_t *,
                          int opts);
  
-char *parse_DATA_LIST_var (struct lexer *, const struct dictionary *);
+char *parse_DATA_LIST_var (struct lexer *, const struct dictionary *,
+                           enum dict_class);
  bool parse_DATA_LIST_vars (struct lexer *, const struct dictionary *,
                             char ***names, size_t *n, int opts);
  bool parse_DATA_LIST_vars_pool (struct lexer *, const struct dictionary *,
diff --git a/src/ui/gui/psppire-dict.c b/src/ui/gui/psppire-dict.c

index 32a5cc13e26fa40d4c477023984c23d109141c77..dbad71aa16b7e463c64d153c7b63749e8a62ef42 100644 (file)
--- a/src/ui/gui/psppire-dict.c
+++ b/src/ui/gui/psppire-dict.c
@@ -471,7 +471,7 @@ psppire_dict_set_name (PsppireDict* d, gint idx, const gchar *name)
    g_assert (d);
    g_assert (PSPPIRE_IS_DICT (d));
  
-  if (! dict_id_is_valid (d->dict, name))
+  if (! dict_id_is_valid (d->dict, name, DC_ORDINARY))
      return FALSE;
  
    if (idx < dict_get_n_vars (d->dict))
@@ -561,7 +561,7 @@ gboolean
  psppire_dict_check_name (const PsppireDict *dict,
                          const gchar *name)
  {
-  return (dict_id_is_valid (dict->dict, name)
+  return (dict_id_is_valid (dict->dict, name, DC_ORDINARY)
            && !psppire_dict_lookup_var (dict, name));
  }
  
@@ -879,7 +879,7 @@ gboolean
  psppire_dict_rename_var (PsppireDict *dict, struct variable *v,
                          const gchar *name)
  {
-  if (! dict_id_is_valid (dict->dict, name))
+  if (! dict_id_is_valid (dict->dict, name, DC_ORDINARY))
      return FALSE;
  
    /* Make sure no other variable has this name */
diff --git a/src/ui/gui/psppire-import-textfile.c b/src/ui/gui/psppire-import-textfile.c

index 4b5c0963c8da67fc9f7050b0224ebf4c0f782231..167c04ccea4295ffe23ac8d646b76c2c55bf7380 100644 (file)
--- a/src/ui/gui/psppire-import-textfile.c
+++ b/src/ui/gui/psppire-import-textfile.c
@@ -427,7 +427,6 @@ static void
  choose_column_names (PsppireImportAssistant *ia)
  {
    int i;
-  unsigned long int generated_name_count = 0;
    char *encoding = NULL;
    g_object_get (ia->text_file, "encoding", &encoding, NULL);
    if (ia->dict)
@@ -446,12 +445,7 @@ choose_column_names (PsppireImportAssistant *ia)
           candidate_name = psppire_delimited_text_get_header_title (PSPPIRE_DELIMITED_TEXT (ia->delimiters_model), i);
         }
  
-      char *name = dict_make_unique_var_name (ia->dict,
-                                             candidate_name,
-                                             &generated_name_count);
-
-      dict_create_var_assert (ia->dict, name, 0);
-      free (name);
+      dict_create_var_with_unique_name (ia->dict, candidate_name, 0);
      }
  }
  
diff --git a/tests/data/dictionary.at b/tests/data/dictionary.at

index e7e589e88163b0f115635e943de7d531232030a8..aec431b4d4cb67e43d4e88f6d22712d769cd1086 100644 (file)
--- a/tests/data/dictionary.at
+++ b/tests/data/dictionary.at
@@ -18,6 +18,7 @@ AT_BANNER([dictionary])
  
  AT_SETUP([dictionary case-insensitivity])
  AT_DATA([dictionary.sps], [dnl
+SET LOCALE='UTF-8'.
  DATA LIST LIST /aèiöu aeiou.
  BEGIN DATA
  1 2
@@ -42,8 +43,8 @@ Table: Data List
  AÈIÖU,aeiou
  1.00,2.00
  
-"dictionary.sps:8.17-8.29: error: RENAME VARIABLES: Renaming would duplicate variable name aèiöu.
-    8 | RENAME VARIABLE (aeiou=aèiöu).
+"dictionary.sps:9.17-9.29: error: RENAME VARIABLES: Renaming would duplicate variable name aèiöu.
+    9 | RENAME VARIABLE (aeiou=aèiöu).
        |                 ^~~~~~~~~~~~~"
  ])
  
diff --git a/tests/data/pc+-file-reader.at b/tests/data/pc+-file-reader.at

index 115b2f953f26f1c8ebb72c9e6c67d74d6b37e826..0fc02c169d1dbce76650adc676245b6cb12440f9 100644 (file)
--- a/tests/data/pc+-file-reader.at
+++ b/tests/data/pc+-file-reader.at
@@ -1049,11 +1049,11 @@ AT_DATA([pc+-file.sps], [dnl
  GET FILE='pc+-file.sav' ENCODING='us-ascii'.
  ])
  AT_CHECK([pspp -O format=csv pc+-file.sps], [0], [dnl
-warning: `pc+-file.sav' near offset 0x230: Renaming variable with duplicate name `NUM1' to `VAR001'.
+warning: `pc+-file.sav' near offset 0x230: Renaming variable with duplicate name `NUM1' to `NUM1_A'.
  
-warning: `pc+-file.sav' near offset 0x250: Renaming variable with duplicate name `NUM1' to `VAR002'.
+warning: `pc+-file.sav' near offset 0x250: Renaming variable with duplicate name `NUM1' to `NUM1_B'.
  
-warning: `pc+-file.sav' near offset 0x270: Renaming variable with duplicate name `NUM1' to `VAR003'.
+warning: `pc+-file.sav' near offset 0x270: Renaming variable with duplicate name `NUM1' to `NUM1_C'.
  ])
  AT_CLEANUP
  
diff --git a/tests/data/sys-file-reader.at b/tests/data/sys-file-reader.at

index 4057109e1b203eb813b0d8f39b8c6e671fd02a26..7771c27155ad5bdde645f999b742f911039ebffd 100644 (file)
--- a/tests/data/sys-file-reader.at
+++ b/tests/data/sys-file-reader.at
@@ -1748,7 +1748,7 @@ for variant in be le; do
    AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'.
  ])
    AT_CHECK([pspp -O format=csv sys-file.sps], 0,
-   [warning: `sys-file.sav' near offset 0xb4: Renaming variable with invalid name `$UM1' to `VAR001'.
+   [warning: `sys-file.sav' near offset 0xb4: Renaming variable with invalid or duplicate name `$UM1' to `UM1'.
  ])
  done
  AT_CLEANUP
@@ -1774,7 +1774,7 @@ for variant in be le; do
    AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'.
  ])
    AT_CHECK([pspp -O format=csv sys-file.sps], 0,
-   [warning: `sys-file.sav' near offset 0xb4: Renaming variable with invalid name `TO' to `VAR001'.
+   [warning: `sys-file.sav' near offset 0xb4: Renaming variable with invalid or duplicate name `TO' to `TO_A'.
  ])
  done
  AT_CLEANUP
@@ -1830,12 +1830,12 @@ for variant in be le; do
  DISPLAY DICTIONARY.
  ])
    AT_CHECK([pspp -O format=csv sys-file.sps], [0],
-   [warning: `sys-file.sav' near offset 0xd4: Renaming variable with duplicate name `VAR1' to `VAR001'.
+   [warning: `sys-file.sav' near offset 0xd4: Renaming variable with invalid or duplicate name `VAR1' to `VAR1_A'.
  
  Table: Variables
  Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format
  var1,1,Unknown,Input,8,Right,F8.0,F8.0
-var001,2,Unknown,Input,8,Right,F8.0,F8.0
+var1_a,2,Unknown,Input,8,Right,F8.0,F8.0
  ])
  done
  AT_CLEANUP
diff --git a/tests/language/commands/mrsets.at b/tests/language/commands/mrsets.at

index c805b64455f8ff99d8f2697ad2a5f4659550efc8..58ddd1e77100901b3ec8db1cc8c88a845fae36ee 100644 (file)
--- a/tests/language/commands/mrsets.at
+++ b/tests/language/commands/mrsets.at
@@ -278,7 +278,7 @@ AT_CHECK([pspp -O format=csv mrsets.sps], [1], [dnl
     20 | MRSETS /MDGROUP NAME=**.
        |                      ^~"
  
-"mrsets.sps:21.22: error: MRSETS: x is not a valid name for a multiple response set.  Multiple response set names must begin with `$'.
+"mrsets.sps:21.22: error: MRSETS: `x' is not valid here because this identifier must start with `$'.
     21 | MRSETS /MDGROUP NAME=x.
        |                      ^"
author	Ben Pfaff <blp@cs.stanford.edu>
	Sat, 29 Apr 2023 21:36:56 +0000 (14:36 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sat, 29 Apr 2023 21:36:56 +0000 (14:36 -0700)
src/data/dictionary.c		patch \| blob \| history
src/data/dictionary.h		patch \| blob \| history
src/data/gnumeric-reader.c		patch \| blob \| history
src/data/identifier.h		patch \| blob \| history
src/data/identifier2.c		patch \| blob \| history
src/data/mrset.c		patch \| blob \| history
src/data/ods-reader.c		patch \| blob \| history
src/data/pc+-file-reader.c		patch \| blob \| history
src/data/por-file-reader.c		patch \| blob \| history
src/data/psql-reader.c		patch \| blob \| history
src/data/sys-file-reader.c		patch \| blob \| history
src/language/commands/attributes.c		patch \| blob \| history
src/language/commands/get-data.c		patch \| blob \| history
src/language/commands/matrix.c		patch \| blob \| history
src/language/commands/vector.c		patch \| blob \| history
src/language/lexer/variable-parser.c		patch \| blob \| history
src/language/lexer/variable-parser.h		patch \| blob \| history
src/ui/gui/psppire-dict.c		patch \| blob \| history
src/ui/gui/psppire-import-textfile.c		patch \| blob \| history
tests/data/dictionary.at		patch \| blob \| history
tests/data/pc+-file-reader.at		patch \| blob \| history
tests/data/sys-file-reader.at		patch \| blob \| history
tests/language/commands/mrsets.at		patch \| blob \| history