SAVE TRANSLATE: Allow variable names with space, etc. in output.

author Ben Pfaff <blp@cs.stanford.edu>

Wed, 21 Feb 2018 19:40:26 +0000 (11:40 -0800)

committer Ben Pfaff <blp@cs.stanford.edu>

Wed, 21 Feb 2018 19:40:30 +0000 (11:40 -0800)
author Ben Pfaff <blp@cs.stanford.edu>
Wed, 21 Feb 2018 19:40:26 +0000 (11:40 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Wed, 21 Feb 2018 19:40:30 +0000 (11:40 -0800)
diff --git a/doc/files.texi b/doc/files.texi

index f0b56aaf22dd49d61389efd62c4a4dae8071dc37..ede6e37d78f050ab3858db482348a44751f97e55 100644 (file)
--- a/doc/files.texi
+++ b/doc/files.texi
@@ -814,13 +814,15 @@ values as if they were not missing.  Specify MISSING=RECODE to output
  numeric user-missing values like system-missing values and string
  user-missing values as all spaces.
  
-By default, all the variables in the active dataset dictionary are saved
-to the system file, but @subcmd{DROP} or @subcmd{KEEP} can select a subset of variable
-to save.  The @subcmd{RENAME} subcommand can also be used to change the names
-under which variables are saved.  @subcmd{UNSELECTED} determines whether cases
-filtered out by the @cmd{FILTER} command are written to the output file.
-These subcommands have the same syntax and meaning as on the
-@cmd{SAVE} command (@pxref{SAVE}).
+By default, all the variables in the active dataset dictionary are
+saved to the system file, but @subcmd{DROP} or @subcmd{KEEP} can
+select a subset of variable to save.  The @subcmd{RENAME} subcommand
+can also be used to change the names under which variables are saved;
+because they are used only in the output, these names do not have to
+conform to the usual PSPP variable naming rules.  @subcmd{UNSELECTED}
+determines whether cases filtered out by the @cmd{FILTER} command are
+written to the output file.  These subcommands have the same syntax
+and meaning as on the @cmd{SAVE} command (@pxref{SAVE}).
  
  Each supported file type has additional subcommands, explained in
  separate sections below.
diff --git a/src/data/dictionary.c b/src/data/dictionary.c

index 0b930d579c3defc7ec19dc0b81de6bb10a07b3d9..ff5f8ec027f16cbe3907ac5ab47acecb1e338425 100644 (file)
--- a/src/data/dictionary.c
+++ b/src/data/dictionary.c
@@ -74,6 +74,11 @@ struct dictionary
      struct mrset **mrsets;      /* Multiple response sets. */
      size_t n_mrsets;            /* Number of multiple response sets. */
  
+    /* Whether variable names must be valid identifiers.  Normally, this is
+       true, but sometimes a dictionary is prepared for external use
+       (e.g. output to a CSV file) where names don't have to be valid. */
+    bool names_must_be_ids;
+
      char *encoding;             /* Character encoding of string data */
  
      const struct dict_callbacks *callbacks; /* Callbacks on dictionary
@@ -102,7 +107,8 @@ bool
  dict_id_is_valid (const struct dictionary *dict, const char *id,
                    bool issue_error)
  {
-  return id_is_valid (id, dict->encoding, issue_error);
+  return (!dict->names_must_be_ids
+          || id_is_valid (id, dict->encoding, issue_error));
  }
  
  void
@@ -170,6 +176,7 @@ dict_create (const char *encoding)
    struct dictionary *d = xzalloc (sizeof *d);
  
    d->encoding = xstrdup (encoding);
+  d->names_must_be_ids = true;
    hmap_init (&d->name_map);
    attrset_init (&d->attributes);
  
@@ -193,6 +200,7 @@ dict_clone (const struct dictionary *s)
    size_t i;
  
    d = dict_create (s->encoding);
+  dict_set_names_must_be_ids (d, dict_get_names_must_be_ids (s));
  
    for (i = 0; i < s->var_cnt; i++)
      {
@@ -994,6 +1002,27 @@ dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
    return make_numeric_name (dict, num_start);
  }
  
+/* Returns whether variable names must be valid identifiers.  Normally, this is
+   true, but sometimes a dictionary is prepared for external use (e.g. output
+   to a CSV file) where names don't have to be valid. */
+bool
+dict_get_names_must_be_ids (const struct dictionary *d)
+{
+  return d->names_must_be_ids;
+}
+
+/* Sets whether variable names must be valid identifiers.  Normally, this is
+   true, but sometimes a dictionary is prepared for external use (e.g. output
+   to a CSV file) where names don't have to be valid.
+
+   Changing this setting from false to true doesn't make the dictionary check
+   all the existing variable names, so it can cause an invariant violation. */
+void
+dict_set_names_must_be_ids (struct dictionary *d, bool names_must_be_ids)
+{
+  d->names_must_be_ids = names_must_be_ids;
+}
+
  /* Returns the weighting variable in dictionary D, or a null
     pointer if the dictionary is unweighted. */
  struct variable *
diff --git a/src/data/dictionary.h b/src/data/dictionary.h

index 1166fcd280b04ec05dda3ac7dc7033d41133f666..a4be0fd44898f01b4ac4ff76fea663ce51942b6c 100644 (file)
--- a/src/data/dictionary.h
+++ b/src/data/dictionary.h
@@ -91,6 +91,9 @@ bool dict_rename_vars (struct dictionary *,
  char *dict_make_unique_var_name (const struct dictionary *, const char *hint,
                                   unsigned long int *num_start);
  
+bool dict_get_names_must_be_ids (const struct dictionary *);
+void dict_set_names_must_be_ids (struct dictionary *, bool);
+
  /* Weight variable. */
  double dict_get_case_weight (const struct dictionary *,
                              const struct ccase *, bool *);
diff --git a/src/data/variable.c b/src/data/variable.c

index 656a06204f1dc077b73987c4f2e18395209edfbe..44f8a4fe40d3b697869097fa461444076f1045a4 100644 (file)
--- a/src/data/variable.c
+++ b/src/data/variable.c
@@ -186,7 +186,6 @@ static void
  var_set_name_quiet (struct variable *v, const char *name)
  {
    assert (!var_has_vardict (v));
-  assert (id_is_plausible (name, false));
  
    free (v->name);
    v->name = xstrdup (name);
@@ -1132,8 +1131,6 @@ var_set_short_name (struct variable *var, size_t idx, const char *short_name)
  {
    struct variable *ov = var_clone (var);
  
-  assert (short_name == NULL || id_is_plausible (short_name, false));
-
    /* Clear old short name numbered IDX, if any. */
    if (idx < var->short_name_cnt)
      {
diff --git a/src/language/data-io/save-translate.c b/src/language/data-io/save-translate.c

index 26941329f35ad378426ad441654031e9ab586791..e68b4133bb6419f466c3aadc9745e86868b52453 100644 (file)
--- a/src/language/data-io/save-translate.c
+++ b/src/language/data-io/save-translate.c
@@ -68,6 +68,7 @@ cmd_save_translate (struct lexer *lexer, struct dataset *ds)
    type = 0;
  
    dict = dict_clone (dataset_dict (ds));
+  dict_set_names_must_be_ids (dict, false);
    stage = NULL;
    map = NULL;
  
diff --git a/src/language/data-io/trim.c b/src/language/data-io/trim.c

index 947a7524a84eaa9d00773d9ff8af440b49910351..68a2b5ed9c1ef28c8963cccfcdac0b364bf1b04b 100644 (file)
--- a/src/language/data-io/trim.c
+++ b/src/language/data-io/trim.c
@@ -75,28 +75,27 @@ parse_dict_rename (struct lexer *lexer, struct dictionary *dict)
    lex_match (lexer, T_EQUALS);
    if (lex_token (lexer) != T_LPAREN)
      {
-      struct variable *v;
-
-      v = parse_variable (lexer, dict);
+      struct variable *v = parse_variable (lexer, dict);
        if (v == NULL)
         return 0;
-      if (!lex_force_match (lexer, T_EQUALS)
-         || !lex_force_id (lexer)
-          || !dict_id_is_valid (dict, lex_tokcstr (lexer), true))
-       return 0;
-      if (dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL)
+      if (!lex_force_match (lexer, T_EQUALS))
+        return 0;
+
+      char *new_name = parse_DATA_LIST_var (lexer, dict);
+      if (dict_lookup_var (dict, new_name) != NULL)
         {
           msg (SE, _("Cannot rename %s as %s because there already exists "
                      "a variable named %s.  To rename variables with "
                      "overlapping names, use a single RENAME subcommand "
                      "such as `/RENAME (A=B)(B=C)(C=A)', or equivalently, "
                      "`/RENAME (A B C=B C A)'."),
-               var_get_name (v), lex_tokcstr (lexer), lex_tokcstr (lexer));
+               var_get_name (v), new_name, new_name);
+          free (new_name);
           return 0;
         }
  
-      dict_rename_var (dict, v, lex_tokcstr (lexer));
-      lex_get (lexer);
+      dict_rename_var (dict, v, new_name);
+      free (new_name);
        return 1;
      }
  
diff --git a/src/language/lexer/variable-parser.c b/src/language/lexer/variable-parser.c

index b91c038ac9080f6404cddbfe08344c184394fd57..514ce160f41b2e7fc751ddb508fc89d05f26e9b4 100644 (file)
--- a/src/language/lexer/variable-parser.c
+++ b/src/language/lexer/variable-parser.c
@@ -46,26 +46,42 @@
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
  
-static struct variable * var_set_get_var (const struct var_set *, size_t );
-
+static struct variable *var_set_get_var (const struct var_set *, size_t);
  static struct variable *var_set_lookup_var (const struct var_set *,
                                             const char *);
-
  static bool var_set_lookup_var_idx (const struct var_set *, const char *,
                                     size_t *);
+static bool var_set_get_names_must_be_ids (const struct var_set *);
  
+static bool
+is_name_token (const struct lexer *lexer, bool names_must_be_ids)
+{
+  return (lex_token (lexer) == T_ID
+          || (!names_must_be_ids && lex_token (lexer) == T_STRING));
+}
  
+static bool
+is_vs_name_token (const struct lexer *lexer, const struct var_set *vs)
+{
+  return is_name_token (lexer, var_set_get_names_must_be_ids (vs));
+}
+
+static bool
+is_dict_name_token (const struct lexer *lexer, const struct dictionary *d)
+{
+  return is_name_token (lexer, dict_get_names_must_be_ids (d));
+}
  
  /* Parses a name as a variable within VS.  Sets *IDX to the
     variable's index and returns true if successful.  On failure
     emits an error message and returns false. */
  static bool
  parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs,
-               size_t *idx)
+                       size_t *idx)
  {
    assert (idx != NULL);
  
-  if (lex_token (lexer) != T_ID)
+  if (!is_vs_name_token (lexer, vs))
      {
        lex_error (lexer, _("expecting variable name"));
        return false;
@@ -344,7 +360,8 @@ parse_var_set_vars (struct lexer *lexer, const struct var_set *vs,
        lex_match (lexer, T_COMMA);
      }
    while (lex_token (lexer) == T_ALL
-         || (lex_token (lexer) == T_ID && var_set_lookup_var (vs, lex_tokcstr (lexer)) != NULL));
+         || (is_vs_name_token (lexer, vs)
+             && var_set_lookup_var (vs, lex_tokcstr (lexer)) != NULL));
  
    if (*nv == 0)
      goto fail;
@@ -360,6 +377,22 @@ fail:
    return 0;
  }
  
+char *
+parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d)
+{
+  if (!is_dict_name_token (lexer, d))
+    {
+      lex_error (lexer, "expecting variable name");
+      return NULL;
+    }
+  if (!dict_id_is_valid (d, lex_tokcstr (lexer), true))
+    return NULL;
+
+  char *name = xstrdup (lex_tokcstr (lexer));
+  lex_get (lexer);
+  return name;
+}
+
  /* Attempts to break UTF-8 encoded NAME into a root (whose contents are
     arbitrary except that it does not end in a digit) followed by an integer
     numeric suffix.  On success, stores the value of the suffix into *NUMBERP,
@@ -456,37 +489,22 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict,
  
    do
      {
-      if (lex_token (lexer) != T_ID
-          || !dict_id_is_valid (dict, lex_tokcstr (lexer), true))
-       {
-         lex_error (lexer, "expecting variable name");
-         goto exit;
-       }
-      if (dict_class_from_id (lex_tokcstr (lexer)) == DC_SCRATCH
-          && (pv_opts & PV_NO_SCRATCH))
+      name1 = parse_DATA_LIST_var (lexer, dict);
+      if (!name1)
+        goto exit;
+      if (dict_class_from_id (name1) == DC_SCRATCH && pv_opts & PV_NO_SCRATCH)
         {
           msg (SE, _("Scratch variables not allowed here."));
           goto exit;
         }
-      name1 = xstrdup (lex_tokcstr (lexer));
-      lex_get (lexer);
-      if (lex_token (lexer) == T_TO)
+      if (lex_match (lexer, T_TO))
         {
-         char *name2 = NULL;
           unsigned long int num1, num2;
            int n_digits1, n_digits2;
            int root_len1, root_len2;
            unsigned long int number;
  
-         lex_get (lexer);
-         if (lex_token (lexer) != T_ID
-              || !dict_id_is_valid (dict, lex_tokcstr (lexer), true))
-           {
-             lex_error (lexer, "expecting variable name");
-             goto exit;
-           }
-          name2 = xstrdup (lex_tokcstr (lexer));
-         lex_get (lexer);
+          char *name2 = parse_DATA_LIST_var (lexer, dict);
  
            root_len1 = extract_numeric_suffix (name1, &num1, &n_digits1);
            if (root_len1 == 0)
@@ -613,7 +631,7 @@ parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict,
        *names = NULL;
        *nnames = 0;
      }
-  while (lex_token (lexer) == T_ID || lex_token (lexer) == T_ALL)
+  while (is_dict_name_token (lexer, dict) || lex_token (lexer) == T_ALL)
      {
        if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL)
         {
@@ -670,6 +688,7 @@ parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struc
  /* A set of variables. */
  struct var_set
    {
+    bool names_must_be_ids;
      size_t (*get_cnt) (const struct var_set *);
      struct variable *(*get_var) (const struct var_set *, size_t idx);
      bool (*lookup_var_idx) (const struct var_set *, const char *, size_t *);
@@ -727,6 +746,12 @@ var_set_destroy (struct var_set *vs)
    if (vs != NULL)
      vs->destroy (vs);
  }
+
+static bool
+var_set_get_names_must_be_ids (const struct var_set *vs)
+{
+  return vs->names_must_be_ids;
+}
  \f
  /* Returns the number of variables in VS. */
  static size_t
@@ -776,6 +801,7 @@ struct var_set *
  var_set_create_from_dict (const struct dictionary *d)
  {
    struct var_set *vs = xmalloc (sizeof *vs);
+  vs->names_must_be_ids = dict_get_names_must_be_ids (d);
    vs->get_cnt = dict_var_set_get_cnt;
    vs->get_var = dict_var_set_get_var;
    vs->lookup_var_idx = dict_var_set_lookup_var_idx;
@@ -852,6 +878,7 @@ var_set_create_from_array (struct variable *const *var, size_t var_cnt)
    size_t i;
  
    vs = xmalloc (sizeof *vs);
+  vs->names_must_be_ids = true;
    vs->get_cnt = array_var_set_get_cnt;
    vs->get_var = array_var_set_get_var;
    vs->lookup_var_idx = array_var_set_lookup_var_idx;
diff --git a/src/language/lexer/variable-parser.h b/src/language/lexer/variable-parser.h

index 57b4e470632df11499850eb39652e7ab6dc51677..06704d3f4d907db2ef6980740a810acd64cc7561 100644 (file)
--- a/src/language/lexer/variable-parser.h
+++ b/src/language/lexer/variable-parser.h
@@ -59,6 +59,8 @@ bool parse_variables_pool (struct lexer *, struct pool *, const struct dictionar
                            struct variable ***, size_t *, int opts);
  bool parse_var_set_vars (struct lexer *, const struct var_set *, struct variable ***, size_t *,
                          int opts);
+
+char *parse_DATA_LIST_var (struct lexer *, const struct dictionary *);
  bool parse_DATA_LIST_vars (struct lexer *, const struct dictionary *,
                             char ***names, size_t *cnt, int opts);
  bool parse_DATA_LIST_vars_pool (struct lexer *, const struct dictionary *,
diff --git a/tests/language/data-io/save-translate.at b/tests/language/data-io/save-translate.at

index aca5b0ed2a82e382312e8e72475d5cdd496980d9..a81d074b7605454777716dcc88ffd6a4c87a6bd1 100644 (file)
--- a/tests/language/data-io/save-translate.at
+++ b/tests/language/data-io/save-translate.at
@@ -87,9 +87,9 @@ AT_CLEANUP
  
  AT_SETUP([CSV output -- KEEP, RENAME])
  PREPARE_SAVE_TRANSLATE_CSV(
-  [/FIELDNAMES /KEEP=time string /RENAME string=name /UNSELECTED=DELETE])
+  [/FIELDNAMES /KEEP=time string /RENAME string='long name with spaces' /UNSELECTED=DELETE])
  AT_CHECK([cat data.csv], [0], [dnl
-time,name
+time,long name with spaces
  -05:17:00, xxx
  12:00:00,xyzzy
  ])
author	Ben Pfaff <blp@cs.stanford.edu>
	Wed, 21 Feb 2018 19:40:26 +0000 (11:40 -0800)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Wed, 21 Feb 2018 19:40:30 +0000 (11:40 -0800)
doc/files.texi		patch \| blob \| history
src/data/dictionary.c		patch \| blob \| history
src/data/dictionary.h		patch \| blob \| history
src/data/variable.c		patch \| blob \| history
src/language/data-io/save-translate.c		patch \| blob \| history
src/language/data-io/trim.c		patch \| blob \| history
src/language/lexer/variable-parser.c		patch \| blob \| history
src/language/lexer/variable-parser.h		patch \| blob \| history
tests/language/data-io/save-translate.at		patch \| blob \| history