From e2f99612bf4f4691623f16730eed3e55afdc54f0 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 21 Feb 2018 11:40:26 -0800 Subject: [PATCH] SAVE TRANSLATE: Allow variable names with space, etc. in output. The SPSS manuals actually document this to work and I've never noticed before. This fixes the feature omission. Reported by Elio Spinello . --- doc/files.texi | 16 +++-- src/data/dictionary.c | 31 ++++++++- src/data/dictionary.h | 3 + src/data/variable.c | 3 - src/language/data-io/save-translate.c | 1 + src/language/data-io/trim.c | 21 +++--- src/language/lexer/variable-parser.c | 83 ++++++++++++++++-------- src/language/lexer/variable-parser.h | 2 + tests/language/data-io/save-translate.at | 4 +- 9 files changed, 112 insertions(+), 52 deletions(-) diff --git a/doc/files.texi b/doc/files.texi index f0b56aaf22..ede6e37d78 100644 --- a/doc/files.texi +++ b/doc/files.texi @@ -814,13 +814,15 @@ values as if they were not missing. Specify MISSING=RECODE to output numeric user-missing values like system-missing values and string user-missing values as all spaces. -By default, all the variables in the active dataset dictionary are saved -to the system file, but @subcmd{DROP} or @subcmd{KEEP} can select a subset of variable -to save. The @subcmd{RENAME} subcommand can also be used to change the names -under which variables are saved. @subcmd{UNSELECTED} determines whether cases -filtered out by the @cmd{FILTER} command are written to the output file. -These subcommands have the same syntax and meaning as on the -@cmd{SAVE} command (@pxref{SAVE}). +By default, all the variables in the active dataset dictionary are +saved to the system file, but @subcmd{DROP} or @subcmd{KEEP} can +select a subset of variable to save. The @subcmd{RENAME} subcommand +can also be used to change the names under which variables are saved; +because they are used only in the output, these names do not have to +conform to the usual PSPP variable naming rules. @subcmd{UNSELECTED} +determines whether cases filtered out by the @cmd{FILTER} command are +written to the output file. These subcommands have the same syntax +and meaning as on the @cmd{SAVE} command (@pxref{SAVE}). Each supported file type has additional subcommands, explained in separate sections below. diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 0b930d579c..ff5f8ec027 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -74,6 +74,11 @@ struct dictionary struct mrset **mrsets; /* Multiple response sets. */ size_t n_mrsets; /* Number of multiple response sets. */ + /* Whether variable names must be valid identifiers. Normally, this is + true, but sometimes a dictionary is prepared for external use + (e.g. output to a CSV file) where names don't have to be valid. */ + bool names_must_be_ids; + char *encoding; /* Character encoding of string data */ const struct dict_callbacks *callbacks; /* Callbacks on dictionary @@ -102,7 +107,8 @@ bool dict_id_is_valid (const struct dictionary *dict, const char *id, bool issue_error) { - return id_is_valid (id, dict->encoding, issue_error); + return (!dict->names_must_be_ids + || id_is_valid (id, dict->encoding, issue_error)); } void @@ -170,6 +176,7 @@ dict_create (const char *encoding) struct dictionary *d = xzalloc (sizeof *d); d->encoding = xstrdup (encoding); + d->names_must_be_ids = true; hmap_init (&d->name_map); attrset_init (&d->attributes); @@ -193,6 +200,7 @@ dict_clone (const struct dictionary *s) size_t i; d = dict_create (s->encoding); + dict_set_names_must_be_ids (d, dict_get_names_must_be_ids (s)); for (i = 0; i < s->var_cnt; i++) { @@ -994,6 +1002,27 @@ dict_make_unique_var_name (const struct dictionary *dict, const char *hint, return make_numeric_name (dict, num_start); } +/* Returns whether variable names must be valid identifiers. Normally, this is + true, but sometimes a dictionary is prepared for external use (e.g. output + to a CSV file) where names don't have to be valid. */ +bool +dict_get_names_must_be_ids (const struct dictionary *d) +{ + return d->names_must_be_ids; +} + +/* Sets whether variable names must be valid identifiers. Normally, this is + true, but sometimes a dictionary is prepared for external use (e.g. output + to a CSV file) where names don't have to be valid. + + Changing this setting from false to true doesn't make the dictionary check + all the existing variable names, so it can cause an invariant violation. */ +void +dict_set_names_must_be_ids (struct dictionary *d, bool names_must_be_ids) +{ + d->names_must_be_ids = names_must_be_ids; +} + /* Returns the weighting variable in dictionary D, or a null pointer if the dictionary is unweighted. */ struct variable * diff --git a/src/data/dictionary.h b/src/data/dictionary.h index 1166fcd280..a4be0fd448 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -91,6 +91,9 @@ bool dict_rename_vars (struct dictionary *, char *dict_make_unique_var_name (const struct dictionary *, const char *hint, unsigned long int *num_start); +bool dict_get_names_must_be_ids (const struct dictionary *); +void dict_set_names_must_be_ids (struct dictionary *, bool); + /* Weight variable. */ double dict_get_case_weight (const struct dictionary *, const struct ccase *, bool *); diff --git a/src/data/variable.c b/src/data/variable.c index 656a06204f..44f8a4fe40 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -186,7 +186,6 @@ static void var_set_name_quiet (struct variable *v, const char *name) { assert (!var_has_vardict (v)); - assert (id_is_plausible (name, false)); free (v->name); v->name = xstrdup (name); @@ -1132,8 +1131,6 @@ var_set_short_name (struct variable *var, size_t idx, const char *short_name) { struct variable *ov = var_clone (var); - assert (short_name == NULL || id_is_plausible (short_name, false)); - /* Clear old short name numbered IDX, if any. */ if (idx < var->short_name_cnt) { diff --git a/src/language/data-io/save-translate.c b/src/language/data-io/save-translate.c index 26941329f3..e68b4133bb 100644 --- a/src/language/data-io/save-translate.c +++ b/src/language/data-io/save-translate.c @@ -68,6 +68,7 @@ cmd_save_translate (struct lexer *lexer, struct dataset *ds) type = 0; dict = dict_clone (dataset_dict (ds)); + dict_set_names_must_be_ids (dict, false); stage = NULL; map = NULL; diff --git a/src/language/data-io/trim.c b/src/language/data-io/trim.c index 947a7524a8..68a2b5ed9c 100644 --- a/src/language/data-io/trim.c +++ b/src/language/data-io/trim.c @@ -75,28 +75,27 @@ parse_dict_rename (struct lexer *lexer, struct dictionary *dict) lex_match (lexer, T_EQUALS); if (lex_token (lexer) != T_LPAREN) { - struct variable *v; - - v = parse_variable (lexer, dict); + struct variable *v = parse_variable (lexer, dict); if (v == NULL) return 0; - if (!lex_force_match (lexer, T_EQUALS) - || !lex_force_id (lexer) - || !dict_id_is_valid (dict, lex_tokcstr (lexer), true)) - return 0; - if (dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL) + if (!lex_force_match (lexer, T_EQUALS)) + return 0; + + char *new_name = parse_DATA_LIST_var (lexer, dict); + if (dict_lookup_var (dict, new_name) != NULL) { msg (SE, _("Cannot rename %s as %s because there already exists " "a variable named %s. To rename variables with " "overlapping names, use a single RENAME subcommand " "such as `/RENAME (A=B)(B=C)(C=A)', or equivalently, " "`/RENAME (A B C=B C A)'."), - var_get_name (v), lex_tokcstr (lexer), lex_tokcstr (lexer)); + var_get_name (v), new_name, new_name); + free (new_name); return 0; } - dict_rename_var (dict, v, lex_tokcstr (lexer)); - lex_get (lexer); + dict_rename_var (dict, v, new_name); + free (new_name); return 1; } diff --git a/src/language/lexer/variable-parser.c b/src/language/lexer/variable-parser.c index b91c038ac9..514ce160f4 100644 --- a/src/language/lexer/variable-parser.c +++ b/src/language/lexer/variable-parser.c @@ -46,26 +46,42 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -static struct variable * var_set_get_var (const struct var_set *, size_t ); - +static struct variable *var_set_get_var (const struct var_set *, size_t); static struct variable *var_set_lookup_var (const struct var_set *, const char *); - static bool var_set_lookup_var_idx (const struct var_set *, const char *, size_t *); +static bool var_set_get_names_must_be_ids (const struct var_set *); +static bool +is_name_token (const struct lexer *lexer, bool names_must_be_ids) +{ + return (lex_token (lexer) == T_ID + || (!names_must_be_ids && lex_token (lexer) == T_STRING)); +} +static bool +is_vs_name_token (const struct lexer *lexer, const struct var_set *vs) +{ + return is_name_token (lexer, var_set_get_names_must_be_ids (vs)); +} + +static bool +is_dict_name_token (const struct lexer *lexer, const struct dictionary *d) +{ + return is_name_token (lexer, dict_get_names_must_be_ids (d)); +} /* Parses a name as a variable within VS. Sets *IDX to the variable's index and returns true if successful. On failure emits an error message and returns false. */ static bool parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs, - size_t *idx) + size_t *idx) { assert (idx != NULL); - if (lex_token (lexer) != T_ID) + if (!is_vs_name_token (lexer, vs)) { lex_error (lexer, _("expecting variable name")); return false; @@ -344,7 +360,8 @@ parse_var_set_vars (struct lexer *lexer, const struct var_set *vs, lex_match (lexer, T_COMMA); } while (lex_token (lexer) == T_ALL - || (lex_token (lexer) == T_ID && var_set_lookup_var (vs, lex_tokcstr (lexer)) != NULL)); + || (is_vs_name_token (lexer, vs) + && var_set_lookup_var (vs, lex_tokcstr (lexer)) != NULL)); if (*nv == 0) goto fail; @@ -360,6 +377,22 @@ fail: return 0; } +char * +parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d) +{ + if (!is_dict_name_token (lexer, d)) + { + lex_error (lexer, "expecting variable name"); + return NULL; + } + if (!dict_id_is_valid (d, lex_tokcstr (lexer), true)) + return NULL; + + char *name = xstrdup (lex_tokcstr (lexer)); + lex_get (lexer); + return name; +} + /* Attempts to break UTF-8 encoded NAME into a root (whose contents are arbitrary except that it does not end in a digit) followed by an integer numeric suffix. On success, stores the value of the suffix into *NUMBERP, @@ -456,37 +489,22 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict, do { - if (lex_token (lexer) != T_ID - || !dict_id_is_valid (dict, lex_tokcstr (lexer), true)) - { - lex_error (lexer, "expecting variable name"); - goto exit; - } - if (dict_class_from_id (lex_tokcstr (lexer)) == DC_SCRATCH - && (pv_opts & PV_NO_SCRATCH)) + name1 = parse_DATA_LIST_var (lexer, dict); + if (!name1) + goto exit; + if (dict_class_from_id (name1) == DC_SCRATCH && pv_opts & PV_NO_SCRATCH) { msg (SE, _("Scratch variables not allowed here.")); goto exit; } - name1 = xstrdup (lex_tokcstr (lexer)); - lex_get (lexer); - if (lex_token (lexer) == T_TO) + if (lex_match (lexer, T_TO)) { - char *name2 = NULL; unsigned long int num1, num2; int n_digits1, n_digits2; int root_len1, root_len2; unsigned long int number; - lex_get (lexer); - if (lex_token (lexer) != T_ID - || !dict_id_is_valid (dict, lex_tokcstr (lexer), true)) - { - lex_error (lexer, "expecting variable name"); - goto exit; - } - name2 = xstrdup (lex_tokcstr (lexer)); - lex_get (lexer); + char *name2 = parse_DATA_LIST_var (lexer, dict); root_len1 = extract_numeric_suffix (name1, &num1, &n_digits1); if (root_len1 == 0) @@ -613,7 +631,7 @@ parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict, *names = NULL; *nnames = 0; } - while (lex_token (lexer) == T_ID || lex_token (lexer) == T_ALL) + while (is_dict_name_token (lexer, dict) || lex_token (lexer) == T_ALL) { if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL) { @@ -670,6 +688,7 @@ parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struc /* A set of variables. */ struct var_set { + bool names_must_be_ids; size_t (*get_cnt) (const struct var_set *); struct variable *(*get_var) (const struct var_set *, size_t idx); bool (*lookup_var_idx) (const struct var_set *, const char *, size_t *); @@ -727,6 +746,12 @@ var_set_destroy (struct var_set *vs) if (vs != NULL) vs->destroy (vs); } + +static bool +var_set_get_names_must_be_ids (const struct var_set *vs) +{ + return vs->names_must_be_ids; +} /* Returns the number of variables in VS. */ static size_t @@ -776,6 +801,7 @@ struct var_set * var_set_create_from_dict (const struct dictionary *d) { struct var_set *vs = xmalloc (sizeof *vs); + vs->names_must_be_ids = dict_get_names_must_be_ids (d); vs->get_cnt = dict_var_set_get_cnt; vs->get_var = dict_var_set_get_var; vs->lookup_var_idx = dict_var_set_lookup_var_idx; @@ -852,6 +878,7 @@ var_set_create_from_array (struct variable *const *var, size_t var_cnt) size_t i; vs = xmalloc (sizeof *vs); + vs->names_must_be_ids = true; vs->get_cnt = array_var_set_get_cnt; vs->get_var = array_var_set_get_var; vs->lookup_var_idx = array_var_set_lookup_var_idx; diff --git a/src/language/lexer/variable-parser.h b/src/language/lexer/variable-parser.h index 57b4e47063..06704d3f4d 100644 --- a/src/language/lexer/variable-parser.h +++ b/src/language/lexer/variable-parser.h @@ -59,6 +59,8 @@ bool parse_variables_pool (struct lexer *, struct pool *, const struct dictionar struct variable ***, size_t *, int opts); bool parse_var_set_vars (struct lexer *, const struct var_set *, struct variable ***, size_t *, int opts); + +char *parse_DATA_LIST_var (struct lexer *, const struct dictionary *); bool parse_DATA_LIST_vars (struct lexer *, const struct dictionary *, char ***names, size_t *cnt, int opts); bool parse_DATA_LIST_vars_pool (struct lexer *, const struct dictionary *, diff --git a/tests/language/data-io/save-translate.at b/tests/language/data-io/save-translate.at index aca5b0ed2a..a81d074b76 100644 --- a/tests/language/data-io/save-translate.at +++ b/tests/language/data-io/save-translate.at @@ -87,9 +87,9 @@ AT_CLEANUP AT_SETUP([CSV output -- KEEP, RENAME]) PREPARE_SAVE_TRANSLATE_CSV( - [/FIELDNAMES /KEEP=time string /RENAME string=name /UNSELECTED=DELETE]) + [/FIELDNAMES /KEEP=time string /RENAME string='long name with spaces' /UNSELECTED=DELETE]) AT_CHECK([cat data.csv], [0], [dnl -time,name +time,long name with spaces -05:17:00, xxx 12:00:00,xyzzy ]) -- 2.30.2