From 53d339111a9f51561cfccc65764874cdf54e501a Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sun, 18 Aug 2019 14:26:32 +0200 Subject: [PATCH] /RENAME subcommand: Allow quoted strings in destination variables. Allow syntax like /RENAME = old = "new var" old2 = "another new var". Fixes bug #56771 --- NEWS | 3 + src/language/data-io/combine-files.c | 2 +- src/language/data-io/get.c | 2 +- src/language/data-io/save-translate.c | 2 +- src/language/data-io/save.c | 2 +- src/language/data-io/trim.c | 151 ++++++++++++----------- src/language/data-io/trim.h | 4 +- tests/language/data-io/save-translate.at | 53 +++++++- 8 files changed, 139 insertions(+), 80 deletions(-) diff --git a/NEWS b/NEWS index a733db7c33..7df1da7248 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,9 @@ Changes from 1.2.0 to 1.3.0: * Bug fix for CVE-2018-20230. + * The /RENAME subcommand in SAVE TRANSLATE et al. has been extended + to support the syntax also accepted by other software. + * The EXAMINE command will now perform the Shapiro-Wilk test when one or more plots are requested. diff --git a/src/language/data-io/combine-files.c b/src/language/data-io/combine-files.c index b4eac56a6b..4e6ae4f884 100644 --- a/src/language/data-io/combine-files.c +++ b/src/language/data-io/combine-files.c @@ -238,7 +238,7 @@ combine_files (enum comb_command_type command, while (lex_match (lexer, T_SLASH)) if (lex_match_id (lexer, "RENAME")) { - if (!parse_dict_rename (lexer, file->dict)) + if (!parse_dict_rename (lexer, file->dict, false)) goto error; } else if (lex_match_id (lexer, "IN")) diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c index 0f66f64f98..00e78c8f00 100644 --- a/src/language/data-io/get.c +++ b/src/language/data-io/get.c @@ -139,7 +139,7 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, while (lex_token (lexer) != T_ENDCMD) { lex_match (lexer, T_SLASH); - if (!parse_dict_trim (lexer, dict)) + if (!parse_dict_trim (lexer, dict, false)) goto error; } dict_compact_values (dict); diff --git a/src/language/data-io/save-translate.c b/src/language/data-io/save-translate.c index 0da3c8b96f..72b6e3ae72 100644 --- a/src/language/data-io/save-translate.c +++ b/src/language/data-io/save-translate.c @@ -233,7 +233,7 @@ cmd_save_translate (struct lexer *lexer, struct dataset *ds) goto error; } } - else if (!parse_dict_trim (lexer, dict)) + else if (!parse_dict_trim (lexer, dict, true)) goto error; } diff --git a/src/language/data-io/save.c b/src/language/data-io/save.c index efc7e09d23..f43aba7a46 100644 --- a/src/language/data-io/save.c +++ b/src/language/data-io/save.c @@ -295,7 +295,7 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, porfile_opts.digits = lex_integer (lexer); lex_get (lexer); } - else if (!parse_dict_trim (lexer, dict)) + else if (!parse_dict_trim (lexer, dict, false)) goto error; if (!lex_match (lexer, T_SLASH)) diff --git a/src/language/data-io/trim.c b/src/language/data-io/trim.c index c2697a26ac..657d23aa0b 100644 --- a/src/language/data-io/trim.c +++ b/src/language/data-io/trim.c @@ -1,5 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2010, 2011, + 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,9 +35,11 @@ /* Commands that read and write system files share a great deal of common syntactic structure for rearranging and dropping variables. This function parses this syntax and modifies DICT - appropriately. Returns true on success, false on failure. */ + appropriately. If RELAX is true, then the modified dictionary + need not conform to the usual variable name rules. Returns + true on success, false on failure. */ bool -parse_dict_trim (struct lexer *lexer, struct dictionary *dict) +parse_dict_trim (struct lexer *lexer, struct dictionary *dict, bool relax) { if (lex_match_id (lexer, "MAP")) { @@ -48,7 +51,7 @@ parse_dict_trim (struct lexer *lexer, struct dictionary *dict) else if (lex_match_id (lexer, "KEEP")) return parse_dict_keep (lexer, dict); else if (lex_match_id (lexer, "RENAME")) - return parse_dict_rename (lexer, dict); + return parse_dict_rename (lexer, dict, relax); else { lex_error (lexer, _("expecting a valid subcommand")); @@ -57,93 +60,95 @@ parse_dict_trim (struct lexer *lexer, struct dictionary *dict) } /* Parses and performs the RENAME subcommand of GET, SAVE, and - related commands. */ + related commands. If RELAX is true, then the new variable + names need not conform to the normal dictionary rules. +*/ bool -parse_dict_rename (struct lexer *lexer, struct dictionary *dict) +parse_dict_rename (struct lexer *lexer, struct dictionary *dict, + bool relax) { - size_t i; - - int success = 0; - - struct variable **v; - char **new_names; - size_t nv, nn; - char *err_name; - - int group; - + struct variable **oldvars = NULL; + size_t n_newvars = 0; + int group = 0; + char **newnames = NULL; lex_match (lexer, T_EQUALS); - if (lex_token (lexer) != T_LPAREN) - { - struct variable *v = parse_variable (lexer, dict); - if (v == NULL) - return 0; - if (!lex_force_match (lexer, T_EQUALS)) - return 0; - char *new_name = parse_DATA_LIST_var (lexer, dict); - if (dict_lookup_var (dict, new_name) != NULL) - { - msg (SE, _("Cannot rename %s as %s because there already exists " - "a variable named %s. To rename variables with " - "overlapping names, use a single RENAME subcommand " - "such as `/RENAME (A=B)(B=C)(C=A)', or equivalently, " - "`/RENAME (A B C=B C A)'."), - var_get_name (v), new_name, new_name); - free (new_name); - return 0; - } + while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) + { + size_t n_oldvars = 0; + oldvars = NULL; + n_newvars = 0; + n_oldvars = 0; + oldvars = NULL; - dict_rename_var (dict, v, new_name); - free (new_name); - return 1; - } + bool paren = lex_match (lexer, T_LPAREN); + group++; + if (!parse_variables (lexer, dict, &oldvars, &n_oldvars, PV_NO_DUPLICATE)) + goto fail; - nv = nn = 0; - v = NULL; - new_names = 0; - group = 1; - while (lex_match (lexer, T_LPAREN)) - { - size_t old_nv = nv; + if (!lex_force_match (lexer, T_EQUALS)) + goto fail; - if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND)) - goto done; - if (!lex_match (lexer, T_EQUALS)) + newnames = xmalloc (sizeof *newnames * n_oldvars); + while (lex_token (lexer) == T_ID || lex_token (lexer) == T_STRING) { - lex_error_expecting (lexer, "`='"); - goto done; + if (n_newvars >= n_oldvars) + break; + const char *new_name = lex_tokcstr (lexer); + if (!relax && ! id_is_plausible (new_name, true)) + goto fail; + + if (dict_lookup_var (dict, new_name) != NULL) + { + msg (SE, _("Cannot rename %s as %s because there already exists " + "a variable named %s. To rename variables with " + "overlapping names, use a single RENAME subcommand " + "such as `/RENAME (A=B)(B=C)(C=A)', or equivalently, " + "`/RENAME (A B C=B C A)'."), + var_get_name (oldvars[n_newvars]), new_name, new_name); + goto fail; + } + newnames[n_newvars] = strdup (new_name); + lex_get (lexer); + n_newvars++; } - if (!parse_DATA_LIST_vars (lexer, dict, &new_names, &nn, - PV_APPEND | PV_NO_SCRATCH | PV_NO_DUPLICATE)) - goto done; - if (nn != nv) + if (n_newvars != n_oldvars) { msg (SE, _("Number of variables on left side of `=' (%zu) does not " "match number of variables on right side (%zu), in " "parenthesized group %d of RENAME subcommand."), - nv - old_nv, nn - old_nv, group); - goto done; + n_newvars, n_oldvars, group); + goto fail; } - if (!lex_force_match (lexer, T_RPAREN)) - goto done; - group++; - } - if (!dict_rename_vars (dict, v, new_names, nv, &err_name)) - { - msg (SE, _("Requested renaming duplicates variable name %s."), err_name); - goto done; + if (paren) + if (!lex_force_match (lexer, T_RPAREN)) + goto fail; + + char *errname = 0; + if (!dict_rename_vars (dict, oldvars, newnames, n_newvars, &errname)) + { + msg (SE, + _("Requested renaming duplicates variable name %s."), + errname); + goto fail; + } + free (oldvars); + for (int i = 0; i < n_newvars; ++i) + free (newnames[i]); + free (newnames); + newnames = NULL; } - success = 1; - done: - for (i = 0; i < nn; i++) - free (new_names[i]); - free (new_names); - free (v); + return true; - return success; + fail: + free (oldvars); + for (int i = 0; i < n_newvars; ++i) + free (newnames[i]); + free (newnames); + newnames = NULL; + return false; } /* Parses and performs the DROP subcommand of GET, SAVE, and diff --git a/src/language/data-io/trim.h b/src/language/data-io/trim.h index 106d1a8f32..188acb4781 100644 --- a/src/language/data-io/trim.h +++ b/src/language/data-io/trim.h @@ -21,8 +21,8 @@ struct lexer; struct dictionary; -bool parse_dict_trim (struct lexer *, struct dictionary *); -bool parse_dict_rename (struct lexer *, struct dictionary *); +bool parse_dict_trim (struct lexer *, struct dictionary *, bool); +bool parse_dict_rename (struct lexer *, struct dictionary *, bool); bool parse_dict_drop (struct lexer *, struct dictionary *); bool parse_dict_keep (struct lexer *, struct dictionary *); diff --git a/tests/language/data-io/save-translate.at b/tests/language/data-io/save-translate.at index a81d074b76..a80c49f9c3 100644 --- a/tests/language/data-io/save-translate.at +++ b/tests/language/data-io/save-translate.at @@ -85,7 +85,7 @@ number:time:date:datetime:string:filter ]) AT_CLEANUP -AT_SETUP([CSV output -- KEEP, RENAME]) +AT_SETUP([CSV output -- KEEP, RENAME quoted]) PREPARE_SAVE_TRANSLATE_CSV( [/FIELDNAMES /KEEP=time string /RENAME string='long name with spaces' /UNSELECTED=DELETE]) AT_CHECK([cat data.csv], [0], [dnl @@ -95,6 +95,56 @@ time,long name with spaces ]) AT_CLEANUP + +AT_SETUP([CSV output -- KEEP, RENAME multi quoted]) +PREPARE_SAVE_TRANSLATE_CSV( + [/FIELDNAMES + /RENAME = + number = "this one" + time = "that one" + date = "which one?" + datetime = "another variable replacement" + string="long name with spaces" + /UNSELECTED=DELETE]) +AT_CHECK([cat data.csv], [0], [dnl +this one,that one,which one?,another variable replacement,long name with spaces,filter + ,-05:17:00,10/31/2010,04/09/2008 09:29:00, xxx,1 +1.625,12:00:00, , ,xyzzy,1 +]) +AT_CLEANUP + + +AT_SETUP([CSV output -- KEEP, RENAME bad name ]) +AT_DATA([bad.sps], [ +data list notable list /Var1 Var2 Var3 Var4 Var5 *. +begin data +1 2 3 4 5 +end data. + +SAVE TRANSLATE +/OUTFILE="foo.csv" + /TYPE=CSV + /MAP + /REPLACE + /FIELDNAMES + /Unselected=DELETE + /RENAME = + Var4 = foobar + (Var1 Var2 = one Var3 ) + (Var3 = "The second") + /CELLS=VALUES +. +]) + +AT_CHECK([pspp -O format=csv bad.sps], [1], [dnl +"bad.sps:16: error: SAVE TRANSLATE: Cannot rename Var2 as Var3 because there already exists a variable named Var3. To rename variables with overlapping names, use a single RENAME subcommand such as `/RENAME (A=B)(B=C)(C=A)', or equivalently, `/RENAME (A B C=B C A)'." +]) + + +AT_CLEANUP + + + AT_BANNER([SAVE TRANSLATE /TYPE=TAB]) AT_SETUP([TAB output]) @@ -106,3 +156,4 @@ number time date datetime string filter 1.625 12:00:00 xyzzy 1 ]) AT_CLEANUP + -- 2.30.2