1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/dataset.h"
22 #include "data/dictionary.h"
23 #include "data/variable.h"
24 #include "language/command.h"
25 #include "language/lexer/lexer.h"
26 #include "language/lexer/variable-parser.h"
27 #include "libpspp/array.h"
28 #include "libpspp/assertion.h"
29 #include "libpspp/bit-vector.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/misc.h"
34 #include "libpspp/str.h"
36 #include "gl/xalloc.h"
39 #define _(msgid) gettext (msgid)
/* These control the ordering produced by
   compare_variables_given_ordering().

   With POSITIONAL set, variables are compared by var_get_dict_index();
   otherwise their names are compared with utf8_strcasecmp(). */
struct ordering
  {
    bool forward;               /* true=FORWARD, false=BACKWARD. */
    bool positional;            /* true=POSITIONAL, false=ALPHA. */
  };
49 /* Increasing order of variable index. */
50 static struct ordering forward_positional_ordering = {1, 1};
52 static int compare_variables_given_ordering (const void *, const void *,
53 const void *ordering);
/* Explains how to modify the variables in a dictionary.

   cmd_modify_vars() fills one of these in while parsing and releases its
   arrays when it is done; rearrange_dict() and validate_var_modification()
   only read it. */
struct var_modification
  {
    /* New variable ordering. */
    struct variable **reorder_vars;
    size_t n_reorder;

    /* DROP/KEEP information. */
    struct variable **drop_vars;
    size_t n_drop;

    /* New variable names.  rename_vars[i] is to be renamed to new_names[i];
       both arrays have n_rename elements. */
    struct variable **rename_vars;
    char **new_names;
    size_t n_rename;
  };
/* Reorders, removes, and renames the variables in D as described by VM.
   Returns false, without modifying D, if validate_var_modification()
   rejects VM. */
static bool rearrange_dict (struct dictionary *d,
                            const struct var_modification *vm);
/* NOTE(review): this function is shown incompletely.  The number that starts
   each line is residue from a numbered listing, and the lines skipped by
   that numbering (braces, some declarations, arguments and statements) are
   absent.  The comments added here describe only what the visible lines
   show. */
75 /* Performs MODIFY VARS command. */
/* Parses the REORDER, RENAME, KEEP, DROP and MAP subcommands from LEXER,
   recording the requested changes in VM, and finally applies VM to the
   dictionary of DS with rearrange_dict().  RET_CODE starts out as
   CMD_CASCADING_FAILURE and becomes CMD_SUCCESS at the end, after the
   rearrange_dict() check. */
77 cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
/* Report an error if proc_make_temporary_transformations_permanent()
   returns true, that is, if MODIFY VARS follows TEMPORARY. */
79 if (proc_make_temporary_transformations_permanent (ds))
80 msg (SE, _("%s may not be used after %s. "
81 "Temporary transformations will be made permanent."),
82 "MODIFY VARS", "TEMPORARY");
/* Bit 1 is set by REORDER, bit 2 by RENAME, and bit 4 by both KEEP and
   DROP. */
84 /* Bits indicating whether we've already encountered a subcommand of this
86 unsigned int already_encountered = 0;
89 int ret_code = CMD_CASCADING_FAILURE;
91 /* What we are going to do to the active dataset. */
92 struct var_modification vm =
103 /* Parse each subcommand. */
104 lex_match (lexer, T_SLASH);
/* REORDER: collects variables into V one group at a time.  Each group has
   its own FORWARD/BACKWARD and POSITIONAL/ALPHA keywords. */
107 if (lex_match_id (lexer, "REORDER"))
109 if (already_encountered & 1)
111 lex_sbc_only_once ("REORDER");
114 already_encountered |= 1;
116 struct variable **v = NULL;
119 lex_match (lexer, T_EQUALS);
122 struct ordering ordering;
/* Defaults for this group: FORWARD and POSITIONAL. */
125 ordering.forward = ordering.positional = true;
128 if (lex_match_id (lexer, "FORWARD"))
129 ordering.forward = true;
130 else if (lex_match_id (lexer, "BACKWARD"))
131 ordering.forward = false;
132 else if (lex_match_id (lexer, "POSITIONAL"))
133 ordering.positional = true;
134 else if (lex_match_id (lexer, "ALPHA"))
135 ordering.positional = false;
/* ALL, either written out or implied because the next token is a slash or
   the end of the command. */
140 if (lex_match (lexer, T_ALL)
141 || lex_token (lexer) == T_SLASH
142 || lex_token (lexer) == T_ENDCMD)
146 msg (SE, _("Cannot specify ALL after specifying a set "
150 dict_get_vars_mutable (dataset_dict (ds), &v, &nv,
/* Otherwise a parenthesized variable list is expected.  PV_APPEND adds the
   parsed variables to the end of V. */
155 if (!lex_match (lexer, T_LPAREN))
157 lex_error_expecting (lexer, "`('", NULL_SENTINEL);
161 if (!parse_variables (lexer, dataset_dict (ds), &v, &nv,
162 PV_APPEND | PV_NO_DUPLICATE))
167 if (!lex_match (lexer, T_RPAREN))
169 lex_error_expecting (lexer, "`)'", NULL_SENTINEL);
/* Sort only the variables added since PREV_NV, using the ordering chosen for
   this group. */
174 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
175 compare_variables_given_ordering, &ordering);
/* Keep reading groups until the next subcommand or the end of the
   command. */
177 while (lex_token (lexer) != T_SLASH
178 && lex_token (lexer) != T_ENDCMD);
/* RENAME: each parenthesized OLD=NEW group appends the old variables to
   vm.rename_vars and the new names to vm.new_names. */
183 else if (lex_match_id (lexer, "RENAME"))
185 if (already_encountered & 2)
187 lex_sbc_only_once ("RENAME");
190 already_encountered |= 2;
192 lex_match (lexer, T_EQUALS);
/* PREV_NV_1 is handed to parse_DATA_LIST_vars() as the running count of new
   names.  PREV_NV_2 keeps the count from before this group, for the error
   message below. */
195 size_t prev_nv_1 = vm.n_rename;
196 size_t prev_nv_2 = vm.n_rename;
198 if (!lex_match (lexer, T_LPAREN))
200 lex_error_expecting (lexer, "`('", NULL_SENTINEL);
203 if (!parse_variables (lexer, dataset_dict (ds),
204 &vm.rename_vars, &vm.n_rename,
205 PV_APPEND | PV_NO_DUPLICATE))
207 if (!lex_match (lexer, T_EQUALS))
209 lex_error_expecting (lexer, "`='", NULL_SENTINEL);
213 if (!parse_DATA_LIST_vars (lexer, dataset_dict (ds),
214 &vm.new_names, &prev_nv_1, PV_APPEND))
/* The old-name list and the new-name list must end up the same length. */
216 if (prev_nv_1 != vm.n_rename)
218 msg (SE, _("Differing number of variables in old name list "
219 "(%zu) and in new name list (%zu)."),
220 vm.n_rename - prev_nv_2, prev_nv_1 - prev_nv_2);
/* Free every new name parsed so far.
   NOTE(review): the cleanup loop at the end of this function also frees
   vm.new_names[i] for each i below vm.n_rename.  Confirm that the lines not
   shown here keep these names from being freed twice. */
221 for (size_t i = 0; i < prev_nv_1; i++)
222 free (vm.new_names[i]);
227 if (!lex_match (lexer, T_RPAREN))
229 lex_error_expecting (lexer, "`)'", NULL_SENTINEL);
233 while (lex_token (lexer) != T_ENDCMD
234 && lex_token (lexer) != T_SLASH);
/* KEEP: the list of variables to keep is converted into the list of
   variables to drop.  KEEP and DROP share bit 4, so at most one of them may
   appear, and only once. */
236 else if (lex_match_id (lexer, "KEEP"))
238 if (already_encountered & 4)
241 _("%s subcommand may be given at most once. It may "
242 "not be given in conjunction with the %s subcommand."),
246 already_encountered |= 4;
248 struct variable **keep_vars, **drop_vars;
249 size_t n_keep, n_drop;
250 lex_match (lexer, T_EQUALS);
251 if (!parse_variables (lexer, dataset_dict (ds),
252 &keep_vars, &n_keep, PV_NONE))
255 /* Transform the list of variables to keep into a list of
256 variables to drop. First sort the keep list, then figure
257 out which variables are missing. */
258 sort (keep_vars, n_keep, sizeof *keep_vars,
259 compare_variables_given_ordering,
260 &forward_positional_ordering);
262 struct variable **all_vars;
264 dict_get_vars_mutable (dataset_dict (ds), &all_vars, &n_all, 0);
265 assert (n_all >= n_keep);
267 n_drop = n_all - n_keep;
/* NOTE(review): the element size is written as sizeof *keep_vars although
   the array is drop_vars.  Both are arrays of pointers to struct variable,
   so the size is the same. */
268 drop_vars = xnmalloc (n_drop, sizeof *keep_vars);
269 if (set_difference (all_vars, n_all,
273 compare_variables_given_ordering,
274 &forward_positional_ordering)
281 vm.drop_vars = drop_vars;
/* DROP: the parsed list is stored as the drop list directly. */
284 else if (lex_match_id (lexer, "DROP"))
286 struct variable **drop_vars;
289 if (already_encountered & 4)
291 msg (SE, _("%s subcommand may be given at most once. It may "
292 "not be given in conjunction with the %s "
298 already_encountered |= 4;
300 lex_match (lexer, T_EQUALS);
301 if (!parse_variables (lexer, dataset_dict (ds),
302 &drop_vars, &n_drop, PV_NONE))
304 vm.drop_vars = drop_vars;
/* MAP: tries the changes recorded so far on a clone of the dictionary. */
307 else if (lex_match_id (lexer, "MAP"))
309 struct dictionary *temp = dict_clone (dataset_dict (ds));
310 int success = rearrange_dict (temp, &vm);
313 /* FIXME: display new dictionary. */
/* Anything else is reported as an error. */
319 if (lex_token (lexer) == T_ID)
320 msg (SE, _("Unrecognized subcommand name `%s'."),
321 lex_tokcstr (lexer));
323 msg (SE, _("Subcommand name expected."));
/* After a subcommand, only the end of the command or a slash is
   acceptable. */
327 if (lex_token (lexer) == T_ENDCMD)
329 if (lex_token (lexer) != T_SLASH)
331 lex_error_expecting (lexer, "`/'", "`.'", NULL_SENTINEL);
/* When REORDER (bit 1) or KEEP/DROP (bit 4) was given, proc_execute() runs
   before the dictionary is changed. */
337 if (already_encountered & (1 | 4))
340 if (!proc_execute (ds))
/* Apply the changes to the real dictionary. */
344 if (!rearrange_dict (dataset_dict (ds), &vm))
347 ret_code = CMD_SUCCESS;
/* Cleanup: release the arrays held in VM and each new name. */
350 free (vm.reorder_vars);
351 free (vm.rename_vars);
352 for (size_t i = 0; i < vm.n_rename; i++)
353 free (vm.new_names[i]);
359 /* Compares A and B according to the settings in ORDERING, returning a
360 strcmp()-type result. */
362 compare_variables_given_ordering (const void *a_, const void *b_,
363 const void *ordering_)
365 struct variable *const *pa = a_;
366 struct variable *const *pb = b_;
367 const struct variable *a = *pa;
368 const struct variable *b = *pb;
369 const struct ordering *ordering = ordering_;
372 if (ordering->positional)
374 size_t a_index = var_get_dict_index (a);
375 size_t b_index = var_get_dict_index (b);
376 result = a_index < b_index ? -1 : a_index > b_index;
379 result = utf8_strcasecmp (var_get_name (a), var_get_name (b));
380 if (!ordering->forward)
/* Pairs a variable with a new name.

   validate_var_modification() points NEW_NAME either at the current name of
   VAR or at an entry of the new_names array of a struct var_modification,
   so the string is not owned by this structure. */
struct var_renaming
  {
    struct variable *var;
    const char *new_name;
  };
392 /* A algo_compare_func that compares new_name members in struct var_renaming
393 structures A and B. */
395 compare_var_renaming_by_new_name (const void *a_, const void *b_,
396 const void *aux UNUSED)
398 const struct var_renaming *a = a_;
399 const struct var_renaming *b = b_;
401 return utf8_strcasecmp (a->new_name, b->new_name);
/* NOTE(review): this function is shown incompletely.  The number that starts
   each line is residue from a numbered listing, and the lines skipped by
   that numbering are absent, including some call arguments and the end of
   the function.

   Visible steps: copy and sort the drop list; compute the kept variables as
   all variables minus the dropped ones; pair each kept variable with its
   current name; substitute new names for renamed variables that are found
   among the kept ones; then sort the pairs by new name and look for two
   adjacent pairs that compare equal.  OK is true when there is no such
   pair. */
404 /* Returns true if performing VM on dictionary D would not cause problems such
405 as duplicate variable names. Returns false otherwise, and issues an error
408 validate_var_modification (const struct dictionary *d,
409 const struct var_modification *vm)
411 /* Variable reordering can't be a problem, so we don't simulate
412 it. Variable renaming can cause duplicate names, but
413 dropping variables can eliminate them, so we simulate both
416 /* All variables, in index order. */
417 struct variable **all_vars;
419 dict_get_vars_mutable (d, &all_vars, &n_all, 0);
421 /* Drop variables, in index order. */
422 size_t n_drop = vm->n_drop;
/* Sort a private copy so that the order of vm->drop_vars is left alone.
   NOTE(review): if vm->drop_vars can be a null pointer while n_drop is 0,
   memcpy() is called with a null source here.  Confirm against the
   caller. */
423 struct variable **drop_vars = xnmalloc (n_drop, sizeof *drop_vars);
424 memcpy (drop_vars, vm->drop_vars, n_drop * sizeof *drop_vars);
425 sort (drop_vars, n_drop, sizeof *drop_vars,
426 compare_variables_given_ordering, &forward_positional_ordering);
428 /* Keep variables, in index order. */
429 assert (n_all >= n_drop);
430 size_t n_keep = n_all - n_drop;
431 struct variable **keep_vars = xnmalloc (n_keep, sizeof *keep_vars);
/* The difference is expected to contain exactly N_KEEP variables. */
432 if (set_difference (all_vars, n_all,
436 compare_variables_given_ordering,
437 &forward_positional_ordering) != n_keep)
440 /* Copy variables into var_renaming array. */
441 struct var_renaming *var_renaming = xnmalloc (n_keep, sizeof *var_renaming);
442 for (size_t i = 0; i < n_keep; i++)
444 var_renaming[i].var = keep_vars[i];
445 var_renaming[i].new_name = var_get_name (keep_vars[i]);
448 /* Rename variables in var_renaming array. */
449 for (size_t i = 0; i < vm->n_rename; i++)
451 struct variable *const *kv;
452 struct var_renaming *vr;
454 /* Get the var_renaming element. */
/* The search key is on a line not shown here, presumably
   vm->rename_vars[i].  TODO confirm. */
455 kv = binary_search (keep_vars, n_keep, sizeof *keep_vars,
457 compare_variables_given_ordering,
458 &forward_positional_ordering);
/* keep_vars and var_renaming are still parallel arrays at this point, so the
   offset of KV within keep_vars is also the index into var_renaming. */
461 vr = var_renaming + (kv - keep_vars);
463 vr->new_name = vm->new_names[i];
466 /* Sort var_renaming array by new names and check for duplicates. */
/* Sorting reorders var_renaming, so from here on it no longer lines up with
   keep_vars. */
467 sort (var_renaming, n_keep, sizeof *var_renaming,
468 compare_var_renaming_by_new_name, NULL);
469 bool ok = !adjacent_find_equal (var_renaming, n_keep, sizeof *var_renaming,
470 compare_var_renaming_by_new_name, NULL);
/* NOTE(review): this function is shown incompletely.  The number that starts
   each line is residue from a numbered listing, and the lines skipped by
   that numbering are absent, including the declaration and increment of
   N_RENAME, the tail of the dict_rename_vars() call and the return
   statements. */
481 /* Reorders, removes, and renames variables in dictionary D according to VM.
482 Returns true if successful, false if there would have been duplicate
483 variable names if the modifications had been carried out. In the latter
484 case, the dictionary is not modified. */
486 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
488 /* Check whether the modifications will cause duplicate names. */
489 if (!validate_var_modification (d, vm))
492 /* Record the old names of variables to rename. After variables are deleted,
493 we can't depend on the variables to still exist, but we can still look
495 char **rename_old_names = xnmalloc (vm->n_rename, sizeof *rename_old_names);
496 for (size_t i = 0; i < vm->n_rename; i++)
497 rename_old_names[i] = xstrdup (var_get_name (vm->rename_vars[i]));
499 /* Reorder and delete variables. */
500 dict_reorder_vars (d, vm->reorder_vars, vm->n_reorder);
501 dict_delete_vars (d, vm->drop_vars, vm->n_drop);
503 /* Compose lists of variables to rename and their new names. */
504 struct variable **rename_vars = xnmalloc (vm->n_rename, sizeof *rename_vars);
505 char **rename_new_names = xnmalloc (vm->n_rename, sizeof *rename_new_names);
507 for (size_t i = 0; i < vm->n_rename; i++)
/* Look the variable up again, by the old name saved above, in the dictionary
   as it stands after reordering and deletion. */
509 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
/* rename_vars and rename_new_names are filled in step, indexed by N_RENAME.
   rename_new_names only borrows the strings that belong to VM. */
513 rename_vars[n_rename] = var;
514 rename_new_names[n_rename] = vm->new_names[i];
519 if (dict_rename_vars (d, rename_vars, rename_new_names, n_rename,
/* The saved old names were allocated with xstrdup() above, so each one is
   freed here.  For rename_new_names only the array itself is freed. */
524 for (size_t i = 0; i < vm->n_rename; i++)
525 free (rename_old_names[i]);
526 free (rename_old_names);
528 free (rename_new_names);