1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/dataset.h"
22 #include "data/dictionary.h"
23 #include "data/variable.h"
24 #include "language/command.h"
25 #include "language/lexer/lexer.h"
26 #include "language/lexer/variable-parser.h"
27 #include "libpspp/array.h"
28 #include "libpspp/assertion.h"
29 #include "libpspp/bit-vector.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/misc.h"
34 #include "libpspp/str.h"
36 #include "gl/xalloc.h"
39 #define _(msgid) gettext (msgid)
41 /* These control the ordering produced by
42 compare_variables_given_ordering(). */
45 bool forward; /* true=FORWARD, false=BACKWARD. */
46 bool positional; /* true=POSITIONAL, false=ALPHA. */
49 /* Increasing order of variable index. */
50 static struct ordering forward_positional_ordering = {1, 1};
52 static int compare_variables_given_ordering (const void *, const void *,
53 const void *ordering);
55 /* Explains how to modify the variables in a dictionary. */
56 struct var_modification
58 /* New variable ordering. */
59 struct variable **reorder_vars;
62 /* DROP/KEEP information. */
63 struct variable **drop_vars;
66 /* New variable names. */
67 struct variable **rename_vars;
72 static bool rearrange_dict (struct dictionary *d,
73 const struct var_modification *vm);
75 /* Performs MODIFY VARS command. */
77 cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
79 if (proc_make_temporary_transformations_permanent (ds))
80 msg (SE, _("%s may not be used after %s. "
81 "Temporary transformations will be made permanent."),
82 "MODIFY VARS", "TEMPORARY");
84 /* Bits indicated whether we've already encountered a subcommand of this
86 unsigned int already_encountered = 0;
89 int ret_code = CMD_CASCADING_FAILURE;
91 /* What we are going to do to the active dataset. */
92 struct var_modification vm =
103 /* Parse each subcommand. */
104 lex_match (lexer, T_SLASH);
107 if (lex_match_id (lexer, "REORDER"))
109 if (already_encountered & 1)
111 lex_sbc_only_once ("REORDER");
114 already_encountered |= 1;
116 struct variable **v = NULL;
119 lex_match (lexer, T_EQUALS);
122 struct ordering ordering;
125 ordering.forward = ordering.positional = true;
128 if (lex_match_id (lexer, "FORWARD"))
129 ordering.forward = true;
130 else if (lex_match_id (lexer, "BACKWARD"))
131 ordering.forward = false;
132 else if (lex_match_id (lexer, "POSITIONAL"))
133 ordering.positional = true;
134 else if (lex_match_id (lexer, "ALPHA"))
135 ordering.positional = false;
140 if (lex_match (lexer, T_ALL)
141 || lex_token (lexer) == T_SLASH
142 || lex_token (lexer) == T_ENDCMD)
146 msg (SE, _("Cannot specify ALL after specifying a set "
150 dict_get_vars_mutable (dataset_dict (ds), &v, &nv,
155 if (!lex_match (lexer, T_LPAREN))
157 lex_error_expecting (lexer, "`('", NULL_SENTINEL);
161 if (!parse_variables (lexer, dataset_dict (ds), &v, &nv,
162 PV_APPEND | PV_NO_DUPLICATE))
167 if (!lex_match (lexer, T_RPAREN))
169 lex_error_expecting (lexer, "`)'", NULL_SENTINEL);
175 if (!ordering.positional)
176 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
177 compare_variables_given_ordering, &ordering);
178 else if (!ordering.forward)
179 reverse_array(&v[prev_nv], nv - prev_nv, sizeof *v);
181 while (lex_token (lexer) != T_SLASH
182 && lex_token (lexer) != T_ENDCMD);
187 else if (lex_match_id (lexer, "RENAME"))
189 if (already_encountered & 2)
191 lex_sbc_only_once ("RENAME");
194 already_encountered |= 2;
196 lex_match (lexer, T_EQUALS);
199 size_t prev_nv_1 = vm.n_rename;
200 size_t prev_nv_2 = vm.n_rename;
202 if (!lex_match (lexer, T_LPAREN))
204 lex_error_expecting (lexer, "`('", NULL_SENTINEL);
207 if (!parse_variables (lexer, dataset_dict (ds),
208 &vm.rename_vars, &vm.n_rename,
209 PV_APPEND | PV_NO_DUPLICATE))
211 if (!lex_match (lexer, T_EQUALS))
213 lex_error_expecting (lexer, "`='", NULL_SENTINEL);
217 if (!parse_DATA_LIST_vars (lexer, dataset_dict (ds),
218 &vm.new_names, &prev_nv_1, PV_APPEND))
220 if (prev_nv_1 != vm.n_rename)
222 msg (SE, _("Differing number of variables in old name list "
223 "(%zu) and in new name list (%zu)."),
224 vm.n_rename - prev_nv_2, prev_nv_1 - prev_nv_2);
225 for (size_t i = 0; i < prev_nv_1; i++)
226 free (vm.new_names[i]);
231 if (!lex_match (lexer, T_RPAREN))
233 lex_error_expecting (lexer, "`)'", NULL_SENTINEL);
237 while (lex_token (lexer) != T_ENDCMD
238 && lex_token (lexer) != T_SLASH);
240 else if (lex_match_id (lexer, "KEEP"))
242 if (already_encountered & 4)
245 _("%s subcommand may be given at most once. It may "
246 "not be given in conjunction with the %s subcommand."),
250 already_encountered |= 4;
252 struct variable **keep_vars, **drop_vars;
253 size_t n_keep, n_drop;
254 lex_match (lexer, T_EQUALS);
255 if (!parse_variables (lexer, dataset_dict (ds),
256 &keep_vars, &n_keep, PV_NONE))
259 /* Transform the list of variables to keep into a list of
260 variables to drop. First sort the keep list, then figure
261 out which variables are missing. */
262 sort (keep_vars, n_keep, sizeof *keep_vars,
263 compare_variables_given_ordering,
264 &forward_positional_ordering);
266 struct variable **all_vars;
268 dict_get_vars_mutable (dataset_dict (ds), &all_vars, &n_all, 0);
269 assert (n_all >= n_keep);
271 n_drop = n_all - n_keep;
272 drop_vars = xnmalloc (n_drop, sizeof *keep_vars);
273 if (set_difference (all_vars, n_all,
277 compare_variables_given_ordering,
278 &forward_positional_ordering)
285 vm.drop_vars = drop_vars;
288 else if (lex_match_id (lexer, "DROP"))
290 struct variable **drop_vars;
293 if (already_encountered & 4)
295 msg (SE, _("%s subcommand may be given at most once. It may "
296 "not be given in conjunction with the %s "
302 already_encountered |= 4;
304 lex_match (lexer, T_EQUALS);
305 if (!parse_variables (lexer, dataset_dict (ds),
306 &drop_vars, &n_drop, PV_NONE))
308 vm.drop_vars = drop_vars;
311 if (n_drop == dict_get_var_cnt (dataset_dict (ds)))
313 msg (SE, _("%s may not be used to delete all variables "
314 "from the active dataset dictionary. "
315 "Use %s instead."), "MODIFY VARS", "NEW FILE");
319 else if (lex_match_id (lexer, "MAP"))
321 struct dictionary *temp = dict_clone (dataset_dict (ds));
322 int success = rearrange_dict (temp, &vm);
325 /* FIXME: display new dictionary. */
331 if (lex_token (lexer) == T_ID)
332 msg (SE, _("Unrecognized subcommand name `%s'."),
333 lex_tokcstr (lexer));
335 msg (SE, _("Subcommand name expected."));
339 if (lex_token (lexer) == T_ENDCMD)
341 if (lex_token (lexer) != T_SLASH)
343 lex_error_expecting (lexer, "`/'", "`.'", NULL_SENTINEL);
349 if (already_encountered & (1 | 4))
352 if (!proc_execute (ds))
356 if (!rearrange_dict (dataset_dict (ds), &vm))
359 ret_code = CMD_SUCCESS;
362 free (vm.reorder_vars);
363 free (vm.rename_vars);
364 for (size_t i = 0; i < vm.n_rename; i++)
365 free (vm.new_names[i]);
371 /* Compares A and B according to the settings in ORDERING, returning a
372 strcmp()-type result. */
374 compare_variables_given_ordering (const void *a_, const void *b_,
375 const void *ordering_)
377 struct variable *const *pa = a_;
378 struct variable *const *pb = b_;
379 const struct variable *a = *pa;
380 const struct variable *b = *pb;
381 const struct ordering *ordering = ordering_;
384 if (ordering->positional)
386 size_t a_index = var_get_dict_index (a);
387 size_t b_index = var_get_dict_index (b);
388 result = a_index < b_index ? -1 : a_index > b_index;
391 result = utf8_strcasecmp (var_get_name (a), var_get_name (b));
392 if (!ordering->forward)
397 /* Pairs a variable with a new name. */
400 struct variable *var;
401 const char *new_name;
404 /* A algo_compare_func that compares new_name members in struct var_renaming
405 structures A and B. */
407 compare_var_renaming_by_new_name (const void *a_, const void *b_,
408 const void *aux UNUSED)
410 const struct var_renaming *a = a_;
411 const struct var_renaming *b = b_;
413 return utf8_strcasecmp (a->new_name, b->new_name);
416 /* Returns true if performing VM on dictionary D would not cause problems such
417 as duplicate variable names. Returns false otherwise, and issues an error
420 validate_var_modification (const struct dictionary *d,
421 const struct var_modification *vm)
423 /* Variable reordering can't be a problem, so we don't simulate
424 it. Variable renaming can cause duplicate names, but
425 dropping variables can eliminate them, so we simulate both
428 /* All variables, in index order. */
429 struct variable **all_vars;
431 dict_get_vars_mutable (d, &all_vars, &n_all, 0);
433 /* Drop variables, in index order. */
434 size_t n_drop = vm->n_drop;
435 struct variable **drop_vars = xnmalloc (n_drop, sizeof *drop_vars);
436 memcpy (drop_vars, vm->drop_vars, n_drop * sizeof *drop_vars);
437 sort (drop_vars, n_drop, sizeof *drop_vars,
438 compare_variables_given_ordering, &forward_positional_ordering);
440 /* Keep variables, in index order. */
441 assert (n_all >= n_drop);
442 size_t n_keep = n_all - n_drop;
443 struct variable **keep_vars = xnmalloc (n_keep, sizeof *keep_vars);
444 if (set_difference (all_vars, n_all,
448 compare_variables_given_ordering,
449 &forward_positional_ordering) != n_keep)
452 /* Copy variables into var_renaming array. */
453 struct var_renaming *var_renaming = xnmalloc (n_keep, sizeof *var_renaming);
454 for (size_t i = 0; i < n_keep; i++)
456 var_renaming[i].var = keep_vars[i];
457 var_renaming[i].new_name = var_get_name (keep_vars[i]);
460 /* Rename variables in var_renaming array. */
461 for (size_t i = 0; i < vm->n_rename; i++)
463 struct variable *const *kv;
464 struct var_renaming *vr;
466 /* Get the var_renaming element. */
467 kv = binary_search (keep_vars, n_keep, sizeof *keep_vars,
469 compare_variables_given_ordering,
470 &forward_positional_ordering);
473 vr = var_renaming + (kv - keep_vars);
475 vr->new_name = vm->new_names[i];
478 /* Sort var_renaming array by new names and check for duplicates. */
479 sort (var_renaming, n_keep, sizeof *var_renaming,
480 compare_var_renaming_by_new_name, NULL);
481 bool ok = !adjacent_find_equal (var_renaming, n_keep, sizeof *var_renaming,
482 compare_var_renaming_by_new_name, NULL);
493 /* Reorders, removes, and renames variables in dictionary D according to VM.
494 Returns true if successful, false if there would have been duplicate
495 variable names if the modifications had been carried out. In the latter
496 case, the dictionary is not modified. */
498 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
500 /* Check whether the modifications will cause duplicate names. */
501 if (!validate_var_modification (d, vm))
504 /* Record the old names of variables to rename. After variables are deleted,
505 we can't depend on the variables to still exist, but we can still look
507 char **rename_old_names = xnmalloc (vm->n_rename, sizeof *rename_old_names);
508 for (size_t i = 0; i < vm->n_rename; i++)
509 rename_old_names[i] = xstrdup (var_get_name (vm->rename_vars[i]));
511 /* Reorder and delete variables. */
512 dict_reorder_vars (d, vm->reorder_vars, vm->n_reorder);
513 dict_delete_vars (d, vm->drop_vars, vm->n_drop);
515 /* Compose lists of variables to rename and their new names. */
516 struct variable **rename_vars = xnmalloc (vm->n_rename, sizeof *rename_vars);
517 char **rename_new_names = xnmalloc (vm->n_rename, sizeof *rename_new_names);
519 for (size_t i = 0; i < vm->n_rename; i++)
521 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
525 rename_vars[n_rename] = var;
526 rename_new_names[n_rename] = vm->new_names[i];
531 if (dict_rename_vars (d, rename_vars, rename_new_names, n_rename,
536 for (size_t i = 0; i < vm->n_rename; i++)
537 free (rename_old_names[i]);
538 free (rename_old_names);
540 free (rename_new_names);