1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/dataset.h"
22 #include "data/dictionary.h"
23 #include "data/variable.h"
24 #include "language/command.h"
25 #include "language/lexer/lexer.h"
26 #include "language/lexer/variable-parser.h"
27 #include "libpspp/array.h"
28 #include "libpspp/assertion.h"
29 #include "libpspp/bit-vector.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/misc.h"
34 #include "libpspp/str.h"
36 #include "gl/xalloc.h"
39 #define _(msgid) gettext (msgid)
41 /* These control the ordering produced by
42 compare_variables_given_ordering(). */
/* POSITIONAL nonzero makes the comparison use dictionary indexes; zero makes
   it compare variable names with utf8_strcasecmp().  FORWARD is tested by
   the comparison function after that base comparison has been made. */
45 int forward; /* 1=FORWARD, 0=BACKWARD. */
46 int positional; /* 1=POSITIONAL, 0=ALPHA. */
49 /* Increasing order of variable index.  The initializer sets both
   members (forward, then positional) to 1. */
50 static struct ordering forward_positional_ordering = {1, 1};
/* Comparison callback handed to sort(), set_difference() and
   binary_search() in this file.  The third argument is the
   `struct ordering' that selects how two variables are compared. */
52 static int compare_variables_given_ordering (const void *, const void *,
53 const void *ordering);
55 /* Explains how to modify the variables in a dictionary.
   cmd_modify_vars() fills one of these in while parsing and
   rearrange_dict() carries it out. */
56 struct var_modification
58 /* New variable ordering.  Passed to dict_reorder_vars(). */
59 struct variable **reorder_vars;
62 /* DROP/KEEP information.  Passed to dict_delete_vars(). */
63 struct variable **drop_vars;
66 /* New variable names.  rename_vars holds the variables to rename; their
   current names are recorded by rearrange_dict() before any variable is
   deleted. */
67 struct variable **rename_vars;
/* Applies VM to D.  Called both on a clone of the dictionary (MAP
   subcommand) and on the dataset's own dictionary. */
72 static bool rearrange_dict (struct dictionary *d,
73 const struct var_modification *vm);
75 /* Performs MODIFY VARS command.

   Parses the REORDER, RENAME, KEEP, DROP and MAP subcommands from LEXER
   into a struct var_modification and applies it to the dictionary of DS
   with rearrange_dict().  RET_CODE starts out as CMD_CASCADING_FAILURE and
   is set to CMD_SUCCESS after the call to rearrange_dict(). */
77 cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
79 /* Bits indicating whether we've already encountered a subcommand of
81 unsigned already_encountered = 0;
83 /* What we are going to do to the active dataset. */
84 struct var_modification vm;
87 int ret_code = CMD_CASCADING_FAILURE;
/* If proc_make_temporary_transformations_permanent() reports that it did
   something, tell the user; the message text states that the temporary
   transformations are made permanent. */
91 if (proc_make_temporary_transformations_permanent (ds))
92 msg (SE, _("%s may not be used after %s. "
93 "Temporary transformations will be made permanent."), "MODIFY VARS", "TEMPORARY");
95 vm.reorder_vars = NULL;
97 vm.rename_vars = NULL;
103 /* Parse each subcommand. */
104 lex_match (lexer, T_SLASH);
/* REORDER: collects the new variable order in V.  Bit 1 of
   ALREADY_ENCOUNTERED records that the subcommand has been seen. */
107 if (lex_match_id (lexer, "REORDER"))
109 struct variable **v = NULL;
112 if (already_encountered & 1)
114 lex_sbc_only_once ("REORDER");
117 already_encountered |= 1;
119 lex_match (lexer, T_EQUALS);
122 struct ordering ordering;
/* Both fields default to 1, so FORWARD and POSITIONAL only need to be
   consumed; that is why their `if' statements have empty bodies. */
125 ordering.forward = ordering.positional = 1;
126 if (lex_match_id (lexer, "FORWARD"));
127 else if (lex_match_id (lexer, "BACKWARD"))
128 ordering.forward = 0;
129 if (lex_match_id (lexer, "POSITIONAL"));
130 else if (lex_match_id (lexer, "ALPHA"))
131 ordering.positional = 0;
/* ALL, or a specification with no variable list at all (the next token is
   `/' or the end of the command), takes the variables from
   dict_get_vars_mutable(). */
133 if (lex_match (lexer, T_ALL) || lex_token (lexer) == T_SLASH || lex_token (lexer) == T_ENDCMD)
137 msg (SE, _("Cannot specify ALL after specifying a set "
141 dict_get_vars_mutable (dataset_dict (ds), &v, &nv, DC_SYSTEM);
/* Explicit variable lists must be enclosed in parentheses. */
145 if (!lex_match (lexer, T_LPAREN))
147 lex_error_expecting (lexer, "`('", NULL_SENTINEL);
151 if (!parse_variables (lexer, dataset_dict (ds), &v, &nv,
152 PV_APPEND | PV_NO_DUPLICATE))
157 if (!lex_match (lexer, T_RPAREN))
159 lex_error_expecting (lexer, "`)'", NULL_SENTINEL);
/* Sort only the entries of V from index PREV_NV onward, using the ordering
   parsed above. */
164 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
165 compare_variables_given_ordering, &ordering);
167 while (lex_token (lexer) != T_SLASH
168 && lex_token (lexer) != T_ENDCMD);
/* RENAME: each parenthesized group lists existing variables, then `=',
   then their new names.  Bit 2 of ALREADY_ENCOUNTERED records that the
   subcommand has been seen. */
173 else if (lex_match_id (lexer, "RENAME"))
175 if (already_encountered & 2)
177 lex_sbc_only_once ("RENAME");
180 already_encountered |= 2;
182 lex_match (lexer, T_EQUALS);
/* PREV_NV_1 and PREV_NV_2 both start at the current rename count.
   PREV_NV_1 is handed to parse_DATA_LIST_vars() as the running count of
   new names and is compared against vm.rename_cnt afterwards; PREV_NV_2
   keeps the starting value for the error message. */
185 size_t prev_nv_1 = vm.rename_cnt;
186 size_t prev_nv_2 = vm.rename_cnt;
188 if (!lex_match (lexer, T_LPAREN))
190 lex_error_expecting (lexer, "`('", NULL_SENTINEL);
193 if (!parse_variables (lexer, dataset_dict (ds),
194 &vm.rename_vars, &vm.rename_cnt,
195 PV_APPEND | PV_NO_DUPLICATE))
197 if (!lex_match (lexer, T_EQUALS))
199 lex_error_expecting (lexer, "`='", NULL_SENTINEL);
202 if (!parse_DATA_LIST_vars (lexer, dataset_dict (ds),
203 &vm.new_names, &prev_nv_1, PV_APPEND))
/* The old-name and new-name lists must end up with the same length. */
205 if (prev_nv_1 != vm.rename_cnt)
207 msg (SE, _("Differing number of variables in old name list "
208 "(%zu) and in new name list (%zu)."),
209 vm.rename_cnt - prev_nv_2, prev_nv_1 - prev_nv_2);
/* NOTE(review): this frees the first PREV_NV_1 new names, and the cleanup
   at the end of this function also frees vm.new_names[i] for every
   i < vm.rename_cnt.  Confirm that the error path cannot reach that
   cleanup loop with entries (or the array) already released. */
210 for (i = 0; i < prev_nv_1; i++)
211 free (vm.new_names[i]);
216 if (!lex_match (lexer, T_RPAREN))
218 lex_error_expecting (lexer, "`)'", NULL_SENTINEL);
222 while (lex_token (lexer) != T_ENDCMD
223 && lex_token (lexer) != T_SLASH);
/* KEEP: KEEP and DROP share bit 4 of ALREADY_ENCOUNTERED, so only one of
   the two may be given, and only once. */
225 else if (lex_match_id (lexer, "KEEP"))
227 struct variable **keep_vars, **all_vars, **drop_vars;
228 size_t keep_cnt, all_cnt, drop_cnt;
230 if (already_encountered & 4)
232 msg (SE, _("%s subcommand may be given at most once. It may "
233 "not be given in conjunction with the %s subcommand."),
237 already_encountered |= 4;
239 lex_match (lexer, T_EQUALS);
240 if (!parse_variables (lexer, dataset_dict (ds), &keep_vars, &keep_cnt, PV_NONE))
243 /* Transform the list of variables to keep into a list of
244 variables to drop. First sort the keep list, then figure
245 out which variables are missing. */
246 sort (keep_vars, keep_cnt, sizeof *keep_vars,
247 compare_variables_given_ordering, &forward_positional_ordering);
249 dict_get_vars_mutable (dataset_dict (ds), &all_vars, &all_cnt, 0);
250 assert (all_cnt >= keep_cnt);
252 drop_cnt = all_cnt - keep_cnt;
/* sizeof *keep_vars is the element size of drop_vars as well: both are
   declared as `struct variable **' above. */
253 drop_vars = xnmalloc (drop_cnt, sizeof *keep_vars);
254 if (set_difference (all_vars, all_cnt,
258 compare_variables_given_ordering,
259 &forward_positional_ordering)
266 vm.drop_vars = drop_vars;
267 vm.drop_cnt = drop_cnt;
/* DROP: the parsed variable list becomes the drop list as is.  Shares
   bit 4 with KEEP. */
269 else if (lex_match_id (lexer, "DROP"))
271 struct variable **drop_vars;
274 if (already_encountered & 4)
276 msg (SE, _("%s subcommand may be given at most once. It may "
277 "not be given in conjunction with the %s "
283 already_encountered |= 4;
285 lex_match (lexer, T_EQUALS);
286 if (!parse_variables (lexer, dataset_dict (ds), &drop_vars, &drop_cnt, PV_NONE))
288 vm.drop_vars = drop_vars;
289 vm.drop_cnt = drop_cnt;
/* MAP: tries the modifications gathered so far on a clone of the
   dictionary rather than on the dataset's own dictionary. */
291 else if (lex_match_id (lexer, "MAP"))
293 struct dictionary *temp = dict_clone (dataset_dict (ds));
294 int success = rearrange_dict (temp, &vm);
297 /* FIXME: display new dictionary. */
/* Anything else is not a subcommand of MODIFY VARS. */
303 if (lex_token (lexer) == T_ID)
304 msg (SE, _("Unrecognized subcommand name `%s'."), lex_tokcstr (lexer));
306 msg (SE, _("Subcommand name expected."));
/* A subcommand must be followed by the end of the command or by `/'. */
310 if (lex_token (lexer) == T_ENDCMD)
312 if (lex_token (lexer) != T_SLASH)
314 lex_error_expecting (lexer, "`/'", "`.'", NULL_SENTINEL);
/* Bits 1 and 4 are REORDER and KEEP/DROP.
   NOTE(review): presumably proc_execute() is needed only when one of
   those was given -- confirm against the surrounding braces. */
320 if (already_encountered & (1 | 4))
323 if (!proc_execute (ds))
327 if (!rearrange_dict (dataset_dict (ds), &vm))
330 ret_code = CMD_SUCCESS;
/* Release the lists held in VM, including each new-name string. */
333 free (vm.reorder_vars);
334 free (vm.rename_vars);
335 for (i = 0; i < vm.rename_cnt; i++)
336 free (vm.new_names[i]);
342 /* Compares A and B according to the settings in
343 ORDERING, returning a strcmp()-type result.  A_ and B_ each point to a
   `struct variable *' (an element of an array of variable pointers), and
   ORDERING_ points to a `struct ordering'. */
345 compare_variables_given_ordering (const void *a_, const void *b_,
346 const void *ordering_)
348 struct variable *const *pa = a_;
349 struct variable *const *pb = b_;
350 const struct variable *a = *pa;
351 const struct variable *b = *pb;
352 const struct ordering *ordering = ordering_;
/* Positional ordering compares dictionary indexes.  The result is built
   from two comparisons rather than a subtraction because the indexes are
   unsigned (size_t). */
355 if (ordering->positional)
357 size_t a_index = var_get_dict_index (a);
358 size_t b_index = var_get_dict_index (b);
359 result = a_index < b_index ? -1 : a_index > b_index;
/* Alphabetical ordering compares the variable names with
   utf8_strcasecmp(). */
362 result = utf8_strcasecmp (var_get_name (a), var_get_name (b));
/* BACKWARD ordering (forward == 0) is handled here. */
363 if (!ordering->forward)
368 /* Pairs a variable with a new name. */
/* validate_var_modification() points NEW_NAME either at the variable's
   current name (from var_get_name()) or at an entry of the new-name list
   in struct var_modification. */
371 struct variable *var;
372 const char *new_name;
375 /* An algo_compare_func that compares new_name members in struct
376 var_renaming structures A and B.  The names are compared with
   utf8_strcasecmp() and its result is returned unchanged; AUX is not
   used. */
378 compare_var_renaming_by_new_name (const void *a_, const void *b_,
379 const void *aux UNUSED)
381 const struct var_renaming *a = a_;
382 const struct var_renaming *b = b_;
384 return utf8_strcasecmp (a->new_name, b->new_name);
387 /* Returns true if performing VM on dictionary D would not cause
388 problems such as duplicate variable names. Returns false
389 otherwise, and issues an error message.

   The check works on scratch arrays: the variables that would be kept are
   paired with the names they would have after renaming, and the pairs are
   then searched for equal names. */
391 validate_var_modification (const struct dictionary *d,
392 const struct var_modification *vm)
394 /* Variable reordering can't be a problem, so we don't simulate
395 it. Variable renaming can cause duplicate names, but
396 dropping variables can eliminate them, so we simulate both
398 struct variable **all_vars;
399 struct variable **keep_vars;
400 struct variable **drop_vars;
401 size_t keep_cnt, drop_cnt;
404 struct var_renaming *var_renaming;
408 /* All variables, in index order. */
409 dict_get_vars_mutable (d, &all_vars, &all_cnt, 0);
411 /* Drop variables, in index order. */
/* The drop list is copied before it is sorted, so the array in VM keeps
   its original order. */
412 drop_cnt = vm->drop_cnt;
413 drop_vars = xnmalloc (drop_cnt, sizeof *drop_vars);
414 memcpy (drop_vars, vm->drop_vars, drop_cnt * sizeof *drop_vars);
415 sort (drop_vars, drop_cnt, sizeof *drop_vars,
416 compare_variables_given_ordering, &forward_positional_ordering);
418 /* Keep variables, in index order. */
419 assert (all_cnt >= drop_cnt);
420 keep_cnt = all_cnt - drop_cnt;
421 keep_vars = xnmalloc (keep_cnt, sizeof *keep_vars);
422 if (set_difference (all_vars, all_cnt,
426 compare_variables_given_ordering,
427 &forward_positional_ordering) != keep_cnt)
430 /* Copy variables into var_renaming array. */
/* Every kept variable starts out paired with its current name. */
431 var_renaming = xnmalloc (keep_cnt, sizeof *var_renaming);
432 for (i = 0; i < keep_cnt; i++)
434 var_renaming[i].var = keep_vars[i];
435 var_renaming[i].new_name = var_get_name (keep_vars[i]);
438 /* Rename variables in var_renaming array. */
439 for (i = 0; i < vm->rename_cnt; i++)
441 struct variable *const *kv;
442 struct var_renaming *vr;
444 /* Get the var_renaming element. */
445 kv = binary_search (keep_vars, keep_cnt, sizeof *keep_vars,
447 compare_variables_given_ordering,
448 &forward_positional_ordering);
/* keep_vars and var_renaming were filled index for index above, so the
   offset of KV within keep_vars also locates the matching var_renaming
   entry. */
451 vr = var_renaming + (kv - keep_vars);
453 vr->new_name = vm->new_names[i];
/* After sorting by new name, VALID is true when adjacent_find_equal()
   finds no pair of neighbours that compare equal. */
456 /* Sort var_renaming array by new names and check for
458 sort (var_renaming, keep_cnt, sizeof *var_renaming,
459 compare_var_renaming_by_new_name, NULL);
460 valid = adjacent_find_equal (var_renaming, keep_cnt, sizeof *var_renaming,
461 compare_var_renaming_by_new_name, NULL) == NULL;
472 /* Reorders, removes, and renames variables in dictionary D
473 according to VM. Returns true if successful, false if there
474 would have been duplicate variable names if the modifications
475 had been carried out. In the latter case, the dictionary is
478 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
480 char **rename_old_names;
482 struct variable **rename_vars;
483 char **rename_new_names;
488 /* Check whether the modifications will cause duplicate
490 if (!validate_var_modification (d, vm))
493 /* Record the old names of variables to rename. After
494 variables are deleted, we can't depend on the variables to
495 still exist, but we can still look them up by name. */
496 rename_old_names = xnmalloc (vm->rename_cnt, sizeof *rename_old_names);
497 for (i = 0; i < vm->rename_cnt; i++)
498 rename_old_names[i] = xstrdup (var_get_name (vm->rename_vars[i]));
500 /* Reorder and delete variables. */
501 dict_reorder_vars (d, vm->reorder_vars, vm->reorder_cnt);
502 dict_delete_vars (d, vm->drop_vars, vm->drop_cnt);
504 /* Compose lists of variables to rename and their new names. */
/* Both arrays are sized for the full rename list; RENAME_CNT counts the
   entries actually stored in them. */
505 rename_vars = xnmalloc (vm->rename_cnt, sizeof *rename_vars);
506 rename_new_names = xnmalloc (vm->rename_cnt, sizeof *rename_new_names);
508 for (i = 0; i < vm->rename_cnt; i++)
/* Look the variable up again by its recorded old name in the dictionary
   as it stands after reordering and deletion. */
510 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
514 rename_vars[rename_cnt] = var;
515 rename_new_names[rename_cnt] = vm->new_names[i];
520 if (dict_rename_vars (d, rename_vars, rename_new_names, rename_cnt,
/* Free the name copies made above and the scratch arrays.  The strings in
   rename_new_names belong to VM, so only the array itself is freed. */
525 for (i = 0; i < vm->rename_cnt; i++)
526 free (rename_old_names[i]);
527 free (rename_old_names);
529 free (rename_new_names);