1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/dataset.h"
22 #include "data/dictionary.h"
23 #include "data/variable.h"
24 #include "language/command.h"
25 #include "language/lexer/lexer.h"
26 #include "language/lexer/variable-parser.h"
27 #include "libpspp/array.h"
28 #include "libpspp/assertion.h"
29 #include "libpspp/bit-vector.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/message.h"
32 #include "libpspp/misc.h"
33 #include "libpspp/str.h"
35 #include "gl/xalloc.h"
38 #define _(msgid) gettext (msgid)
40 /* These control the ordering produced by
41 compare_variables_given_ordering(): `positional' selects the sort
key and `forward' the direction. */
44 int forward; /* 1=FORWARD, 0=BACKWARD. */
45 int positional; /* 1=POSITIONAL (compare dictionary indexes), 0=ALPHA (compare names with strcasecmp()). */
48 /* Increasing order of variable index.  Passed as the auxiliary
argument wherever this file sorts, searches, or subtracts variable
lists that have to be in dictionary order. */
49 static struct ordering forward_positional_ordering = {1, 1};
/* Comparison callback handed to sort() and the related array helpers
used in this file.  The third argument points to a struct ordering.
Defined below. */
51 static int compare_variables_given_ordering (const void *, const void *,
52 const void *ordering);
54 /* Explains how to modify the variables in a dictionary.
cmd_modify_vars() fills one of these in and rearrange_dict()
applies it. */
55 struct var_modification
57 /* New variable ordering; rearrange_dict() hands this to
dict_reorder_vars(). */
58 struct variable **reorder_vars;
61 /* DROP/KEEP information: the variables to delete.  A KEEP list is
converted into the equivalent list of variables to drop before it
is stored here. */
62 struct variable **drop_vars;
65 /* New variable names: rename_vars[i] is the variable that is to
receive new_names[i]. */
66 struct variable **rename_vars;
/* Applies VM to dictionary D and reports success as a bool; see the
comment on the definition below. */
71 static bool rearrange_dict (struct dictionary *d,
72 const struct var_modification *vm);
74 /* Performs MODIFY VARS command.

Parses the REORDER, RENAME, KEEP, DROP and MAP subcommands from
LEXER into a struct var_modification and then applies it to the
dictionary of dataset DS with rearrange_dict().

The result starts out as CMD_CASCADING_FAILURE and is changed to
CMD_SUCCESS near the end, after the rearrange_dict() call on the
real dictionary. */
76 cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
78 /* Bits indicating whether we've already encountered a subcommand of
80 unsigned already_encountered = 0;
82 /* What we are going to do to the active dataset. */
83 struct var_modification vm;
86 int ret_code = CMD_CASCADING_FAILURE;
/* An error is reported when
proc_make_temporary_transformations_permanent() returns true;
per the message text, that is the case where TEMPORARY was in
effect. */
90 if (proc_make_temporary_transformations_permanent (ds))
91 msg (SE, _("MODIFY VARS may not be used after TEMPORARY. "
92 "Temporary transformations will be made permanent."));
94 vm.reorder_vars = NULL;
96 vm.rename_vars = NULL;
102 /* Parse each subcommand. */
/* The result of this lex_match() is ignored, so a leading slash is
optional. */
103 lex_match (lexer, T_SLASH);
106 if (lex_match_id (lexer, "REORDER"))
108 struct variable **v = NULL;
111 if (already_encountered & 1)
113 msg (SE, _("%s subcommand may be given at most once."), "REORDER");
/* Bit 1 records that REORDER has been seen. */
116 already_encountered |= 1;
/* Result ignored: the equals sign is optional. */
118 lex_match (lexer, T_EQUALS);
121 struct ordering ordering;
/* Defaults are FORWARD and POSITIONAL.  Matching either of those
keywords therefore needs no action, which is why the two `if'
statements below have empty bodies. */
124 ordering.forward = ordering.positional = 1;
125 if (lex_match_id (lexer, "FORWARD"));
126 else if (lex_match_id (lexer, "BACKWARD"))
127 ordering.forward = 0;
128 if (lex_match_id (lexer, "POSITIONAL"));
129 else if (lex_match_id (lexer, "ALPHA"))
130 ordering.positional = 0;
/* ALL, or an immediately following slash or end of command, takes
the variable list from the dictionary instead of from an explicit
parenthesized list. */
132 if (lex_match (lexer, T_ALL) || lex_token (lexer) == T_SLASH || lex_token (lexer) == T_ENDCMD)
136 msg (SE, _("Cannot specify ALL after specifying a set "
140 dict_get_vars_mutable (dataset_dict (ds), &v, &nv, DC_SYSTEM);
144 if (!lex_match (lexer, T_LPAREN))
146 msg (SE, _("`(' expected on %s subcommand."), "REORDER");
/* PV_APPEND adds to what is already in V, so several groups can
accumulate. */
150 if (!parse_variables (lexer, dataset_dict (ds), &v, &nv,
151 PV_APPEND | PV_NO_DUPLICATE))
156 if (!lex_match (lexer, T_RPAREN))
158 msg (SE, _("`)' expected following variable names on "
159 "REORDER subcommand."));
/* Only the elements from index prev_nv up to nv are sorted, using
the ordering selected for this group. */
164 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
165 compare_variables_given_ordering, &ordering);
/* Loop condition: continue until a slash or the end of the
command. */
167 while (lex_token (lexer) != T_SLASH
168 && lex_token (lexer) != T_ENDCMD);
173 else if (lex_match_id (lexer, "RENAME"))
175 if (already_encountered & 2)
177 msg (SE, _("%s subcommand may be given at most once."), "RENAME");
/* Bit 2 records that RENAME has been seen. */
180 already_encountered |= 2;
182 lex_match (lexer, T_EQUALS);
/* Both start at the current rename count.  prev_nv_1 is passed by
address to parse_DATA_LIST_vars() below and then compared with
vm.rename_cnt; prev_nv_2 is only read, to compute the two list
lengths for the error message. */
185 size_t prev_nv_1 = vm.rename_cnt;
186 size_t prev_nv_2 = vm.rename_cnt;
188 if (!lex_match (lexer, T_LPAREN))
190 msg (SE, _("`(' expected on %s subcommand."), "RENAME");
/* Old names: existing variables, appended to vm.rename_vars. */
193 if (!parse_variables (lexer, dataset_dict (ds),
194 &vm.rename_vars, &vm.rename_cnt,
195 PV_APPEND | PV_NO_DUPLICATE))
197 if (!lex_match (lexer, T_EQUALS))
199 msg (SE, _("`=' expected between lists of new and old variable "
200 "names on RENAME subcommand."));
/* New names, appended to vm.new_names. */
203 if (!parse_DATA_LIST_vars (lexer, dataset_dict (ds),
204 &vm.new_names, &prev_nv_1, PV_APPEND))
/* The old-name and new-name lists must be the same length. */
206 if (prev_nv_1 != vm.rename_cnt)
208 msg (SE, _("Differing number of variables in old name list "
209 "(%zu) and in new name list (%zu)."),
210 vm.rename_cnt - prev_nv_2, prev_nv_1 - prev_nv_2);
211 for (i = 0; i < prev_nv_1; i++)
212 free (vm.new_names[i]);
217 if (!lex_match (lexer, T_RPAREN))
219 msg (SE, _("`)' expected after variable lists on RENAME "
224 while (lex_token (lexer) != T_ENDCMD
225 && lex_token (lexer) != T_SLASH);
227 else if (lex_match_id (lexer, "KEEP"))
229 struct variable **keep_vars, **all_vars, **drop_vars;
230 size_t keep_cnt, all_cnt, drop_cnt;
/* Bit 4 is shared by KEEP and DROP, so at most one of the two may
appear, and only once. */
232 if (already_encountered & 4)
234 msg (SE, _("KEEP subcommand may be given at most once. It may "
235 "not be given in conjunction with the DROP subcommand."));
238 already_encountered |= 4;
240 lex_match (lexer, T_EQUALS);
241 if (!parse_variables (lexer, dataset_dict (ds), &keep_vars, &keep_cnt, PV_NONE))
244 /* Transform the list of variables to keep into a list of
245 variables to drop. First sort the keep list, then figure
246 out which variables are missing. */
247 sort (keep_vars, keep_cnt, sizeof *keep_vars,
248 compare_variables_given_ordering, &forward_positional_ordering);
250 dict_get_vars_mutable (dataset_dict (ds), &all_vars, &all_cnt, 0);
251 assert (all_cnt >= keep_cnt);
253 drop_cnt = all_cnt - keep_cnt;
/* keep_vars and drop_vars have the same element type, so
sizeof *keep_vars is the right element size here. */
254 drop_vars = xnmalloc (drop_cnt, sizeof *keep_vars);
255 if (set_difference (all_vars, all_cnt,
259 compare_variables_given_ordering,
260 &forward_positional_ordering)
267 vm.drop_vars = drop_vars;
268 vm.drop_cnt = drop_cnt;
270 else if (lex_match_id (lexer, "DROP"))
272 struct variable **drop_vars;
/* Same bit as KEEP; see above. */
275 if (already_encountered & 4)
277 msg (SE, _("DROP subcommand may be given at most once. It may "
278 "not be given in conjunction with the KEEP "
282 already_encountered |= 4;
284 lex_match (lexer, T_EQUALS);
285 if (!parse_variables (lexer, dataset_dict (ds), &drop_vars, &drop_cnt, PV_NONE))
/* DROP needs no conversion: the parsed list is stored as is. */
287 vm.drop_vars = drop_vars;
288 vm.drop_cnt = drop_cnt;
290 else if (lex_match_id (lexer, "MAP"))
/* The modifications collected so far are tried on a clone of the
dictionary rather than on the dictionary itself. */
292 struct dictionary *temp = dict_clone (dataset_dict (ds));
293 int success = rearrange_dict (temp, &vm);
296 /* FIXME: display new dictionary. */
302 if (lex_token (lexer) == T_ID)
303 msg (SE, _("Unrecognized subcommand name `%s'."), lex_tokcstr (lexer));
305 msg (SE, _("Subcommand name expected."));
/* After a subcommand, the next token must be the end of the command
or a slash. */
309 if (lex_token (lexer) == T_ENDCMD)
311 if (lex_token (lexer) != T_SLASH)
313 msg (SE, _("`/' or `.' expected."));
/* Bits 1 and 4: proc_execute() is called only when REORDER or
KEEP/DROP was given. */
319 if (already_encountered & (1 | 4))
322 if (!proc_execute (ds))
326 if (!rearrange_dict (dataset_dict (ds), &vm))
329 ret_code = CMD_SUCCESS;
/* Release the arrays held in VM, including each new-name string. */
332 free (vm.reorder_vars);
333 free (vm.rename_vars);
334 for (i = 0; i < vm.rename_cnt; i++)
335 free (vm.new_names[i]);
341 /* Compares A and B according to the settings in
342 ORDERING, returning a strcmp()-type result.

A_ and B_ each point to a `struct variable *' array element, and
ORDERING_ points to a struct ordering.  The positional branch
compares the variables' dictionary indexes; the name comparison
uses strcasecmp() and so ignores case. */
344 compare_variables_given_ordering (const void *a_, const void *b_,
345 const void *ordering_)
347 struct variable *const *pa = a_;
348 struct variable *const *pb = b_;
349 const struct variable *a = *pa;
350 const struct variable *b = *pb;
351 const struct ordering *ordering = ordering_;
354 if (ordering->positional)
356 size_t a_index = var_get_dict_index (a);
357 size_t b_index = var_get_dict_index (b);
/* Yields -1, 0, or 1 without subtracting the unsigned indexes. */
358 result = a_index < b_index ? -1 : a_index > b_index;
361 result = strcasecmp (var_get_name (a), var_get_name (b));
/* BACKWARD ordering is handled here. */
362 if (!ordering->forward)
367 /* Pairs a variable with a new name.  validate_var_modification()
builds an array of these to look for duplicate names. */
370 struct variable *var;
/* Set in validate_var_modification() either from var_get_name() or
from an entry of vm->new_names. */
371 const char *new_name;
374 /* An algo_compare_func that compares new_name members in struct
375 var_renaming structures A and B.  The names are compared with
strcasecmp(), so case is ignored.  AUX is unused. */
377 compare_var_renaming_by_new_name (const void *a_, const void *b_,
378 const void *aux UNUSED)
380 const struct var_renaming *a = a_;
381 const struct var_renaming *b = b_;
383 return strcasecmp (a->new_name, b->new_name);
386 /* Returns true if performing VM on dictionary D would not cause
387 problems such as duplicate variable names. Returns false
388 otherwise, and issues an error message.

The check works on private arrays: a sorted copy of the drop list,
the list of variables that would be kept, and an array pairing each
kept variable with the name it would end up with.  Two equal names
next to each other in that array, once it is sorted by name, make
the modification invalid. */
390 validate_var_modification (const struct dictionary *d,
391 const struct var_modification *vm)
393 /* Variable reordering can't be a problem, so we don't simulate
394 it. Variable renaming can cause duplicate names, but
395 dropping variables can eliminate them, so we simulate both
397 struct variable **all_vars;
398 struct variable **keep_vars;
399 struct variable **drop_vars;
400 size_t keep_cnt, drop_cnt;
403 struct var_renaming *var_renaming;
407 /* All variables, in index order. */
408 dict_get_vars_mutable (d, &all_vars, &all_cnt, 0);
410 /* Drop variables, in index order.  A copy is sorted so that
vm->drop_vars, which is const here, keeps its order. */
411 drop_cnt = vm->drop_cnt;
412 drop_vars = xnmalloc (drop_cnt, sizeof *drop_vars);
413 memcpy (drop_vars, vm->drop_vars, drop_cnt * sizeof *drop_vars);
414 sort (drop_vars, drop_cnt, sizeof *drop_vars,
415 compare_variables_given_ordering, &forward_positional_ordering);
417 /* Keep variables, in index order.  The count returned by
set_difference() is checked against keep_cnt. */
418 assert (all_cnt >= drop_cnt);
419 keep_cnt = all_cnt - drop_cnt;
420 keep_vars = xnmalloc (keep_cnt, sizeof *keep_vars);
421 if (set_difference (all_vars, all_cnt,
425 compare_variables_given_ordering,
426 &forward_positional_ordering) != keep_cnt)
429 /* Copy variables into var_renaming array.  Each entry starts out
with the variable's current name, and entry I corresponds to
keep_vars[I]. */
430 var_renaming = xnmalloc (keep_cnt, sizeof *var_renaming);
431 for (i = 0; i < keep_cnt; i++)
433 var_renaming[i].var = keep_vars[i];
434 var_renaming[i].new_name = var_get_name (keep_vars[i]);
437 /* Rename variables in var_renaming array. */
438 for (i = 0; i < vm->rename_cnt; i++)
440 struct variable *const *kv;
441 struct var_renaming *vr;
443 /* Get the var_renaming element.  keep_vars is in index order,
which is the order binary_search() is told to assume. */
444 kv = binary_search (keep_vars, keep_cnt, sizeof *keep_vars,
446 compare_variables_given_ordering,
447 &forward_positional_ordering);
/* var_renaming[] was filled in parallel with keep_vars[], so the
same offset selects the matching entry. */
450 vr = var_renaming + (kv - keep_vars);
452 vr->new_name = vm->new_names[i];
455 /* Sort var_renaming array by new names and check for
457 sort (var_renaming, keep_cnt, sizeof *var_renaming,
458 compare_var_renaming_by_new_name, NULL);
459 valid = adjacent_find_equal (var_renaming, keep_cnt, sizeof *var_renaming,
460 compare_var_renaming_by_new_name, NULL) == NULL;
471 /* Reorders, removes, and renames variables in dictionary D
472 according to VM. Returns true if successful, false if there
473 would have been duplicate variable names if the modifications
474 had been carried out. In the latter case, the dictionary is
477 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
479 char **rename_old_names;
481 struct variable **rename_vars;
482 char **rename_new_names;
487 /* Check whether the modifications will cause duplicate
489 if (!validate_var_modification (d, vm))
492 /* Record the old names of variables to rename. After
493 variables are deleted, we can't depend on the variables to
494 still exist, but we can still look them up by name. */
/* Each old name is duplicated with xstrdup(), so this function owns
the copies and frees them at the end. */
495 rename_old_names = xnmalloc (vm->rename_cnt, sizeof *rename_old_names);
496 for (i = 0; i < vm->rename_cnt; i++)
497 rename_old_names[i] = xstrdup (var_get_name (vm->rename_vars[i]));
499 /* Reorder and delete variables. */
500 dict_reorder_vars (d, vm->reorder_vars, vm->reorder_cnt);
501 dict_delete_vars (d, vm->drop_vars, vm->drop_cnt);
503 /* Compose lists of variables to rename and their new names.
Both arrays are sized for vm->rename_cnt entries and are filled
through the separate local counter rename_cnt. */
504 rename_vars = xnmalloc (vm->rename_cnt, sizeof *rename_vars);
505 rename_new_names = xnmalloc (vm->rename_cnt, sizeof *rename_new_names);
507 for (i = 0; i < vm->rename_cnt; i++)
/* Look the variable up again by the name recorded earlier. */
509 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
513 rename_vars[rename_cnt] = var;
514 rename_new_names[rename_cnt] = vm->new_names[i];
519 if (dict_rename_vars (d, rename_vars, rename_new_names, rename_cnt,
/* The old-name strings are copies made above, so each one is freed.
rename_new_names[] only holds pointers taken from vm->new_names, so
just the array itself is freed. */
524 for (i = 0; i < vm->rename_cnt; i++)
525 free (rename_old_names[i]);
526 free (rename_old_names);
528 free (rename_new_names);