1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/dataset.h"
22 #include "data/dictionary.h"
23 #include "data/variable.h"
24 #include "language/command.h"
25 #include "language/lexer/lexer.h"
26 #include "language/lexer/variable-parser.h"
27 #include "libpspp/array.h"
28 #include "libpspp/assertion.h"
29 #include "libpspp/bit-vector.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/message.h"
32 #include "libpspp/misc.h"
33 #include "libpspp/str.h"
35 #include "gl/xalloc.h"
38 #define _(msgid) gettext (msgid)
40 /* These control the ordering produced by
41 compare_variables_given_ordering().  Both members are used as
   booleans.  The comparison function tests `positional' to choose
   between dictionary-index order and case-insensitive name order,
   and tests `forward' after the base result has been computed.
   The REORDER parser starts every group with both set to 1 and
   clears one when BACKWARD or ALPHA is matched. */
44 int forward; /* 1=FORWARD, 0=BACKWARD. */
45 int positional; /* 1=POSITIONAL, 0=ALPHA. */
48 /* Increasing order of variable index. */
/* The initializer supplies 1 for both ordering flags.  This object
   is passed as the auxiliary argument to sort(), set_difference()
   and binary_search() wherever an array of variables has to be
   handled in dictionary-index order. */
49 static struct ordering forward_positional_ordering = {1, 1};
/* Forward declaration: cmd_modify_vars() uses the comparison
   function before its definition appears in the file. */
51 static int compare_variables_given_ordering (const void *, const void *,
52 const void *ordering);
54 /* Explains how to modify the variables in a dictionary.
   cmd_modify_vars() fills one of these in while parsing and
   rearrange_dict() applies it.  The code pairs each pointer below
   with an element count (reorder_cnt, drop_cnt, rename_cnt), and
   pairs rename_vars with a new_names array of strings. */
55 struct var_modification
57 /* New variable ordering, handed to dict_reorder_vars(). */
58 struct variable **reorder_vars;
61 /* DROP/KEEP information: the variables handed to
   dict_delete_vars().  A KEEP list is converted into the
   equivalent drop list before it is stored here. */
62 struct variable **drop_vars;
65 /* New variable names: the variables being renamed.  Element I
   is given the name new_names[I]. */
66 struct variable **rename_vars;
/* Forward declaration: cmd_modify_vars() calls rearrange_dict(),
   both for the MAP subcommand and for the final update, before the
   function is defined. */
71 static bool rearrange_dict (struct dictionary *d,
72 const struct var_modification *vm);
74 /* Performs MODIFY VARS command.

   Parses the subcommands REORDER, RENAME, KEEP, DROP and MAP from
   LEXER, recording the requested changes in a struct
   var_modification, and then passes that to rearrange_dict() for
   the dictionary of dataset DS.  The result code starts out as
   CMD_CASCADING_FAILURE and is set to CMD_SUCCESS only on the path
   that gets past the final rearrange_dict() call. */
76 cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
78 /* Bits indicating whether we've already encountered a subcommand of
80 unsigned already_encountered = 0;
82 /* What we are going to do to the active dataset. */
/* NOTE(review): the "Bits indicating..." comment above is not
   closed until the end of the "What we are going to do" line, so
   as written the already_encountered declaration sits inside it --
   confirm against the upstream file.  The masks tested below are
   1 = REORDER, 2 = RENAME, 4 = KEEP or DROP. */
83 struct var_modification vm;
86 int ret_code = CMD_CASCADING_FAILURE;
90 if (proc_make_temporary_transformations_permanent (ds))
91 msg (SE, _("MODIFY VARS may not be used after TEMPORARY. "
92 "Temporary transformations will be made permanent."));
/* Start from an empty modification.  The cleanup code at the end
   of the function frees these pointers. */
94 vm.reorder_vars = NULL;
96 vm.rename_vars = NULL;
102 /* Parse each subcommand. */
103 lex_match (lexer, T_SLASH);
106 if (lex_match_id (lexer, "REORDER"))
108 struct variable **v = NULL;
/* Mask 1 records that REORDER has already been seen. */
111 if (already_encountered & 1)
113 lex_sbc_only_once ("REORDER");
116 already_encountered |= 1;
118 lex_match (lexer, T_EQUALS);
121 struct ordering ordering;
/* Each group defaults to FORWARD POSITIONAL.  The empty statements
   after the FORWARD and POSITIONAL matches are deliberate: those
   keywords name the defaults, so they only need to be consumed. */
124 ordering.forward = ordering.positional = 1;
125 if (lex_match_id (lexer, "FORWARD"));
126 else if (lex_match_id (lexer, "BACKWARD"))
127 ordering.forward = 0;
128 if (lex_match_id (lexer, "POSITIONAL"));
129 else if (lex_match_id (lexer, "ALPHA"))
130 ordering.positional = 0;
/* ALL, or reaching the end of the subcommand, selects the
   variables returned by dict_get_vars_mutable() with DC_SYSTEM. */
132 if (lex_match (lexer, T_ALL) || lex_token (lexer) == T_SLASH || lex_token (lexer) == T_ENDCMD)
136 msg (SE, _("Cannot specify ALL after specifying a set "
140 dict_get_vars_mutable (dataset_dict (ds), &v, &nv, DC_SYSTEM);
144 if (!lex_match (lexer, T_LPAREN))
146 lex_error_expecting (lexer, "`('", NULL_SENTINEL);
150 if (!parse_variables (lexer, dataset_dict (ds), &v, &nv,
151 PV_APPEND | PV_NO_DUPLICATE))
156 if (!lex_match (lexer, T_RPAREN))
158 lex_error_expecting (lexer, "`)'", NULL_SENTINEL);
/* Only the elements from index prev_nv up to nv are sorted, so
   the part of V that precedes this group keeps its positions. */
163 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
164 compare_variables_given_ordering, &ordering);
166 while (lex_token (lexer) != T_SLASH
167 && lex_token (lexer) != T_ENDCMD);
172 else if (lex_match_id (lexer, "RENAME"))
/* Mask 2 records that RENAME has already been seen. */
174 if (already_encountered & 2)
176 lex_sbc_only_once ("RENAME");
179 already_encountered |= 2;
181 lex_match (lexer, T_EQUALS);
/* prev_nv_1 is passed by address to parse_DATA_LIST_vars() as the
   count that goes with vm.new_names.  prev_nv_2 keeps the count
   from before this group, so the error message below can report
   how many names this group contributed on each side. */
184 size_t prev_nv_1 = vm.rename_cnt;
185 size_t prev_nv_2 = vm.rename_cnt;
187 if (!lex_match (lexer, T_LPAREN))
189 lex_error_expecting (lexer, "`('", NULL_SENTINEL);
192 if (!parse_variables (lexer, dataset_dict (ds),
193 &vm.rename_vars, &vm.rename_cnt,
194 PV_APPEND | PV_NO_DUPLICATE))
196 if (!lex_match (lexer, T_EQUALS))
198 lex_error_expecting (lexer, "`='", NULL_SENTINEL);
201 if (!parse_DATA_LIST_vars (lexer, dataset_dict (ds),
202 &vm.new_names, &prev_nv_1, PV_APPEND))
/* Once both lists are parsed the two totals must agree: the group
   has to supply as many new names as it has old variables. */
204 if (prev_nv_1 != vm.rename_cnt)
206 msg (SE, _("Differing number of variables in old name list "
207 "(%zu) and in new name list (%zu)."),
208 vm.rename_cnt - prev_nv_2, prev_nv_1 - prev_nv_2);
209 for (i = 0; i < prev_nv_1; i++)
210 free (vm.new_names[i]);
215 if (!lex_match (lexer, T_RPAREN))
217 lex_error_expecting (lexer, "`)'", NULL_SENTINEL);
221 while (lex_token (lexer) != T_ENDCMD
222 && lex_token (lexer) != T_SLASH);
224 else if (lex_match_id (lexer, "KEEP"))
226 struct variable **keep_vars, **all_vars, **drop_vars;
227 size_t keep_cnt, all_cnt, drop_cnt;
/* KEEP and DROP share mask 4, so at most one of the two may be
   given, and only once. */
229 if (already_encountered & 4)
231 msg (SE, _("KEEP subcommand may be given at most once. It may "
232 "not be given in conjunction with the DROP subcommand."));
235 already_encountered |= 4;
237 lex_match (lexer, T_EQUALS);
238 if (!parse_variables (lexer, dataset_dict (ds), &keep_vars, &keep_cnt, PV_NONE))
241 /* Transform the list of variables to keep into a list of
242 variables to drop. First sort the keep list, then figure
243 out which variables are missing. */
244 sort (keep_vars, keep_cnt, sizeof *keep_vars,
245 compare_variables_given_ordering, &forward_positional_ordering);
247 dict_get_vars_mutable (dataset_dict (ds), &all_vars, &all_cnt, 0);
248 assert (all_cnt >= keep_cnt);
/* The drop list is sized for every dictionary variable that is
   not kept.  The element size is taken from keep_vars, which has
   the same element type as drop_vars. */
250 drop_cnt = all_cnt - keep_cnt;
251 drop_vars = xnmalloc (drop_cnt, sizeof *keep_vars);
252 if (set_difference (all_vars, all_cnt,
256 compare_variables_given_ordering,
257 &forward_positional_ordering)
264 vm.drop_vars = drop_vars;
265 vm.drop_cnt = drop_cnt;
267 else if (lex_match_id (lexer, "DROP"))
269 struct variable **drop_vars;
272 if (already_encountered & 4)
274 msg (SE, _("DROP subcommand may be given at most once. It may "
275 "not be given in conjunction with the KEEP "
279 already_encountered |= 4;
281 lex_match (lexer, T_EQUALS);
282 if (!parse_variables (lexer, dataset_dict (ds), &drop_vars, &drop_cnt, PV_NONE))
284 vm.drop_vars = drop_vars;
285 vm.drop_cnt = drop_cnt;
287 else if (lex_match_id (lexer, "MAP"))
/* MAP applies the modifications collected so far to a clone of
   the dictionary instead of the dataset's own dictionary. */
289 struct dictionary *temp = dict_clone (dataset_dict (ds));
290 int success = rearrange_dict (temp, &vm);
293 /* FIXME: display new dictionary. */
299 if (lex_token (lexer) == T_ID)
300 msg (SE, _("Unrecognized subcommand name `%s'."), lex_tokcstr (lexer));
302 msg (SE, _("Subcommand name expected."));
306 if (lex_token (lexer) == T_ENDCMD)
308 if (lex_token (lexer) != T_SLASH)
310 lex_error_expecting (lexer, "`/'", "`.'", NULL_SENTINEL);
/* proc_execute() is called only when REORDER (mask 1) or KEEP or
   DROP (mask 4) was given.  RENAME and MAP on their own do not
   trigger it. */
316 if (already_encountered & (1 | 4))
319 if (!proc_execute (ds))
323 if (!rearrange_dict (dataset_dict (ds), &vm))
326 ret_code = CMD_SUCCESS;
/* Release the parsed lists, including each new-name string.
   NOTE(review): the RENAME count-mismatch path above already frees
   the new_names entries.  If that path can reach this cleanup with
   rename_cnt nonzero and the array not reset, the entries would be
   freed a second time -- confirm. */
329 free (vm.reorder_vars);
330 free (vm.rename_vars);
331 for (i = 0; i < vm.rename_cnt; i++)
332 free (vm.new_names[i]);
338 /* Compares A and B according to the settings in
339 ORDERING, returning a strcmp()-type result.

   A_ and B_ each point to an array element of type pointer to
   struct variable, and ORDERING_ points to a struct ordering.
   When ordering->positional is nonzero the variables' dictionary
   indexes are compared; otherwise their names are compared with
   strcasecmp().  ordering->forward is examined after that base
   result has been computed. */
341 compare_variables_given_ordering (const void *a_, const void *b_,
342 const void *ordering_)
344 struct variable *const *pa = a_;
345 struct variable *const *pb = b_;
346 const struct variable *a = *pa;
347 const struct variable *b = *pb;
348 const struct ordering *ordering = ordering_;
351 if (ordering->positional)
353 size_t a_index = var_get_dict_index (a);
354 size_t b_index = var_get_dict_index (b);
/* Yields -1, 0 or 1 without subtracting the unsigned indexes. */
355 result = a_index < b_index ? -1 : a_index > b_index;
358 result = strcasecmp (var_get_name (a), var_get_name (b));
359 if (!ordering->forward)
364 /* Pairs a variable with a new name.  validate_var_modification()
   builds an array of these, one per variable that survives the
   drop list, in order to look for duplicate names. */
367 struct variable *var;
/* Points either at the variable's current name or at an entry of
   the modification's new_names list.  No copy is made when it is
   assigned. */
368 const char *new_name;
371 /* An algo_compare_func that compares new_name members in struct
372 var_renaming structures A and B.  The comparison is
   case-insensitive (strcasecmp) and AUX is not used. */
374 compare_var_renaming_by_new_name (const void *a_, const void *b_,
375 const void *aux UNUSED)
377 const struct var_renaming *a = a_;
378 const struct var_renaming *b = b_;
380 return strcasecmp (a->new_name, b->new_name);
383 /* Returns true if performing VM on dictionary D would not cause
384 problems such as duplicate variable names. Returns false
385 otherwise, and issues an error message.

   The check is a simulation that works on copies: it computes the
   variables that survive the drop list, pairs each one with the
   name it would have after renaming, sorts those pairs by name and
   looks for two adjacent pairs that compare equal. */
387 validate_var_modification (const struct dictionary *d,
388 const struct var_modification *vm)
390 /* Variable reordering can't be a problem, so we don't simulate
391 it. Variable renaming can cause duplicate names, but
392 dropping variables can eliminate them, so we simulate both
394 struct variable **all_vars;
395 struct variable **keep_vars;
396 struct variable **drop_vars;
397 size_t keep_cnt, drop_cnt;
400 struct var_renaming *var_renaming;
404 /* All variables, in index order. */
/* NOTE(review): the "Variable reordering..." comment above has no
   terminator of its own, so as written the local declarations that
   follow it are swallowed up to the end of the "All variables"
   comment -- confirm against the upstream file. */
405 dict_get_vars_mutable (d, &all_vars, &all_cnt, 0);
407 /* Drop variables, in index order.  A copy of vm->drop_vars is
   sorted, so the array held by VM keeps its order. */
408 drop_cnt = vm->drop_cnt;
409 drop_vars = xnmalloc (drop_cnt, sizeof *drop_vars);
410 memcpy (drop_vars, vm->drop_vars, drop_cnt * sizeof *drop_vars);
411 sort (drop_vars, drop_cnt, sizeof *drop_vars,
412 compare_variables_given_ordering, &forward_positional_ordering);
414 /* Keep variables, in index order: the set difference of the two
   sorted arrays above.  The count that set_difference() returns is
   compared against all_cnt - drop_cnt. */
415 assert (all_cnt >= drop_cnt);
416 keep_cnt = all_cnt - drop_cnt;
417 keep_vars = xnmalloc (keep_cnt, sizeof *keep_vars);
418 if (set_difference (all_vars, all_cnt,
422 compare_variables_given_ordering,
423 &forward_positional_ordering) != keep_cnt)
426 /* Copy variables into var_renaming array.  Every entry starts
   out with the variable's current name. */
427 var_renaming = xnmalloc (keep_cnt, sizeof *var_renaming);
428 for (i = 0; i < keep_cnt; i++)
430 var_renaming[i].var = keep_vars[i];
431 var_renaming[i].new_name = var_get_name (keep_vars[i]);
434 /* Rename variables in var_renaming array. */
435 for (i = 0; i < vm->rename_cnt; i++)
437 struct variable *const *kv;
438 struct var_renaming *vr;
440 /* Get the var_renaming element.  keep_vars and var_renaming
   are still parallel here, so the position found in keep_vars is
   also the index into var_renaming. */
441 kv = binary_search (keep_vars, keep_cnt, sizeof *keep_vars,
443 compare_variables_given_ordering,
444 &forward_positional_ordering);
447 vr = var_renaming + (kv - keep_vars);
449 vr->new_name = vm->new_names[i];
452 /* Sort var_renaming array by new names and check for
454 sort (var_renaming, keep_cnt, sizeof *var_renaming,
455 compare_var_renaming_by_new_name, NULL);
456 valid = adjacent_find_equal (var_renaming, keep_cnt, sizeof *var_renaming,
457 compare_var_renaming_by_new_name, NULL) == NULL;
468 /* Reorders, removes, and renames variables in dictionary D
469 according to VM. Returns true if successful, false if there
470 would have been duplicate variable names if the modifications
471 had been carried out. In the latter case, the dictionary is
474 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
476 char **rename_old_names;
478 struct variable **rename_vars;
479 char **rename_new_names;
484 /* Check whether the modifications will cause duplicate
486 if (!validate_var_modification (d, vm))
489 /* Record the old names of variables to rename. After
490 variables are deleted, we can't depend on the variables to
491 still exist, but we can still look them up by name. */
/* NOTE(review): the three comments above ("Reorders...", "Check
   whether..." and "Record the old names...") share one terminator,
   so as written the function header, the local declarations and
   the validate_var_modification() check between them are lexically
   inside a comment -- confirm against the upstream file. */
492 rename_old_names = xnmalloc (vm->rename_cnt, sizeof *rename_old_names);
493 for (i = 0; i < vm->rename_cnt; i++)
494 rename_old_names[i] = xstrdup (var_get_name (vm->rename_vars[i]));
496 /* Reorder and delete variables. */
497 dict_reorder_vars (d, vm->reorder_vars, vm->reorder_cnt);
498 dict_delete_vars (d, vm->drop_vars, vm->drop_cnt);
500 /* Compose lists of variables to rename and their new names.
   Both arrays are sized for vm->rename_cnt entries but are filled
   at index rename_cnt, a separate counter from I.  Presumably
   variables that no longer exist after the deletion are skipped --
   confirm. */
501 rename_vars = xnmalloc (vm->rename_cnt, sizeof *rename_vars);
502 rename_new_names = xnmalloc (vm->rename_cnt, sizeof *rename_new_names);
504 for (i = 0; i < vm->rename_cnt; i++)
506 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
510 rename_vars[rename_cnt] = var;
511 rename_new_names[rename_cnt] = vm->new_names[i];
516 if (dict_rename_vars (d, rename_vars, rename_new_names, rename_cnt,
/* The old-name strings were duplicated above, so each one is freed
   here along with the array.  rename_new_names only holds pointers
   taken from vm->new_names, so just the array is freed. */
521 for (i = 0; i < vm->rename_cnt; i++)
522 free (rename_old_names[i]);
523 free (rename_old_names);
525 free (rename_new_names);