1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/dictionary.h>
22 #include <data/procedure.h>
23 #include <data/variable.h>
24 #include <language/command.h>
25 #include <language/lexer/lexer.h>
26 #include <language/lexer/variable-parser.h>
27 #include <libpspp/assertion.h>
28 #include <libpspp/array.h>
29 #include <libpspp/bit-vector.h>
30 #include <libpspp/compiler.h>
31 #include <libpspp/hash.h>
32 #include <libpspp/message.h>
33 #include <libpspp/misc.h>
34 #include <libpspp/str.h>
/* Shorthand wrapped around user-visible message strings in this file:
   _(msgid) expands to gettext (msgid). */
39 #define _(msgid) gettext (msgid)
41 /* These control the ordering produced by
42 compare_variables_given_ordering(). */
/* A pointer to one of these is passed to sort() as the auxiliary
   argument and read back inside compare_variables_given_ordering().
   `positional' is tested first to choose the sort key; `forward' is
   tested afterwards. */
45 int forward; /* 1=FORWARD, 0=BACKWARD. */
46 int positional; /* 1=POSITIONAL, 0=ALPHA. */
49 /* Increasing order of variable index. */
50 static struct ordering forward_positional_ordering = {1, 1};
/* Comparison callback defined later in this file.  A_ and B_ point to
   `struct variable *' elements; ORDERING_ points to a struct ordering. */
static int compare_variables_given_ordering (const void *a_,
                                             const void *b_,
                                             const void *ordering_);
55 /* Explains how to modify the variables in a dictionary. */
/* Filled in by cmd_modify_vars() while it parses subcommands and read
   (through a const pointer) by validate_var_modification() and
   rearrange_dict().  Code elsewhere in this file also accesses members
   named reorder_cnt, drop_cnt, new_names and rename_cnt, which hold the
   element counts of the arrays below and the new names that run
   parallel to rename_vars. */
56 struct var_modification
58 /* New variable ordering. */
59 struct variable **reorder_vars;
62 /* DROP/KEEP information. */
/* KEEP is converted into the equivalent list of variables to drop, so
   only a drop list is stored. */
63 struct variable **drop_vars;
66 /* New variable names. */
/* The variables to be renamed; element I receives new_names[I]. */
67 struct variable **rename_vars;
/* Applies the modifications described by VM to dictionary D; the
   definition appears later in this file. */
static bool
rearrange_dict (struct dictionary *d, const struct var_modification *vm);
75 /* Performs MODIFY VARS command. */
/* Parses the subcommands REORDER, RENAME, KEEP, DROP and MAP from LEXER,
   collects the requested changes in a struct var_modification, and
   applies them to the dictionary of dataset DS with rearrange_dict().
   `ret_code' starts out as CMD_CASCADING_FAILURE and is set to
   CMD_SUCCESS after the final rearrange_dict() call.

   Bits used in `already_encountered': 1 = REORDER seen, 2 = RENAME
   seen, 4 = KEEP or DROP seen (the two share a bit, so only one of
   them may be given).

   NOTE(review): the text reviewed here appears to omit a number of
   short lines (braces, loop headers, error exits), so the exact
   control flow between the statements below should be confirmed
   against the complete file. */
77 cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
79 /* Bits indicating whether we've already encountered a subcommand of
81 unsigned already_encountered = 0;
83 /* What we're gonna do to the active file. */
84 struct var_modification vm;
87 int ret_code = CMD_CASCADING_FAILURE;
/* If proc_make_temporary_transformations_permanent() returns true, tell
   the user that MODIFY VARS may not follow TEMPORARY. */
91 if (proc_make_temporary_transformations_permanent (ds))
92 msg (SE, _("MODIFY VARS may not be used after TEMPORARY. "
93 "Temporary transformations will be made permanent."));
/* Start from an empty modification. */
95 vm.reorder_vars = NULL;
97 vm.rename_vars = NULL;
103 /* Parse each subcommand. */
/* The result of this lex_match() is ignored, so a leading `/' is not
   required. */
104 lex_match (lexer, '/');
/* REORDER: builds in V the variables in their new order.  Each
   specification is an optional FORWARD/BACKWARD keyword, an optional
   POSITIONAL/ALPHA keyword, and then either ALL or a parenthesized
   variable list. */
107 if (lex_match_id (lexer, "REORDER"))
109 struct variable **v = NULL;
112 if (already_encountered & 1)
114 msg (SE, _("REORDER subcommand may be given at most once."));
117 already_encountered |= 1;
/* The `=' after the subcommand name is not checked for. */
119 lex_match (lexer, '=');
122 struct ordering ordering;
/* Defaults are FORWARD and POSITIONAL.  The empty statements after the
   FORWARD and POSITIONAL tests are deliberate: those keywords only
   confirm the defaults already stored. */
125 ordering.forward = ordering.positional = 1;
126 if (lex_match_id (lexer, "FORWARD"));
127 else if (lex_match_id (lexer, "BACKWARD"))
128 ordering.forward = 0;
129 if (lex_match_id (lexer, "POSITIONAL"));
130 else if (lex_match_id (lexer, "ALPHA"))
131 ordering.positional = 0;
/* ALL, whether written out or implied by reaching `/' or `.', takes
   the variable list from the dictionary via dict_get_vars_mutable()
   with DC_SYSTEM. */
133 if (lex_match (lexer, T_ALL) || lex_token (lexer) == '/' || lex_token (lexer) == '.')
137 msg (SE, _("Cannot specify ALL after specifying a set "
141 dict_get_vars_mutable (dataset_dict (ds), &v, &nv, DC_SYSTEM);
/* Otherwise an explicit list in parentheses is required. */
145 if (!lex_match (lexer, '('))
147 msg (SE, _("`(' expected on REORDER subcommand."));
/* PV_APPEND adds to what V already holds; PV_NO_DUPLICATE is passed as
   well. */
151 if (!parse_variables (lexer, dataset_dict (ds), &v, &nv,
152 PV_APPEND | PV_NO_DUPLICATE))
157 if (!lex_match (lexer, ')'))
159 msg (SE, _("`)' expected following variable names on "
160 "REORDER subcommand."));
/* Sort only the elements from index prev_nv up to nv, using the
   ordering chosen for this specification. */
165 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
166 compare_variables_given_ordering, &ordering);
/* Loop condition: keep going until the token is `/' or `.'. */
168 while (lex_token (lexer) != '/' && lex_token (lexer) != '.');
/* RENAME: each parenthesized group has the form (old names = new
   names).  Old variables are appended to vm.rename_vars and new names
   to vm.new_names. */
173 else if (lex_match_id (lexer, "RENAME"))
175 if (already_encountered & 2)
177 msg (SE, _("RENAME subcommand may be given at most once."));
180 already_encountered |= 2;
182 lex_match (lexer, '=');
/* Both start at the current rename count.  prev_nv_1 is handed to
   parse_DATA_LIST_vars() as the running count of new names, while
   prev_nv_2 keeps the starting value for the error message below. */
185 size_t prev_nv_1 = vm.rename_cnt;
186 size_t prev_nv_2 = vm.rename_cnt;
188 if (!lex_match (lexer, '('))
190 msg (SE, _("`(' expected on RENAME subcommand."));
193 if (!parse_variables (lexer, dataset_dict (ds),
194 &vm.rename_vars, &vm.rename_cnt,
195 PV_APPEND | PV_NO_DUPLICATE))
197 if (!lex_match (lexer, '='))
199 msg (SE, _("`=' expected between lists of new and old variable "
200 "names on RENAME subcommand."));
203 if (!parse_DATA_LIST_vars (lexer, &vm.new_names,
204 &prev_nv_1, PV_APPEND))
/* The number of new names must equal the number of old variables.  On a
   mismatch the message reports how many of each this group added, and
   the new-name strings collected so far are freed. */
206 if (prev_nv_1 != vm.rename_cnt)
208 msg (SE, _("Differing number of variables in old name list "
209 "(%zu) and in new name list (%zu)."),
210 vm.rename_cnt - prev_nv_2, prev_nv_1 - prev_nv_2);
211 for (i = 0; i < prev_nv_1; i++)
212 free (vm.new_names[i]);
217 if (!lex_match (lexer, ')'))
219 msg (SE, _("`)' expected after variable lists on RENAME "
224 while (lex_token (lexer) != '.' && lex_token (lexer) != '/');
/* KEEP: parsed as a list of variables, then converted into the list of
   variables to drop. */
226 else if (lex_match_id (lexer, "KEEP"))
228 struct variable **keep_vars, **all_vars, **drop_vars;
229 size_t keep_cnt, all_cnt, drop_cnt;
231 if (already_encountered & 4)
233 msg (SE, _("KEEP subcommand may be given at most once. It may "
234 "not be given in conjunction with the DROP subcommand."));
237 already_encountered |= 4;
239 lex_match (lexer, '=');
240 if (!parse_variables (lexer, dataset_dict (ds), &keep_vars, &keep_cnt, PV_NONE))
243 /* Transform the list of variables to keep into a list of
244 variables to drop. First sort the keep list, then figure
245 out which variables are missing. */
246 sort (keep_vars, keep_cnt, sizeof *keep_vars,
247 compare_variables_given_ordering, &forward_positional_ordering);
249 dict_get_vars_mutable (dataset_dict (ds), &all_vars, &all_cnt, 0);
250 assert (all_cnt >= keep_cnt);
/* The drop list has room for exactly the variables not kept. */
252 drop_cnt = all_cnt - keep_cnt;
253 drop_vars = xnmalloc (drop_cnt, sizeof *keep_vars);
254 if (set_difference (all_vars, all_cnt,
258 compare_variables_given_ordering,
259 &forward_positional_ordering)
266 vm.drop_vars = drop_vars;
267 vm.drop_cnt = drop_cnt;
/* DROP: the parsed variable list is stored directly as the drop
   list. */
269 else if (lex_match_id (lexer, "DROP"))
271 struct variable **drop_vars;
274 if (already_encountered & 4)
276 msg (SE, _("DROP subcommand may be given at most once. It may "
277 "not be given in conjunction with the KEEP "
281 already_encountered |= 4;
283 lex_match (lexer, '=');
284 if (!parse_variables (lexer, dataset_dict (ds), &drop_vars, &drop_cnt, PV_NONE))
286 vm.drop_vars = drop_vars;
287 vm.drop_cnt = drop_cnt;
/* MAP: applies the modifications collected so far to a clone of the
   dictionary rather than to the dictionary itself. */
289 else if (lex_match_id (lexer, "MAP"))
291 struct dictionary *temp = dict_clone (dataset_dict (ds));
292 int success = rearrange_dict (temp, &vm);
295 /* FIXME: display new dictionary. */
/* Anything else is not a subcommand this command knows. */
301 if (lex_token (lexer) == T_ID)
302 msg (SE, _("Unrecognized subcommand name `%s'."), lex_tokid (lexer));
304 msg (SE, _("Subcommand name expected."));
/* After a subcommand the next token must be `.' or `/'. */
308 if (lex_token (lexer) == '.')
310 if (lex_token (lexer) != '/')
312 msg (SE, _("`/' or `.' expected."));
/* proc_execute() is called only when REORDER (bit 1) or KEEP/DROP
   (bit 4) was given; RENAME alone does not trigger it. */
318 if (already_encountered & (1 | 4))
321 if (!proc_execute (ds))
325 if (!rearrange_dict (dataset_dict (ds), &vm))
328 ret_code = CMD_SUCCESS;
/* Release what parsing accumulated in VM. */
331 free (vm.reorder_vars);
332 free (vm.rename_vars);
333 for (i = 0; i < vm.rename_cnt; i++)
334 free (vm.new_names[i]);
340 /* Compares A and B according to the settings in
341 ORDERING, returning a strcmp()-type result. */
/* A_ and B_ each point to an array element of type `struct variable *',
   which is why they are dereferenced once to reach the variables.
   ORDERING_ points to a struct ordering. */
343 compare_variables_given_ordering (const void *a_, const void *b_,
344 const void *ordering_)
346 struct variable *const *pa = a_;
347 struct variable *const *pb = b_;
348 const struct variable *a = *pa;
349 const struct variable *b = *pb;
350 const struct ordering *ordering = ordering_;
/* POSITIONAL: compare dictionary indexes. */
353 if (ordering->positional)
355 size_t a_index = var_get_dict_index (a);
356 size_t b_index = var_get_dict_index (b);
/* Three-way comparison of two unsigned values without subtracting
   them: yields -1, 0 or 1. */
357 result = a_index < b_index ? -1 : a_index > b_index;
/* ALPHA: compare names, ignoring case, with strcasecmp(). */
360 result = strcasecmp (var_get_name (a), var_get_name (b));
/* BACKWARD ordering is handled under this test. */
361 if (!ordering->forward)
366 /* Pairs a variable with a new name. */
/* Used by validate_var_modification() to hold, for every variable that
   is kept, the name it would have after the renames are applied. */
/* The variable concerned. */
369 struct variable *var;
/* Its prospective name, stored inline in a buffer of VAR_NAME_LEN + 1
   chars and written with strcpy(). */
370 char new_name[VAR_NAME_LEN + 1];
373 /* An algo_compare_func that compares new_name members in struct
374 var_renaming structures A and B. */
/* The names are compared with strcasecmp(), so names differing only in
   letter case compare equal.  AUX is not used. */
376 compare_var_renaming_by_new_name (const void *a_, const void *b_,
377 const void *aux UNUSED)
379 const struct var_renaming *a = a_;
380 const struct var_renaming *b = b_;
382 return strcasecmp (a->new_name, b->new_name);
385 /* Returns true if performing VM on dictionary D would not cause
386 problems such as duplicate variable names. Returns false
387 otherwise, and issues an error message. */
/* Method: work on copies so that neither D nor VM is changed.  Sort a
   copy of the drop list by dictionary index, subtract it from the list
   of all variables to get the kept variables, pair each kept variable
   with its current name, overwrite the name of each renamed variable
   with its new name, then sort the pairs by name and look for two
   adjacent equal names.  `valid' is true when no such pair exists.
   Names are compared with strcasecmp(), i.e. ignoring case.

   NOTE(review): the text reviewed here appears to omit some short
   lines (braces, declarations, error exits and cleanup); confirm the
   failure paths against the complete file. */
389 validate_var_modification (const struct dictionary *d,
390 const struct var_modification *vm)
392 /* Variable reordering can't be a problem, so we don't simulate
393 it. Variable renaming can cause duplicate names, but
394 dropping variables can eliminate them, so we simulate both
396 struct variable **all_vars;
397 struct variable **keep_vars;
398 struct variable **drop_vars;
399 size_t keep_cnt, drop_cnt;
402 struct var_renaming *var_renaming;
406 /* All variables, in index order. */
407 dict_get_vars_mutable (d, &all_vars, &all_cnt, 0);
409 /* Drop variables, in index order. */
/* VM is const, so its drop list is copied before being sorted. */
410 drop_cnt = vm->drop_cnt;
411 drop_vars = xnmalloc (drop_cnt, sizeof *drop_vars);
412 memcpy (drop_vars, vm->drop_vars, drop_cnt * sizeof *drop_vars);
413 sort (drop_vars, drop_cnt, sizeof *drop_vars,
414 compare_variables_given_ordering, &forward_positional_ordering);
416 /* Keep variables, in index order. */
417 assert (all_cnt >= drop_cnt);
418 keep_cnt = all_cnt - drop_cnt;
419 keep_vars = xnmalloc (keep_cnt, sizeof *keep_vars);
/* The number of elements set_difference() reports is checked against
   keep_cnt. */
420 if (set_difference (all_vars, all_cnt,
424 compare_variables_given_ordering,
425 &forward_positional_ordering) != keep_cnt)
428 /* Copy variables into var_renaming array. */
/* var_renaming[I] corresponds to keep_vars[I]; the lookup further down
   relies on the two arrays sharing this order. */
429 var_renaming = xnmalloc (keep_cnt, sizeof *var_renaming);
430 for (i = 0; i < keep_cnt; i++)
432 var_renaming[i].var = keep_vars[i];
/* NOTE(review): unbounded strcpy() into a VAR_NAME_LEN + 1 buffer;
   presumably variable names never exceed VAR_NAME_LEN -- confirm. */
433 strcpy (var_renaming[i].new_name, var_get_name (keep_vars[i]));
436 /* Rename variables in var_renaming array. */
437 for (i = 0; i < vm->rename_cnt; i++)
439 struct variable *const *kv;
440 struct var_renaming *vr;
442 /* Get the var_renaming element. */
/* keep_vars is sorted by dictionary index, which permits a binary
   search with the same comparison function.
   NOTE(review): how a failed search is handled (for example a renamed
   variable that is also dropped) is not visible in the text reviewed;
   confirm that kv is checked before it is used. */
443 kv = binary_search (keep_vars, keep_cnt, sizeof *keep_vars,
445 compare_variables_given_ordering,
446 &forward_positional_ordering);
/* The offset of the match within keep_vars selects the parallel
   var_renaming element. */
449 vr = var_renaming + (kv - keep_vars);
/* NOTE(review): same unbounded strcpy() as above, this time from the
   parsed new name -- confirm its length is limited upstream. */
451 strcpy (vr->new_name, vm->new_names[i]);
454 /* Sort var_renaming array by new names and check for
456 sort (var_renaming, keep_cnt, sizeof *var_renaming,
457 compare_var_renaming_by_new_name, NULL);
458 valid = adjacent_find_equal (var_renaming, keep_cnt, sizeof *var_renaming,
459 compare_var_renaming_by_new_name, NULL) == NULL;
/* Order of operations: validate VM against D, save the current names
   of the variables to be renamed, reorder and delete variables, look
   each saved name up again in D, and pass the variables found to
   dict_rename_vars().  VM is received through a const pointer and is
   not modified.

   NOTE(review): the text reviewed here appears to omit some short
   lines (braces, return statements, counters); confirm the exits and
   return values against the complete file. */
470 /* Reorders, removes, and renames variables in dictionary D
471 according to VM. Returns true if successful, false if there
472 would have been duplicate variable names if the modifications
473 had been carried out. In the latter case, the dictionary is
476 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
478 char **rename_old_names;
480 struct variable **rename_vars;
481 char **rename_new_names;
486 /* Check whether the modifications will cause duplicate
488 if (!validate_var_modification (d, vm))
491 /* Record the old names of variables to rename. After
492 variables are deleted, we can't depend on the variables to
493 still exist, but we can still look them up by name. */
/* Each name is duplicated with xstrdup() and freed again at the end of
   this function. */
494 rename_old_names = xnmalloc (vm->rename_cnt, sizeof *rename_old_names);
495 for (i = 0; i < vm->rename_cnt; i++)
496 rename_old_names[i] = xstrdup (var_get_name (vm->rename_vars[i]));
498 /* Reorder and delete variables. */
499 dict_reorder_vars (d, vm->reorder_vars, vm->reorder_cnt);
500 dict_delete_vars (d, vm->drop_vars, vm->drop_cnt);
502 /* Compose lists of variables to rename and their new names. */
/* Both arrays are sized for the worst case of vm->rename_cnt entries;
   rename_cnt is the index at which the next entry is stored. */
503 rename_vars = xnmalloc (vm->rename_cnt, sizeof *rename_vars);
504 rename_new_names = xnmalloc (vm->rename_cnt, sizeof *rename_new_names);
506 for (i = 0; i < vm->rename_cnt; i++)
/* Look the variable up by the name saved before the deletion above.
   NOTE(review): presumably a lookup that finds nothing (the variable
   was just dropped) is skipped; that handling is not visible in the
   text reviewed -- confirm. */
508 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
512 rename_vars[rename_cnt] = var;
/* Only the pointer is stored here; the string itself still belongs to
   VM, so just the array is freed below. */
513 rename_new_names[rename_cnt] = vm->new_names[i];
518 if (dict_rename_vars (d, rename_vars, rename_new_names, rename_cnt,
/* Clean up the temporary copies made above. */
523 for (i = 0; i < vm->rename_cnt; i++)
524 free (rename_old_names[i]);
525 free (rename_old_names);
527 free (rename_new_names);