/* PSPP - a program for statistical analysis.
   Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

#include <config.h>

#include <stdlib.h>
#include <string.h>

#include <data/dictionary.h>
#include <data/procedure.h>
#include <data/variable.h>
#include <language/command.h>
#include <language/lexer/lexer.h>
#include <language/lexer/variable-parser.h>
#include <libpspp/assertion.h>
#include <libpspp/array.h>
#include <libpspp/bit-vector.h>
#include <libpspp/compiler.h>
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>

#include "xalloc.h"

#include "gettext.h"
#define _(msgid) gettext (msgid)

/* These control the ordering produced by
   compare_variables_given_ordering(). */
struct ordering
  {
    int forward;                /* 1=FORWARD, 0=BACKWARD. */
    int positional;             /* 1=POSITIONAL, 0=ALPHA. */
  };

48 /* Increasing order of variable index. */
49 static struct ordering forward_positional_ordering = {1, 1};
51 static int compare_variables_given_ordering (const void *, const void *,
52 const void *ordering);
/* Explains how to modify the variables in a dictionary. */
struct var_modification
  {
    /* New variable ordering. */
    struct variable **reorder_vars;
    size_t reorder_cnt;

    /* DROP/KEEP information. */
    struct variable **drop_vars;
    size_t drop_cnt;

    /* New variable names.  rename_vars[i] is to be renamed to
       new_names[i]; rename_cnt counts the entries in rename_vars. */
    struct variable **rename_vars;
    char **new_names;
    size_t rename_cnt;
  };

/* Applies VM to dictionary D; defined below. */
static bool rearrange_dict (struct dictionary *d,
                            const struct var_modification *vm);

74 /* Performs MODIFY VARS command. */
76 cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
78 /* Bits indicated whether we've already encountered a subcommand of
80 unsigned already_encountered = 0;
82 /* What we're gonna do to the active file. */
83 struct var_modification vm;
86 int ret_code = CMD_CASCADING_FAILURE;
90 if (proc_make_temporary_transformations_permanent (ds))
91 msg (SE, _("MODIFY VARS may not be used after TEMPORARY. "
92 "Temporary transformations will be made permanent."));
94 vm.reorder_vars = NULL;
96 vm.rename_vars = NULL;
102 /* Parse each subcommand. */
103 lex_match (lexer, '/');
106 if (lex_match_id (lexer, "REORDER"))
108 struct variable **v = NULL;
111 if (already_encountered & 1)
113 msg (SE, _("%s subcommand may be given at most once."), "REORDER");
116 already_encountered |= 1;
118 lex_match (lexer, '=');
121 struct ordering ordering;
124 ordering.forward = ordering.positional = 1;
125 if (lex_match_id (lexer, "FORWARD"));
126 else if (lex_match_id (lexer, "BACKWARD"))
127 ordering.forward = 0;
128 if (lex_match_id (lexer, "POSITIONAL"));
129 else if (lex_match_id (lexer, "ALPHA"))
130 ordering.positional = 0;
132 if (lex_match (lexer, T_ALL) || lex_token (lexer) == '/' || lex_token (lexer) == '.')
136 msg (SE, _("Cannot specify ALL after specifying a set "
140 dict_get_vars_mutable (dataset_dict (ds), &v, &nv, DC_SYSTEM);
144 if (!lex_match (lexer, '('))
146 msg (SE, _("`(' expected on %s subcommand."), "REORDER");
150 if (!parse_variables (lexer, dataset_dict (ds), &v, &nv,
151 PV_APPEND | PV_NO_DUPLICATE))
156 if (!lex_match (lexer, ')'))
158 msg (SE, _("`)' expected following variable names on "
159 "REORDER subcommand."));
164 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
165 compare_variables_given_ordering, &ordering);
167 while (lex_token (lexer) != '/' && lex_token (lexer) != '.');
172 else if (lex_match_id (lexer, "RENAME"))
174 if (already_encountered & 2)
176 msg (SE, _("%s subcommand may be given at most once."), "RENAME");
179 already_encountered |= 2;
181 lex_match (lexer, '=');
184 size_t prev_nv_1 = vm.rename_cnt;
185 size_t prev_nv_2 = vm.rename_cnt;
187 if (!lex_match (lexer, '('))
189 msg (SE, _("`(' expected on %s subcommand."), "RENAME");
192 if (!parse_variables (lexer, dataset_dict (ds),
193 &vm.rename_vars, &vm.rename_cnt,
194 PV_APPEND | PV_NO_DUPLICATE))
196 if (!lex_match (lexer, '='))
198 msg (SE, _("`=' expected between lists of new and old variable "
199 "names on RENAME subcommand."));
202 if (!parse_DATA_LIST_vars (lexer, &vm.new_names,
203 &prev_nv_1, PV_APPEND))
205 if (prev_nv_1 != vm.rename_cnt)
207 msg (SE, _("Differing number of variables in old name list "
208 "(%zu) and in new name list (%zu)."),
209 vm.rename_cnt - prev_nv_2, prev_nv_1 - prev_nv_2);
210 for (i = 0; i < prev_nv_1; i++)
211 free (vm.new_names[i]);
216 if (!lex_match (lexer, ')'))
218 msg (SE, _("`)' expected after variable lists on RENAME "
223 while (lex_token (lexer) != '.' && lex_token (lexer) != '/');
225 else if (lex_match_id (lexer, "KEEP"))
227 struct variable **keep_vars, **all_vars, **drop_vars;
228 size_t keep_cnt, all_cnt, drop_cnt;
230 if (already_encountered & 4)
232 msg (SE, _("KEEP subcommand may be given at most once. It may "
233 "not be given in conjunction with the DROP subcommand."));
236 already_encountered |= 4;
238 lex_match (lexer, '=');
239 if (!parse_variables (lexer, dataset_dict (ds), &keep_vars, &keep_cnt, PV_NONE))
242 /* Transform the list of variables to keep into a list of
243 variables to drop. First sort the keep list, then figure
244 out which variables are missing. */
245 sort (keep_vars, keep_cnt, sizeof *keep_vars,
246 compare_variables_given_ordering, &forward_positional_ordering);
248 dict_get_vars_mutable (dataset_dict (ds), &all_vars, &all_cnt, 0);
249 assert (all_cnt >= keep_cnt);
251 drop_cnt = all_cnt - keep_cnt;
252 drop_vars = xnmalloc (drop_cnt, sizeof *keep_vars);
253 if (set_difference (all_vars, all_cnt,
257 compare_variables_given_ordering,
258 &forward_positional_ordering)
265 vm.drop_vars = drop_vars;
266 vm.drop_cnt = drop_cnt;
268 else if (lex_match_id (lexer, "DROP"))
270 struct variable **drop_vars;
273 if (already_encountered & 4)
275 msg (SE, _("DROP subcommand may be given at most once. It may "
276 "not be given in conjunction with the KEEP "
280 already_encountered |= 4;
282 lex_match (lexer, '=');
283 if (!parse_variables (lexer, dataset_dict (ds), &drop_vars, &drop_cnt, PV_NONE))
285 vm.drop_vars = drop_vars;
286 vm.drop_cnt = drop_cnt;
288 else if (lex_match_id (lexer, "MAP"))
290 struct dictionary *temp = dict_clone (dataset_dict (ds));
291 int success = rearrange_dict (temp, &vm);
294 /* FIXME: display new dictionary. */
300 if (lex_token (lexer) == T_ID)
301 msg (SE, _("Unrecognized subcommand name `%s'."), lex_tokid (lexer));
303 msg (SE, _("Subcommand name expected."));
307 if (lex_token (lexer) == '.')
309 if (lex_token (lexer) != '/')
311 msg (SE, _("`/' or `.' expected."));
317 if (already_encountered & (1 | 4))
320 if (!proc_execute (ds))
324 if (!rearrange_dict (dataset_dict (ds), &vm))
327 ret_code = CMD_SUCCESS;
330 free (vm.reorder_vars);
331 free (vm.rename_vars);
332 for (i = 0; i < vm.rename_cnt; i++)
333 free (vm.new_names[i]);
339 /* Compares A and B according to the settings in
340 ORDERING, returning a strcmp()-type result. */
342 compare_variables_given_ordering (const void *a_, const void *b_,
343 const void *ordering_)
345 struct variable *const *pa = a_;
346 struct variable *const *pb = b_;
347 const struct variable *a = *pa;
348 const struct variable *b = *pb;
349 const struct ordering *ordering = ordering_;
352 if (ordering->positional)
354 size_t a_index = var_get_dict_index (a);
355 size_t b_index = var_get_dict_index (b);
356 result = a_index < b_index ? -1 : a_index > b_index;
359 result = strcasecmp (var_get_name (a), var_get_name (b));
360 if (!ordering->forward)
365 /* Pairs a variable with a new name. */
368 struct variable *var;
369 char new_name[VAR_NAME_LEN + 1];
372 /* A algo_compare_func that compares new_name members in struct
373 var_renaming structures A and B. */
375 compare_var_renaming_by_new_name (const void *a_, const void *b_,
376 const void *aux UNUSED)
378 const struct var_renaming *a = a_;
379 const struct var_renaming *b = b_;
381 return strcasecmp (a->new_name, b->new_name);
384 /* Returns true if performing VM on dictionary D would not cause
385 problems such as duplicate variable names. Returns false
386 otherwise, and issues an error message. */
388 validate_var_modification (const struct dictionary *d,
389 const struct var_modification *vm)
391 /* Variable reordering can't be a problem, so we don't simulate
392 it. Variable renaming can cause duplicate names, but
393 dropping variables can eliminate them, so we simulate both
395 struct variable **all_vars;
396 struct variable **keep_vars;
397 struct variable **drop_vars;
398 size_t keep_cnt, drop_cnt;
401 struct var_renaming *var_renaming;
405 /* All variables, in index order. */
406 dict_get_vars_mutable (d, &all_vars, &all_cnt, 0);
408 /* Drop variables, in index order. */
409 drop_cnt = vm->drop_cnt;
410 drop_vars = xnmalloc (drop_cnt, sizeof *drop_vars);
411 memcpy (drop_vars, vm->drop_vars, drop_cnt * sizeof *drop_vars);
412 sort (drop_vars, drop_cnt, sizeof *drop_vars,
413 compare_variables_given_ordering, &forward_positional_ordering);
415 /* Keep variables, in index order. */
416 assert (all_cnt >= drop_cnt);
417 keep_cnt = all_cnt - drop_cnt;
418 keep_vars = xnmalloc (keep_cnt, sizeof *keep_vars);
419 if (set_difference (all_vars, all_cnt,
423 compare_variables_given_ordering,
424 &forward_positional_ordering) != keep_cnt)
427 /* Copy variables into var_renaming array. */
428 var_renaming = xnmalloc (keep_cnt, sizeof *var_renaming);
429 for (i = 0; i < keep_cnt; i++)
431 var_renaming[i].var = keep_vars[i];
432 strcpy (var_renaming[i].new_name, var_get_name (keep_vars[i]));
435 /* Rename variables in var_renaming array. */
436 for (i = 0; i < vm->rename_cnt; i++)
438 struct variable *const *kv;
439 struct var_renaming *vr;
441 /* Get the var_renaming element. */
442 kv = binary_search (keep_vars, keep_cnt, sizeof *keep_vars,
444 compare_variables_given_ordering,
445 &forward_positional_ordering);
448 vr = var_renaming + (kv - keep_vars);
450 strcpy (vr->new_name, vm->new_names[i]);
453 /* Sort var_renaming array by new names and check for
455 sort (var_renaming, keep_cnt, sizeof *var_renaming,
456 compare_var_renaming_by_new_name, NULL);
457 valid = adjacent_find_equal (var_renaming, keep_cnt, sizeof *var_renaming,
458 compare_var_renaming_by_new_name, NULL) == NULL;
469 /* Reoders, removes, and renames variables in dictionary D
470 according to VM. Returns true if successful, false if there
471 would have been duplicate variable names if the modifications
472 had been carried out. In the latter case, the dictionary is
475 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
477 char **rename_old_names;
479 struct variable **rename_vars;
480 char **rename_new_names;
485 /* Check whether the modifications will cause duplicate
487 if (!validate_var_modification (d, vm))
490 /* Record the old names of variables to rename. After
491 variables are deleted, we can't depend on the variables to
492 still exist, but we can still look them up by name. */
493 rename_old_names = xnmalloc (vm->rename_cnt, sizeof *rename_old_names);
494 for (i = 0; i < vm->rename_cnt; i++)
495 rename_old_names[i] = xstrdup (var_get_name (vm->rename_vars[i]));
497 /* Reorder and delete variables. */
498 dict_reorder_vars (d, vm->reorder_vars, vm->reorder_cnt);
499 dict_delete_vars (d, vm->drop_vars, vm->drop_cnt);
501 /* Compose lists of variables to rename and their new names. */
502 rename_vars = xnmalloc (vm->rename_cnt, sizeof *rename_vars);
503 rename_new_names = xnmalloc (vm->rename_cnt, sizeof *rename_new_names);
505 for (i = 0; i < vm->rename_cnt; i++)
507 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
511 rename_vars[rename_cnt] = var;
512 rename_new_names[rename_cnt] = vm->new_names[i];
517 if (dict_rename_vars (d, rename_vars, rename_new_names, rename_cnt,
522 for (i = 0; i < vm->rename_cnt; i++)
523 free (rename_old_names[i]);
524 free (rename_old_names);
526 free (rename_new_names);