1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 #include <data/dictionary.h>
24 #include <data/procedure.h>
25 #include <data/variable.h>
26 #include <language/command.h>
27 #include <language/lexer/lexer.h>
28 #include <language/lexer/variable-parser.h>
29 #include <libpspp/alloc.h>
30 #include <libpspp/assertion.h>
31 #include <libpspp/array.h>
32 #include <libpspp/bit-vector.h>
33 #include <libpspp/compiler.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/message.h>
36 #include <libpspp/message.h>
37 #include <libpspp/misc.h>
38 #include <libpspp/str.h>
41 #define _(msgid) gettext (msgid)
43 /* FIXME: should change weighting variable, etc. */
44 /* These control the ordering produced by
45 compare_variables_given_ordering(): the direction of the sort and
whether variables are compared by dictionary index or by name. */
48 int forward; /* 1=FORWARD, 0=BACKWARD (cleared by the BACKWARD keyword of REORDER). */
49 int positional; /* 1=POSITIONAL (compare dictionary indexes), 0=ALPHA (compare names). */
52 /* Increasing order of variable index: forward = 1, positional = 1.
Used to put variable lists into dictionary order before they are
handed to set_difference() and binary_search(). */
53 static struct ordering forward_positional_ordering = {1, 1};
/* Comparison callback passed to sort(), set_difference() and
binary_search(); its third argument is read as a struct ordering. */
55 static int compare_variables_given_ordering (const void *, const void *,
56 const void *ordering);
58 /* Explains how to modify the variables in a dictionary.
Filled in by cmd_modify_vars() and consumed by rearrange_dict(). */
59 struct var_modification
61 /* New variable ordering, as passed to dict_reorder_vars(). */
62 struct variable **reorder_vars;
65 /* DROP/KEEP information: the variables to delete.  A KEEP list is
converted into the equivalent list of variables to drop. */
66 struct variable **drop_vars;
69 /* New variable names: the variables to rename.  The replacement
names themselves are kept separately (see the uses of new_names). */
70 struct variable **rename_vars;
/* Applies VM to dictionary D after validating it; the return value
is described at the definition below. */
75 static bool rearrange_dict (struct dictionary *d,
76 const struct var_modification *vm);
78 /* Performs MODIFY VARS command.  Parses the REORDER, RENAME, KEEP,
DROP and MAP subcommands into a struct var_modification and applies
it to the dictionary of DS with rearrange_dict().  ret_code starts
as CMD_CASCADING_FAILURE and is set to CMD_SUCCESS after the final
rearrange_dict() call. */
80 cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
82 /* Bits indicating whether we've already encountered a subcommand of
84 unsigned already_encountered = 0;
86 /* What we're gonna do to the active file. */
87 struct var_modification vm;
90 int ret_code = CMD_CASCADING_FAILURE;
/* An error is reported when the call below returns true; judging by
the message text, that presumably means TEMPORARY was in effect. */
94 if (proc_make_temporary_transformations_permanent (ds))
95 msg (SE, _("MODIFY VARS may not be used after TEMPORARY. "
96 "Temporary transformations will be made permanent."));
98 vm.reorder_vars = NULL;
100 vm.rename_vars = NULL;
106 /* Parse each subcommand. */
107 lex_match (lexer, '/');
110 if (lex_match_id (lexer, "REORDER"))
112 struct variable **v = NULL;
/* Bit 1 of already_encountered records that REORDER was seen. */
115 if (already_encountered & 1)
117 msg (SE, _("REORDER subcommand may be given at most once."));
120 already_encountered |= 1;
122 lex_match (lexer, '=');
125 struct ordering ordering;
128 ordering.forward = ordering.positional = 1;
/* FORWARD and POSITIONAL are the defaults set just above, so matching
them only consumes the keyword (note the empty statements). */
129 if (lex_match_id (lexer, "FORWARD"));
130 else if (lex_match_id (lexer, "BACKWARD"))
131 ordering.forward = 0;
132 if (lex_match_id (lexer, "POSITIONAL"));
133 else if (lex_match_id (lexer, "ALPHA"))
134 ordering.positional = 0;
136 if (lex_match (lexer, T_ALL) || lex_token (lexer) == '/' || lex_token (lexer) == '.')
140 msg (SE, _("Cannot specify ALL after specifying a set "
144 dict_get_vars_mutable (dataset_dict (ds), &v, &nv, 1u << DC_SYSTEM);
148 if (!lex_match (lexer, '('))
150 msg (SE, _("`(' expected on REORDER subcommand."));
154 if (!parse_variables (lexer, dataset_dict (ds), &v, &nv,
155 PV_APPEND | PV_NO_DUPLICATE))
160 if (!lex_match (lexer, ')'))
162 msg (SE, _("`)' expected following variable names on "
163 "REORDER subcommand."));
/* Sort only the elements from index prev_nv onward, leaving the
earlier part of V as it is. */
168 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
169 compare_variables_given_ordering, &ordering);
171 while (lex_token (lexer) != '/' && lex_token (lexer) != '.');
176 else if (lex_match_id (lexer, "RENAME"))
/* Bit 2 of already_encountered records that RENAME was seen. */
178 if (already_encountered & 2)
180 msg (SE, _("RENAME subcommand may be given at most once."));
183 already_encountered |= 2;
185 lex_match (lexer, '=');
/* Both counters start at the current rename count.  prev_nv_1 is
passed by address to parse_DATA_LIST_vars() below and so tracks the
number of new names; prev_nv_2 keeps the starting count for the
error message. */
188 size_t prev_nv_1 = vm.rename_cnt;
189 size_t prev_nv_2 = vm.rename_cnt;
191 if (!lex_match (lexer, '('))
193 msg (SE, _("`(' expected on RENAME subcommand."));
196 if (!parse_variables (lexer, dataset_dict (ds),
197 &vm.rename_vars, &vm.rename_cnt,
198 PV_APPEND | PV_NO_DUPLICATE))
200 if (!lex_match (lexer, '='))
202 msg (SE, _("`=' expected between lists of new and old variable "
203 "names on RENAME subcommand."));
206 if (!parse_DATA_LIST_vars (lexer, &vm.new_names,
207 &prev_nv_1, PV_APPEND))
209 if (prev_nv_1 != vm.rename_cnt)
211 msg (SE, _("Differing number of variables in old name list "
212 "(%d) and in new name list (%d)."),
213 (int) (vm.rename_cnt - prev_nv_2),
214 (int) (prev_nv_1 - prev_nv_2));
/* NOTE(review): these strings are freed again by the cleanup loop at
the end of this function; the statements in between are not visible
here, so confirm that path cannot reach them a second time. */
215 for (i = 0; i < prev_nv_1; i++)
216 free (vm.new_names[i]);
221 if (!lex_match (lexer, ')'))
223 msg (SE, _("`)' expected after variable lists on RENAME "
228 while (lex_token (lexer) != '.' && lex_token (lexer) != '/');
230 else if (lex_match_id (lexer, "KEEP"))
232 struct variable **keep_vars, **all_vars, **drop_vars;
233 size_t keep_cnt, all_cnt, drop_cnt;
/* Bit 4 of already_encountered is shared by KEEP and DROP, so only
one of the two subcommands may appear, and only once. */
235 if (already_encountered & 4)
/* NOTE(review): the two adjacent literals below join as `notbe';
a space is missing between `not' and `be' in the message. */
237 msg (SE, _("KEEP subcommand may be given at most once. It may not"
238 "be given in conjunction with the DROP subcommand."));
241 already_encountered |= 4;
243 lex_match (lexer, '=');
244 if (!parse_variables (lexer, dataset_dict (ds), &keep_vars, &keep_cnt, PV_NONE))
247 /* Transform the list of variables to keep into a list of
248 variables to drop. First sort the keep list, then figure
249 out which variables are missing. */
250 sort (keep_vars, keep_cnt, sizeof *keep_vars,
251 compare_variables_given_ordering, &forward_positional_ordering);
253 dict_get_vars_mutable (dataset_dict (ds), &all_vars, &all_cnt, 0);
254 assert (all_cnt >= keep_cnt);
256 drop_cnt = all_cnt - keep_cnt;
257 drop_vars = xnmalloc (drop_cnt, sizeof *keep_vars);
258 if (set_difference (all_vars, all_cnt,
262 compare_variables_given_ordering,
263 &forward_positional_ordering)
270 vm.drop_vars = drop_vars;
271 vm.drop_cnt = drop_cnt;
273 else if (lex_match_id (lexer, "DROP"))
275 struct variable **drop_vars;
/* Same bit as KEEP: DROP may not be combined with it or repeated. */
278 if (already_encountered & 4)
280 msg (SE, _("DROP subcommand may be given at most once. It may "
281 "not be given in conjunction with the KEEP "
285 already_encountered |= 4;
287 lex_match (lexer, '=');
288 if (!parse_variables (lexer, dataset_dict (ds), &drop_vars, &drop_cnt, PV_NONE))
290 vm.drop_vars = drop_vars;
291 vm.drop_cnt = drop_cnt;
293 else if (lex_match_id (lexer, "MAP"))
/* MAP applies the modifications collected so far to a clone of the
dictionary rather than to the dictionary of DS itself. */
295 struct dictionary *temp = dict_clone (dataset_dict (ds));
296 int success = rearrange_dict (temp, &vm);
299 /* FIXME: display new dictionary. */
305 if (lex_token (lexer) == T_ID)
306 msg (SE, _("Unrecognized subcommand name `%s'."), lex_tokid (lexer));
308 msg (SE, _("Subcommand name expected."));
312 if (lex_token (lexer) == '.')
314 if (lex_token (lexer) != '/')
316 msg (SE, _("`/' or `.' expected."));
/* procedure() is called only when REORDER (bit 1) or KEEP/DROP
(bit 4) was given; a command with RENAME alone skips it. */
322 if (already_encountered & (1 | 4))
325 if (!procedure (ds,NULL, NULL))
329 if (!rearrange_dict (dataset_dict (ds), &vm))
332 ret_code = CMD_SUCCESS;
/* Release the variable arrays and the new-name strings held in vm. */
335 free (vm.reorder_vars);
336 free (vm.rename_vars);
337 for (i = 0; i < vm.rename_cnt; i++)
338 free (vm.new_names[i]);
344 /* Compares A and B according to the settings in
345 ORDERING, returning a strcmp()-type result.  A_ and B_ each point
to a `struct variable *' array element; ORDERING_ points to a
struct ordering. */
347 compare_variables_given_ordering (const void *a_, const void *b_,
348 const void *ordering_)
350 struct variable *const *pa = a_;
351 struct variable *const *pb = b_;
352 const struct variable *a = *pa;
353 const struct variable *b = *pb;
354 const struct ordering *ordering = ordering_;
/* Positional ordering: compare dictionary indexes, giving -1, 0 or 1. */
357 if (ordering->positional)
359 size_t a_index = var_get_dict_index (a);
360 size_t b_index = var_get_dict_index (b);
361 result = a_index < b_index ? -1 : a_index > b_index;
/* Name comparison that ignores case (the ALPHA ordering). */
364 result = strcasecmp (var_get_name (a), var_get_name (b));
/* NOTE(review): presumably the result is reversed for BACKWARD; the
statement controlled by this test is not visible here. */
365 if (!ordering->forward)
370 /* Pairs a variable with a new name.  validate_var_modification()
builds an array of these to simulate the effect of renaming. */
373 struct variable *var; /* The variable. */
374 char new_name[LONG_NAME_LEN + 1]; /* Name it would have afterwards, null-terminated. */
377 /* An algo_compare_func that compares new_name members in struct
378 var_renaming structures A and B, ignoring case.  AUX is not used. */
380 compare_var_renaming_by_new_name (const void *a_, const void *b_,
381 const void *aux UNUSED)
383 const struct var_renaming *a = a_;
384 const struct var_renaming *b = b_;
386 return strcasecmp (a->new_name, b->new_name);
389 /* Returns true if performing VM on dictionary D would not cause
390 problems such as duplicate variable names. Returns false
391 otherwise, and issues an error message.
The check works on copies: it builds the list of variables that
survive the drop, pairs each with the name it would have after
renaming, sorts the pairs by new name and looks for equal
neighbours. */
393 validate_var_modification (const struct dictionary *d,
394 const struct var_modification *vm)
396 /* Variable reordering can't be a problem, so we don't simulate
397 it. Variable renaming can cause duplicate names, but
398 dropping variables can eliminate them, so we simulate both
400 struct variable **all_vars;
401 struct variable **keep_vars;
402 struct variable **drop_vars;
403 size_t keep_cnt, drop_cnt;
406 struct var_renaming *var_renaming;
410 /* All variables, in index order. */
411 dict_get_vars_mutable (d, &all_vars, &all_cnt, 0);
413 /* Drop variables, in index order. */
414 drop_cnt = vm->drop_cnt;
415 drop_vars = xnmalloc (drop_cnt, sizeof *drop_vars);
416 memcpy (drop_vars, vm->drop_vars, drop_cnt * sizeof *drop_vars);
417 sort (drop_vars, drop_cnt, sizeof *drop_vars,
418 compare_variables_given_ordering, &forward_positional_ordering);
420 /* Keep variables, in index order. */
421 assert (all_cnt >= drop_cnt);
422 keep_cnt = all_cnt - drop_cnt;
423 keep_vars = xnmalloc (keep_cnt, sizeof *keep_vars);
/* The keep list is computed as a set difference starting from
all_vars (the remaining arguments are not visible here); the count
returned is compared against keep_cnt. */
424 if (set_difference (all_vars, all_cnt,
428 compare_variables_given_ordering,
429 &forward_positional_ordering) != keep_cnt)
432 /* Copy variables into var_renaming array. */
433 var_renaming = xnmalloc (keep_cnt, sizeof *var_renaming);
434 for (i = 0; i < keep_cnt; i++)
436 var_renaming[i].var = keep_vars[i];
/* NOTE(review): strcpy() relies on the name fitting in the
LONG_NAME_LEN + 1 bytes of new_name -- confirm names are bounded. */
437 strcpy (var_renaming[i].new_name, var_get_name (keep_vars[i]));
440 /* Rename variables in var_renaming array. */
441 for (i = 0; i < vm->rename_cnt; i++)
443 struct variable *const *kv;
444 struct var_renaming *vr;
446 /* Get the var_renaming element. */
447 kv = binary_search (keep_vars, keep_cnt, sizeof *keep_vars,
449 compare_variables_given_ordering,
450 &forward_positional_ordering);
/* var_renaming was filled in the same order as keep_vars, so the
offset of KV within keep_vars selects the matching entry. */
453 vr = var_renaming + (kv - keep_vars);
/* NOTE(review): same unchecked copy as above, this time of the new
name taken from vm->new_names. */
455 strcpy (vr->new_name, vm->new_names[i]);
458 /* Sort var_renaming array by new names and check for
460 sort (var_renaming, keep_cnt, sizeof *var_renaming,
461 compare_var_renaming_by_new_name, NULL);
462 valid = adjacent_find_equal (var_renaming, keep_cnt, sizeof *var_renaming,
463 compare_var_renaming_by_new_name, NULL) == NULL;
474 /* Reorders, removes, and renames variables in dictionary D
475 according to VM. Returns true if successful, false if there
476 would have been duplicate variable names if the modifications
477 had been carried out. In the latter case, the dictionary is
480 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
482 char **rename_old_names;
484 struct variable **rename_vars;
485 char **rename_new_names;
490 /* Check whether the modifications will cause duplicate
492 if (!validate_var_modification (d, vm))
495 /* Record the old names of variables to rename. After
496 variables are deleted, we can't depend on the variables to
497 still exist, but we can still look them up by name. */
498 rename_old_names = xnmalloc (vm->rename_cnt, sizeof *rename_old_names);
499 for (i = 0; i < vm->rename_cnt; i++)
500 rename_old_names[i] = xstrdup (var_get_name (vm->rename_vars[i]));
502 /* Reorder and delete variables. */
503 dict_reorder_vars (d, vm->reorder_vars, vm->reorder_cnt);
504 dict_delete_vars (d, vm->drop_vars, vm->drop_cnt);
506 /* Compose lists of variables to rename and their new names. */
507 rename_vars = xnmalloc (vm->rename_cnt, sizeof *rename_vars);
508 rename_new_names = xnmalloc (vm->rename_cnt, sizeof *rename_new_names);
510 for (i = 0; i < vm->rename_cnt; i++)
/* Look the variable up by its saved old name in D as it stands after
the reorder and delete above.  NOTE(review): the handling of a
failed lookup is not visible here. */
512 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
516 rename_vars[rename_cnt] = var;
517 rename_new_names[rename_cnt] = vm->new_names[i];
/* All collected renames are applied in a single call. */
522 if (dict_rename_vars (d, rename_vars, rename_new_names, rename_cnt,
/* The saved old names are copies made with xstrdup() and are freed
one by one.  rename_new_names only holds pointers taken from
vm->new_names, so just the array itself is freed. */
527 for (i = 0; i < vm->rename_cnt; i++)
528 free (rename_old_names[i]);
529 free (rename_old_names);
531 free (rename_new_names);