1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 #include <data/dictionary.h>
24 #include <data/procedure.h>
25 #include <data/variable.h>
26 #include <language/command.h>
27 #include <language/lexer/lexer.h>
28 #include <language/lexer/variable-parser.h>
29 #include <libpspp/alloc.h>
30 #include <libpspp/assertion.h>
31 #include <libpspp/array.h>
32 #include <libpspp/bit-vector.h>
33 #include <libpspp/compiler.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/message.h>
36 #include <libpspp/message.h>
37 #include <libpspp/misc.h>
38 #include <libpspp/str.h>
41 #define _(msgid) gettext (msgid)
43 /* These control the ordering produced by
44 compare_variables_given_ordering().  A pointer to one of these is
passed to that function as its third (auxiliary) argument.
NOTE(review): the `struct ordering' tag line and its braces are not
visible in this listing; the tag name is taken from the declarations
that use it elsewhere in the file. */
47 int forward; /* 1=FORWARD, 0=BACKWARD. */
48 int positional; /* 1=POSITIONAL, 0=ALPHA. */
51 /* Increasing order of variable index: forward = 1, positional = 1.
Used as the auxiliary argument of the sort(), set_difference() and
binary_search() calls below that work in dictionary-index order. */
52 static struct ordering forward_positional_ordering = {1, 1};
/* Comparison callback, defined further down.  Its third argument is
interpreted as a pointer to a struct ordering. */
54 static int compare_variables_given_ordering (const void *, const void *,
55 const void *ordering);
57 /* Explains how to modify the variables in a dictionary.
cmd_modify_vars() fills one of these in while parsing;
validate_var_modification() and rearrange_dict() only read it.
NOTE(review): some member lines are not visible in this listing.  The
code in this file also accesses members named reorder_cnt, drop_cnt,
new_names and rename_cnt. */
58 struct var_modification
60 /* New variable ordering (handed to dict_reorder_vars()). */
61 struct variable **reorder_vars;
64 /* DROP/KEEP information: the variables to delete (handed to
dict_delete_vars()).  A KEEP list is converted into the equivalent
drop list by the parser. */
65 struct variable **drop_vars;
68 /* New variable names.  rename_vars holds the variables to rename;
the replacement name for rename_vars[i] is new_names[i]. */
69 struct variable **rename_vars;
/* Applies VM to dictionary D; defined at the end of this file. */
74 static bool rearrange_dict (struct dictionary *d,
75 const struct var_modification *vm);
77 /* Performs MODIFY VARS command.
Reads subcommands from LEXER, records the requested changes in a
struct var_modification, and finally applies them to the dictionary of
DS with rearrange_dict().  The subcommand keywords recognized here are
REORDER, RENAME, KEEP, DROP and MAP.
The result code starts out as CMD_CASCADING_FAILURE and is set to
CMD_SUCCESS in one place only, after the rearrange_dict() check near
the end.
NOTE(review): this listing omits many short lines of the function
(braces, loop headers, early exits, some declarations), so the control
flow between the statements below is only partly visible. */
79 cmd_modify_vars (struct lexer *lexer, struct dataset *ds)
81 /* Bits indicating whether we've already encountered a subcommand of
83 unsigned already_encountered = 0;
85 /* What we're gonna do to the active file. */
86 struct var_modification vm;
89 int ret_code = CMD_CASCADING_FAILURE;
/* A true result from proc_make_temporary_transformations_permanent()
triggers the message below. */
93 if (proc_make_temporary_transformations_permanent (ds))
94 msg (SE, _("MODIFY VARS may not be used after TEMPORARY. "
95 "Temporary transformations will be made permanent."));
97 vm.reorder_vars = NULL;
99 vm.rename_vars = NULL;
105 /* Parse each subcommand. */
106 lex_match (lexer, '/');
109 if (lex_match_id (lexer, "REORDER"))
111 struct variable **v = NULL;
/* Bit value 1 of already_encountered records that REORDER was seen. */
114 if (already_encountered & 1)
116 msg (SE, _("REORDER subcommand may be given at most once."));
119 already_encountered |= 1;
121 lex_match (lexer, '=');
124 struct ordering ordering;
/* Defaults are FORWARD and POSITIONAL.  The two `if' statements with
empty bodies only consume an explicitly written default keyword. */
127 ordering.forward = ordering.positional = 1;
128 if (lex_match_id (lexer, "FORWARD"));
129 else if (lex_match_id (lexer, "BACKWARD"))
130 ordering.forward = 0;
131 if (lex_match_id (lexer, "POSITIONAL"));
132 else if (lex_match_id (lexer, "ALPHA"))
133 ordering.positional = 0;
/* ALL, or an immediately following `/' or `.', takes this branch,
which fetches variables from the dictionary.
NOTE(review): the meaning of the `1u << DC_SYSTEM' argument is not
shown here; presumably it excludes that dictionary class -- confirm
against dict_get_vars_mutable(). */
135 if (lex_match (lexer, T_ALL) || lex_token (lexer) == '/' || lex_token (lexer) == '.')
139 msg (SE, _("Cannot specify ALL after specifying a set "
143 dict_get_vars_mutable (dataset_dict (ds), &v, &nv, 1u << DC_SYSTEM);
/* The alternative is a parenthesized variable list parsed into V and
NV with PV_APPEND | PV_NO_DUPLICATE. */
147 if (!lex_match (lexer, '('))
149 msg (SE, _("`(' expected on REORDER subcommand."));
153 if (!parse_variables (lexer, dataset_dict (ds), &v, &nv,
154 PV_APPEND | PV_NO_DUPLICATE))
159 if (!lex_match (lexer, ')'))
161 msg (SE, _("`)' expected following variable names on "
162 "REORDER subcommand."));
/* Only the elements from index prev_nv onward are sorted, using the
ordering chosen above for this group. */
167 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
168 compare_variables_given_ordering, &ordering);
/* Repeats until the current token is `/' or `.'. */
170 while (lex_token (lexer) != '/' && lex_token (lexer) != '.');
175 else if (lex_match_id (lexer, "RENAME"))
/* Bit value 2 records that RENAME was seen. */
177 if (already_encountered & 2)
179 msg (SE, _("RENAME subcommand may be given at most once."));
182 already_encountered |= 2;
184 lex_match (lexer, '=');
/* Both counters start at the number of renames collected so far.
prev_nv_1 is passed by address to parse_DATA_LIST_vars() below and is
afterwards compared with vm.rename_cnt; prev_nv_2 is only used to
report per-group counts in the error message. */
187 size_t prev_nv_1 = vm.rename_cnt;
188 size_t prev_nv_2 = vm.rename_cnt;
190 if (!lex_match (lexer, '('))
192 msg (SE, _("`(' expected on RENAME subcommand."));
/* Variables to rename are collected in vm.rename_vars and counted in
vm.rename_cnt. */
195 if (!parse_variables (lexer, dataset_dict (ds),
196 &vm.rename_vars, &vm.rename_cnt,
197 PV_APPEND | PV_NO_DUPLICATE))
199 if (!lex_match (lexer, '='))
/* NOTE(review): the wording says "new and old", but the list parsed
before `=' is the existing variables and the list parsed after it
goes to vm.new_names. */
201 msg (SE, _("`=' expected between lists of new and old variable "
202 "names on RENAME subcommand."));
205 if (!parse_DATA_LIST_vars (lexer, &vm.new_names,
206 &prev_nv_1, PV_APPEND))
/* The number of new names must equal the number of variables to
rename. */
208 if (prev_nv_1 != vm.rename_cnt)
210 msg (SE, _("Differing number of variables in old name list "
211 "(%d) and in new name list (%d)."),
212 (int) (vm.rename_cnt - prev_nv_2),
213 (int) (prev_nv_1 - prev_nv_2));
/* Releases every new-name string from index 0, not only the ones
added by this group.
NOTE(review): the cleanup at the end of the function also frees
vm.new_names[i]; the lines following this loop are not visible here,
so check that both cannot run on the same strings. */
214 for (i = 0; i < prev_nv_1; i++)
215 free (vm.new_names[i]);
220 if (!lex_match (lexer, ')'))
222 msg (SE, _("`)' expected after variable lists on RENAME "
227 while (lex_token (lexer) != '.' && lex_token (lexer) != '/');
229 else if (lex_match_id (lexer, "KEEP"))
231 struct variable **keep_vars, **all_vars, **drop_vars;
232 size_t keep_cnt, all_cnt, drop_cnt;
/* Bit value 4 is shared by KEEP and DROP, so at most one of the two
may appear, and only once. */
234 if (already_encountered & 4)
/* NOTE(review): the two string pieces below are joined without a
space between "not" and "be", so the message reads "notbe". */
236 msg (SE, _("KEEP subcommand may be given at most once. It may not"
237 "be given in conjunction with the DROP subcommand."));
240 already_encountered |= 4;
242 lex_match (lexer, '=');
243 if (!parse_variables (lexer, dataset_dict (ds), &keep_vars, &keep_cnt, PV_NONE))
246 /* Transform the list of variables to keep into a list of
247 variables to drop. First sort the keep list, then figure
248 out which variables are missing. */
249 sort (keep_vars, keep_cnt, sizeof *keep_vars,
250 compare_variables_given_ordering, &forward_positional_ordering);
252 dict_get_vars_mutable (dataset_dict (ds), &all_vars, &all_cnt, 0);
253 assert (all_cnt >= keep_cnt);
255 drop_cnt = all_cnt - keep_cnt;
/* The element size is written as sizeof *keep_vars; keep_vars and
drop_vars are declared with the same type above. */
256 drop_vars = xnmalloc (drop_cnt, sizeof *keep_vars);
257 if (set_difference (all_vars, all_cnt,
261 compare_variables_given_ordering,
262 &forward_positional_ordering)
269 vm.drop_vars = drop_vars;
270 vm.drop_cnt = drop_cnt;
272 else if (lex_match_id (lexer, "DROP"))
274 struct variable **drop_vars;
/* Same bit as KEEP; see the check in the KEEP branch. */
277 if (already_encountered & 4)
279 msg (SE, _("DROP subcommand may be given at most once. It may "
280 "not be given in conjunction with the KEEP "
284 already_encountered |= 4;
286 lex_match (lexer, '=');
287 if (!parse_variables (lexer, dataset_dict (ds), &drop_vars, &drop_cnt, PV_NONE))
289 vm.drop_vars = drop_vars;
290 vm.drop_cnt = drop_cnt;
292 else if (lex_match_id (lexer, "MAP"))
/* Tries the modifications collected so far on a clone of the
dictionary rather than on the dictionary itself. */
294 struct dictionary *temp = dict_clone (dataset_dict (ds));
295 int success = rearrange_dict (temp, &vm);
298 /* FIXME: display new dictionary. */
/* Error reporting for anything else; the message depends on whether
the current token is an identifier. */
304 if (lex_token (lexer) == T_ID)
305 msg (SE, _("Unrecognized subcommand name `%s'."), lex_tokid (lexer));
307 msg (SE, _("Subcommand name expected."));
/* After a subcommand the current token is tested against `.' and
`/'; anything else is reported. */
311 if (lex_token (lexer) == '.')
313 if (lex_token (lexer) != '/')
315 msg (SE, _("`/' or `.' expected."));
/* Bit values 1 and 4 correspond to REORDER and KEEP/DROP.  When
either was given, proc_execute() is called before the dictionary is
changed.
NOTE(review): presumably because those subcommands change the layout
of cases -- confirm. */
321 if (already_encountered & (1 | 4))
324 if (!proc_execute (ds))
328 if (!rearrange_dict (dataset_dict (ds), &vm))
331 ret_code = CMD_SUCCESS;
/* Cleanup of the arrays held in VM. */
334 free (vm.reorder_vars);
335 free (vm.rename_vars);
336 for (i = 0; i < vm.rename_cnt; i++)
337 free (vm.new_names[i]);
343 /* Compares A and B according to the settings in
344 ORDERING, returning a strcmp()-type result.
A_ and B_ each point to an array element of type `struct variable *';
ORDERING_ points to a struct ordering. */
346 compare_variables_given_ordering (const void *a_, const void *b_,
347 const void *ordering_)
349 struct variable *const *pa = a_;
350 struct variable *const *pb = b_;
351 const struct variable *a = *pa;
352 const struct variable *b = *pb;
353 const struct ordering *ordering = ordering_;
/* Positional ordering compares the dictionary indexes reported by
var_get_dict_index().  The result is formed by comparison rather than
subtraction, so it is exactly -1, 0 or 1. */
356 if (ordering->positional)
358 size_t a_index = var_get_dict_index (a);
359 size_t b_index = var_get_dict_index (b);
360 result = a_index < b_index ? -1 : a_index > b_index;
/* Name comparison ignoring case.
NOTE(review): the `else' that presumably introduces this statement is
not visible in this listing. */
363 result = strcasecmp (var_get_name (a), var_get_name (b));
/* NOTE(review): the statement controlled by this test is not visible
here; presumably it reverses the sign of the result for BACKWARD. */
364 if (!ordering->forward)
369 /* Pairs a variable with a new name.
validate_var_modification() builds an array of these to simulate the
names that would exist after dropping and renaming.
NOTE(review): the struct tag line and braces are not visible in this
listing; the tag name is taken from the declarations that use it. */
372 struct variable *var;
/* Fixed-size buffer: room for LONG_NAME_LEN characters plus the
terminating null. */
373 char new_name[LONG_NAME_LEN + 1];
376 /* An algo_compare_func that compares new_name members in struct
377 var_renaming structures A and B.  The comparison ignores case
(strcasecmp()); AUX is not used. */
379 compare_var_renaming_by_new_name (const void *a_, const void *b_,
380 const void *aux UNUSED)
382 const struct var_renaming *a = a_;
383 const struct var_renaming *b = b_;
385 return strcasecmp (a->new_name, b->new_name);
388 /* Returns true if performing VM on dictionary D would not cause
389 problems such as duplicate variable names. Returns false
390 otherwise, and issues an error message.
Method: take all variables of D, remove the ones VM drops, give each
survivor its current name, overwrite the names of the variables VM
renames, then sort by name (ignoring case) and look for two equal
neighbours with adjacent_find_equal().
NOTE(review): no call that issues a message is visible in this
listing -- confirm whether one exists in the omitted lines. */
392 validate_var_modification (const struct dictionary *d,
393 const struct var_modification *vm)
395 /* Variable reordering can't be a problem, so we don't simulate
396 it. Variable renaming can cause duplicate names, but
397 dropping variables can eliminate them, so we simulate both
399 struct variable **all_vars;
400 struct variable **keep_vars;
401 struct variable **drop_vars;
402 size_t keep_cnt, drop_cnt;
405 struct var_renaming *var_renaming;
409 /* All variables, in index order. */
410 dict_get_vars_mutable (d, &all_vars, &all_cnt, 0);
412 /* Drop variables, in index order.  The sort is done on a private
copy, so the array in VM keeps its original order. */
413 drop_cnt = vm->drop_cnt;
414 drop_vars = xnmalloc (drop_cnt, sizeof *drop_vars);
415 memcpy (drop_vars, vm->drop_vars, drop_cnt * sizeof *drop_vars);
416 sort (drop_vars, drop_cnt, sizeof *drop_vars,
417 compare_variables_given_ordering, &forward_positional_ordering);
419 /* Keep variables, in index order: all variables minus the drop
variables.  The count returned by set_difference() is checked
against keep_cnt. */
420 assert (all_cnt >= drop_cnt);
421 keep_cnt = all_cnt - drop_cnt;
422 keep_vars = xnmalloc (keep_cnt, sizeof *keep_vars);
423 if (set_difference (all_vars, all_cnt,
427 compare_variables_given_ordering,
428 &forward_positional_ordering) != keep_cnt)
431 /* Copy variables into var_renaming array, in the same order as
keep_vars, each starting out with its current name. */
432 var_renaming = xnmalloc (keep_cnt, sizeof *var_renaming);
433 for (i = 0; i < keep_cnt; i++)
435 var_renaming[i].var = keep_vars[i];
/* NOTE(review): unbounded copy into new_name[LONG_NAME_LEN + 1];
relies on variable names never exceeding LONG_NAME_LEN -- confirm. */
436 strcpy (var_renaming[i].new_name, var_get_name (keep_vars[i]));
439 /* Rename variables in var_renaming array. */
440 for (i = 0; i < vm->rename_cnt; i++)
442 struct variable *const *kv;
443 struct var_renaming *vr;
445 /* Get the var_renaming element. */
446 kv = binary_search (keep_vars, keep_cnt, sizeof *keep_vars,
448 compare_variables_given_ordering,
449 &forward_positional_ordering);
/* var_renaming was filled in the same order as keep_vars, so the
position of KV within keep_vars is also the position of the matching
var_renaming element.
NOTE(review): the lines between the search and this statement are
not visible; KV must be non-null when this runs. */
452 vr = var_renaming + (kv - keep_vars);
/* NOTE(review): unbounded copy, as above; relies on the parsed new
names fitting in LONG_NAME_LEN characters -- confirm. */
454 strcpy (vr->new_name, vm->new_names[i]);
457 /* Sort var_renaming array by new names and check for
459 sort (var_renaming, keep_cnt, sizeof *var_renaming,
460 compare_var_renaming_by_new_name, NULL);
461 valid = adjacent_find_equal (var_renaming, keep_cnt, sizeof *var_renaming,
462 compare_var_renaming_by_new_name, NULL) == NULL;
473 /* Reorders, removes, and renames variables in dictionary D
474 according to VM. Returns true if successful, false if there
475 would have been duplicate variable names if the modifications
476 had been carried out. In the latter case, the dictionary is
479 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
481 char **rename_old_names;
483 struct variable **rename_vars;
484 char **rename_new_names;
489 /* Check whether the modifications will cause duplicate
491 if (!validate_var_modification (d, vm))
494 /* Record the old names of variables to rename. After
495 variables are deleted, we can't depend on the variables to
496 still exist, but we can still look them up by name. */
497 rename_old_names = xnmalloc (vm->rename_cnt, sizeof *rename_old_names);
498 for (i = 0; i < vm->rename_cnt; i++)
499 rename_old_names[i] = xstrdup (var_get_name (vm->rename_vars[i]));
501 /* Reorder and delete variables. */
502 dict_reorder_vars (d, vm->reorder_vars, vm->reorder_cnt);
503 dict_delete_vars (d, vm->drop_vars, vm->drop_cnt);
505 /* Compose lists of variables to rename and their new names.  Both
arrays are sized for vm->rename_cnt entries; rename_cnt counts the
entries actually stored. */
506 rename_vars = xnmalloc (vm->rename_cnt, sizeof *rename_vars);
507 rename_new_names = xnmalloc (vm->rename_cnt, sizeof *rename_new_names);
509 for (i = 0; i < vm->rename_cnt; i++)
/* Look the variable up again by the name recorded above.
NOTE(review): the lines between the lookup and the two stores are not
visible here; presumably they skip variables that no longer exist
after the deletion step -- confirm. */
511 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
515 rename_vars[rename_cnt] = var;
516 rename_new_names[rename_cnt] = vm->new_names[i];
521 if (dict_rename_vars (d, rename_vars, rename_new_names, rename_cnt,
/* Each rename_old_names[i] is an xstrdup() copy and is freed
individually.  rename_new_names only holds pointers copied from
vm->new_names, so just the array itself is freed. */
526 for (i = 0; i < vm->rename_cnt; i++)
527 free (rename_old_names[i]);
528 free (rename_old_names);
530 free (rename_new_names);