1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include <data/dictionary.h>
25 #include <data/procedure.h>
26 #include <data/variable.h>
27 #include <language/command.h>
28 #include <language/lexer/lexer.h>
29 #include <language/lexer/variable-parser.h>
30 #include <libpspp/alloc.h>
31 #include <libpspp/assertion.h>
32 #include <libpspp/array.h>
33 #include <libpspp/bit-vector.h>
34 #include <libpspp/compiler.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/message.h>
37 #include <libpspp/message.h>
38 #include <libpspp/misc.h>
39 #include <libpspp/str.h>
42 #define _(msgid) gettext (msgid)
44 /* FIXME: should change weighting variable, etc. */
45 /* These control the ordering produced by
46 compare_variables_given_ordering(). */
/* The REORDER subcommand fills both flags in from its
   FORWARD/BACKWARD and POSITIONAL/ALPHA keywords.  The comparison
   function uses positional to choose between comparing by index and
   comparing by name, and tests forward afterwards. */
49 int forward; /* 1=FORWARD, 0=BACKWARD. */
50 int positional; /* 1=POSITIONAL, 0=ALPHA. */
53 /* Increasing order of variable index: forward = 1, positional = 1.
   Passed as the auxiliary argument to sort(), set_difference() and
   binary_search() wherever a variable list has to be in index
   order. */
54 static struct ordering forward_positional_ordering = {1, 1};
56 static int compare_variables_given_ordering (const void *, const void *,
59 /* Explains how to modify the variables in a dictionary.
   cmd_modify_vars() fills one of these in while parsing and
   rearrange_dict() applies it. */
60 struct var_modification
62 /* New variable ordering; handed to dict_reorder_vars(). */
63 struct variable **reorder_vars;
66 /* DROP/KEEP information: the variables to delete.  A KEEP list is
   converted into the equivalent drop list before it is stored. */
67 struct variable **drop_vars;
70 /* New variable names.  rename_vars lists the variables to rename;
   the new name for entry I is taken from new_names[I]. */
71 struct variable **rename_vars;
76 static bool rearrange_dict (struct dictionary *d,
77 const struct var_modification *vm);
79 /* Performs MODIFY VARS command.

   Parses the REORDER, RENAME, KEEP, DROP and MAP subcommands into a
   struct var_modification and then applies it to the dictionary of
   the current dataset with rearrange_dict().  ret_code starts as
   CMD_CASCADING_FAILURE and is changed to CMD_SUCCESS after the
   final rearrange_dict() call. */
81 cmd_modify_vars (void)
83 /* Bits indicating whether we've already encountered a subcommand of
85 unsigned already_encountered = 0;
87 /* What we're gonna do to the active file. */
88 struct var_modification vm;
91 int ret_code = CMD_CASCADING_FAILURE;
/* Temporary transformations are made permanent up front; the message
   is issued when that call returns nonzero. */
95 if (proc_make_temporary_transformations_permanent (current_dataset))
96 msg (SE, _("MODIFY VARS may not be used after TEMPORARY. "
97 "Temporary transformations will be made permanent."));
99 vm.reorder_vars = NULL;
101 vm.rename_vars = NULL;
107 /* Parse each subcommand. */
111 if (lex_match_id ("REORDER"))
113 struct variable **v = NULL;
/* Bit 1 of already_encountered records that REORDER was seen. */
116 if (already_encountered & 1)
118 msg (SE, _("REORDER subcommand may be given at most once."));
121 already_encountered |= 1;
126 struct ordering ordering;
/* FORWARD and POSITIONAL are the defaults, so matching either keyword
   only consumes it; the empty statements are intentional. */
129 ordering.forward = ordering.positional = 1;
130 if (lex_match_id ("FORWARD"));
131 else if (lex_match_id ("BACKWARD"))
132 ordering.forward = 0;
133 if (lex_match_id ("POSITIONAL"));
134 else if (lex_match_id ("ALPHA"))
135 ordering.positional = 0;
/* ALL, or reaching the end of the subcommand, takes the variable list
   from dict_get_vars() instead of a parenthesized list.
   NOTE(review): the 1u << DC_SYSTEM argument presumably excludes
   system variables; confirm against dict_get_vars(). */
137 if (lex_match (T_ALL) || token == '/' || token == '.')
141 msg (SE, _("Cannot specify ALL after specifying a set "
145 dict_get_vars (dataset_dict (current_dataset), &v, &nv, 1u << DC_SYSTEM);
149 if (!lex_match ('('))
151 msg (SE, _("`(' expected on REORDER subcommand."));
155 if (!parse_variables (dataset_dict (current_dataset), &v, &nv,
156 PV_APPEND | PV_NO_DUPLICATE))
161 if (!lex_match (')'))
163 msg (SE, _("`)' expected following variable names on "
164 "REORDER subcommand."));
/* Only the variables appended by this clause, from index prev_nv up
   to nv, are sorted, so earlier clauses keep their own ordering. */
169 sort (&v[prev_nv], nv - prev_nv, sizeof *v,
170 compare_variables_given_ordering, &ordering);
172 while (token != '/' && token != '.');
177 else if (lex_match_id ("RENAME"))
/* Bit 2 records that RENAME was seen. */
179 if (already_encountered & 2)
181 msg (SE, _("RENAME subcommand may be given at most once."));
184 already_encountered |= 2;
/* prev_nv_1 is handed to parse_DATA_LIST_vars() as the running count
   of entries in vm.new_names.  prev_nv_2 is only used to report how
   many names this clause contributed. */
189 size_t prev_nv_1 = vm.rename_cnt;
190 size_t prev_nv_2 = vm.rename_cnt;
192 if (!lex_match ('('))
194 msg (SE, _("`(' expected on RENAME subcommand."));
197 if (!parse_variables (dataset_dict (current_dataset), &vm.rename_vars, &vm.rename_cnt,
198 PV_APPEND | PV_NO_DUPLICATE))
200 if (!lex_match ('='))
202 msg (SE, _("`=' expected between lists of new and old variable "
203 "names on RENAME subcommand."));
206 if (!parse_DATA_LIST_vars (&vm.new_names, &prev_nv_1, PV_APPEND))
/* After both lists are parsed the two counts must agree. */
208 if (prev_nv_1 != vm.rename_cnt)
/* NOTE(review): %d is paired with size_t-valued arguments below
   (prev_nv_1 and prev_nv_2 are size_t); confirm and consider a
   matching conversion specifier. */
210 msg (SE, _("Differing number of variables in old name list "
211 "(%d) and in new name list (%d)."),
212 vm.rename_cnt - prev_nv_2, prev_nv_1 - prev_nv_2);
/* Frees every new name collected so far, starting at index 0 and so
   including names from earlier clauses.
   NOTE(review): the cleanup loop at the end of this function also
   frees vm.new_names[i]; verify that the lines not visible in this
   excerpt prevent a double free. */
213 for (i = 0; i < prev_nv_1; i++)
214 free (vm.new_names[i]);
219 if (!lex_match (')'))
221 msg (SE, _("`)' expected after variable lists on RENAME "
226 while (token != '.' && token != '/');
228 else if (lex_match_id ("KEEP"))
230 struct variable **keep_vars, **all_vars, **drop_vars;
231 size_t keep_cnt, all_cnt, drop_cnt;
/* KEEP and DROP share bit 4, which makes them mutually exclusive. */
233 if (already_encountered & 4)
/* NOTE(review): the two string pieces below are concatenated with no
   space between the words not and be; a space appears to be
   missing. */
235 msg (SE, _("KEEP subcommand may be given at most once. It may not"
236 "be given in conjunction with the DROP subcommand."));
239 already_encountered |= 4;
242 if (!parse_variables (dataset_dict (current_dataset), &keep_vars, &keep_cnt, PV_NONE))
245 /* Transform the list of variables to keep into a list of
246 variables to drop. First sort the keep list, then figure
247 out which variables are missing. */
248 sort (keep_vars, keep_cnt, sizeof *keep_vars,
249 compare_variables_given_ordering, &forward_positional_ordering);
251 dict_get_vars (dataset_dict (current_dataset), &all_vars, &all_cnt, 0);
252 assert (all_cnt >= keep_cnt);
/* The drop list is sized as the complement of the keep list; the
   assertion above guards the unsigned subtraction. */
254 drop_cnt = all_cnt - keep_cnt;
255 drop_vars = xnmalloc (drop_cnt, sizeof *keep_vars);
256 if (set_difference (all_vars, all_cnt,
260 compare_variables_given_ordering,
261 &forward_positional_ordering)
268 vm.drop_vars = drop_vars;
269 vm.drop_cnt = drop_cnt;
271 else if (lex_match_id ("DROP"))
273 struct variable **drop_vars;
/* Same bit as KEEP, so DROP is rejected after either of them. */
276 if (already_encountered & 4)
278 msg (SE, _("DROP subcommand may be given at most once. It may "
279 "not be given in conjunction with the KEEP "
283 already_encountered |= 4;
286 if (!parse_variables (dataset_dict (current_dataset), &drop_vars, &drop_cnt, PV_NONE))
288 vm.drop_vars = drop_vars;
289 vm.drop_cnt = drop_cnt;
291 else if (lex_match_id ("MAP"))
/* MAP applies the modifications gathered so far to a clone of the
   dictionary rather than to the dictionary of the dataset. */
293 struct dictionary *temp = dict_clone (dataset_dict (current_dataset));
294 int success = rearrange_dict (temp, &vm);
297 /* FIXME: display new dictionary. */
304 msg (SE, _("Unrecognized subcommand name `%s'."), tokid);
306 msg (SE, _("Subcommand name expected."));
314 msg (SE, _("`/' or `.' expected."));
/* True when REORDER (bit 1) or KEEP/DROP (bit 4) was given.
   NOTE(review): presumably procedure() is run here so the data is
   read before the dictionary is rearranged; confirm. */
320 if (already_encountered & (1 | 4))
323 if (!procedure (current_dataset,NULL, NULL))
327 if (!rearrange_dict (dataset_dict (current_dataset), &vm))
330 ret_code = CMD_SUCCESS;
/* Cleanup of the parsed modification lists. */
333 free (vm.reorder_vars);
334 free (vm.rename_vars);
335 for (i = 0; i < vm.rename_cnt; i++)
336 free (vm.new_names[i]);
342 /* Compares A and B according to the settings in
343 ORDERING, returning a strcmp()-type result.
   A_ and B_ each point to an element of an array of struct variable
   pointers, so they are dereferenced once to reach the variables.
   ORDERING_ points to a struct ordering. */
345 compare_variables_given_ordering (const void *a_, const void *b_,
348 struct variable *const *pa = a_;
349 struct variable *const *pb = b_;
350 const struct variable *a = *pa;
351 const struct variable *b = *pb;
352 const struct ordering *ordering = ordering_;
355 if (ordering->positional)
/* Three-way comparison of the indexes that yields -1, 0 or 1 without
   subtracting them. */
356 result = a->index < b->index ? -1 : a->index > b->index;
/* Name comparison ignores case. */
358 result = strcasecmp (a->name, b->name);
/* NOTE(review): the statement guarded by this test is not visible in
   this excerpt; presumably it reverses the result for BACKWARD. */
359 if (!ordering->forward)
364 /* Pairs a variable with a new name.
   validate_var_modification() builds an array of these to simulate
   the renames without touching the dictionary. */
/* The variable being considered. */
367 struct variable *var;
/* Name the variable would end up with: initialized from its current
   name and overwritten when a rename applies to it. */
368 char new_name[LONG_NAME_LEN + 1];
371 /* An algo_compare_func that compares new_name members in struct
372 var_renaming structures A and B.
   The comparison uses strcasecmp(), so names that differ only in
   case compare equal. */
374 compare_var_renaming_by_new_name (const void *a_, const void *b_,
377 const struct var_renaming *a = a_;
378 const struct var_renaming *b = b_;
380 return strcasecmp (a->new_name, b->new_name);
383 /* Returns true if performing VM on dictionary D would not cause
384 problems such as duplicate variable names. Returns false
385 otherwise, and issues an error message.
   D and VM are both const: the check works on separately allocated
   arrays of variable pointers and simulated names. */
387 validate_var_modification (const struct dictionary *d,
388 const struct var_modification *vm)
390 /* Variable reordering can't be a problem, so we don't simulate
391 it. Variable renaming can cause duplicate names, but
392 dropping variables can eliminate them, so we simulate both
394 struct variable **all_vars;
395 struct variable **keep_vars;
396 struct variable **drop_vars;
397 size_t keep_cnt, drop_cnt;
400 struct var_renaming *var_renaming;
404 /* All variables, in index order. */
405 dict_get_vars (d, &all_vars, &all_cnt, 0);
407 /* Drop variables, in index order. */
408 drop_cnt = vm->drop_cnt;
/* A private copy is sorted so that vm->drop_vars is left untouched. */
409 drop_vars = xnmalloc (drop_cnt, sizeof *drop_vars);
410 memcpy (drop_vars, vm->drop_vars, drop_cnt * sizeof *drop_vars);
411 sort (drop_vars, drop_cnt, sizeof *drop_vars,
412 compare_variables_given_ordering, &forward_positional_ordering);
414 /* Keep variables, in index order. */
415 assert (all_cnt >= drop_cnt);
416 keep_cnt = all_cnt - drop_cnt;
417 keep_vars = xnmalloc (keep_cnt, sizeof *keep_vars);
/* The number of elements reported by set_difference() is checked
   against the expected keep_cnt. */
418 if (set_difference (all_vars, all_cnt,
422 compare_variables_given_ordering,
423 &forward_positional_ordering) != keep_cnt)
426 /* Copy variables into var_renaming array. */
427 var_renaming = xnmalloc (keep_cnt, sizeof *var_renaming);
428 for (i = 0; i < keep_cnt; i++)
430 var_renaming[i].var = keep_vars[i];
/* Start from the current name; a rename overwrites it below. */
431 strcpy (var_renaming[i].new_name, keep_vars[i]->name);
434 /* Rename variables in var_renaming array. */
435 for (i = 0; i < vm->rename_cnt; i++)
437 struct variable *const *kv;
438 struct var_renaming *vr;
440 /* Get the var_renaming element. */
/* The search uses the same index ordering that keep_vars was built
   with. */
441 kv = binary_search (keep_vars, keep_cnt, sizeof *keep_vars,
443 compare_variables_given_ordering,
444 &forward_positional_ordering);
/* var_renaming was filled in the same order as keep_vars, so the
   offset of KV within keep_vars is also the index of its
   var_renaming entry. */
447 vr = var_renaming + (kv - keep_vars);
/* NOTE(review): unbounded copy into new_name, which holds
   LONG_NAME_LEN + 1 bytes; presumably the parser already limits the
   length of new names; confirm. */
449 strcpy (vr->new_name, vm->new_names[i]);
452 /* Sort var_renaming array by new names and check for
454 sort (var_renaming, keep_cnt, sizeof *var_renaming,
455 compare_var_renaming_by_new_name, NULL);
456 valid = adjacent_find_equal (var_renaming, keep_cnt, sizeof *var_renaming,
457 compare_var_renaming_by_new_name, NULL) == NULL;
468 /* Reorders, removes, and renames variables in dictionary D
469 according to VM. Returns true if successful, false if there
470 would have been duplicate variable names if the modifications
471 had been carried out. In the latter case, the dictionary is
474 rearrange_dict (struct dictionary *d, const struct var_modification *vm)
476 char **rename_old_names;
478 struct variable **rename_vars;
479 char **rename_new_names;
484 /* Check whether the modifications will cause duplicate
486 if (!validate_var_modification (d, vm))
489 /* Record the old names of variables to rename. After
490 variables are deleted, we can't depend on the variables to
491 still exist, but we can still look them up by name. */
492 rename_old_names = xnmalloc (vm->rename_cnt, sizeof *rename_old_names);
493 for (i = 0; i < vm->rename_cnt; i++)
494 rename_old_names[i] = xstrdup (vm->rename_vars[i]->name);
496 /* Reorder and delete variables.  This happens before renaming, which
   is why the old names were saved above. */
497 dict_reorder_vars (d, vm->reorder_vars, vm->reorder_cnt);
498 dict_delete_vars (d, vm->drop_vars, vm->drop_cnt);
500 /* Compose lists of variables to rename and their new names. */
501 rename_vars = xnmalloc (vm->rename_cnt, sizeof *rename_vars);
502 rename_new_names = xnmalloc (vm->rename_cnt, sizeof *rename_new_names);
504 for (i = 0; i < vm->rename_cnt; i++)
/* Each variable is found again through its saved old name. */
506 struct variable *var = dict_lookup_var (d, rename_old_names[i]);
/* rename_new_names borrows the strings owned by vm->new_names, so
   only the array itself is freed at the end of this function. */
510 rename_vars[rename_cnt] = var;
511 rename_new_names[rename_cnt] = vm->new_names[i];
516 if (dict_rename_vars (d, rename_vars, rename_new_names, rename_cnt,
/* Release the saved copies of the old names and the scratch arrays. */
521 for (i = 0; i < vm->rename_cnt; i++)
522 free (rename_old_names[i]);
523 free (rename_old_names);
525 free (rename_new_names);