1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/case.h"
22 #include "data/casereader.h"
23 #include "data/dictionary.h"
24 #include "data/procedure.h"
25 #include "data/transformations.h"
26 #include "data/variable.h"
27 #include "language/command.h"
28 #include "language/lexer/lexer.h"
29 #include "language/lexer/variable-parser.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/hash.h"
32 #include "libpspp/message.h"
33 #include "libpspp/pool.h"
34 #include "libpspp/str.h"
36 #include "gl/xalloc.h"
39 #define _(msgid) gettext (msgid)
41 /* FIXME: Implement PRINT subcommand. */
43 /* Explains how to recode one value. `from' must be first element. */
46 union value from; /* Original value. */
47 double to; /* Recoded value. */
/* Recoding rules for one source/target variable pair. */
struct arc_spec
  {
    const struct variable *src; /* Source variable (read only). */
    struct variable *dest;      /* Target variable (receives the code). */
    struct hsh_table *items;    /* Hash table of `struct arc_item's,
                                   keyed on the source value. */
  };
/* State of one AUTORECODE transformation.

   The structure itself is allocated as the container of `pool', and the
   `specs' array and the items it refers to are allocated from that same
   pool (see recode()); destroying the pool releases all of them. */
struct autorecode_trns
  {
    struct pool *pool;          /* Owns this struct, `specs' and the items. */
    struct arc_spec *specs;     /* One specification per variable pair. */
    size_t spec_cnt;            /* Number of elements in `specs'. */
  };
/* Descending or ascending sort order, i.e. whether the smallest source
   value receives the lowest or the highest recoded number. */
enum direction
  {
    ASCENDING,
    DESCENDING
  };
73 /* AUTORECODE data. */
76 const struct variable **src_vars; /* Source variables. */
77 char **dst_names; /* Target variable names. */
78 struct variable **dst_vars; /* Target variables. */
79 struct hsh_table **src_values; /* `union value's of source vars. */
80 size_t var_cnt; /* Number of variables. */
81 struct pool *src_values_pool; /* Pool used by src_values. */
82 enum direction direction; /* Sort order. */
83 int print; /* Print mapping table if nonzero. */
86 static trns_proc_func autorecode_trns_proc;
87 static trns_free_func autorecode_trns_free;
88 static hsh_compare_func compare_value;
89 static hsh_hash_func hash_value;
91 static void recode (struct dataset *, const struct autorecode_pgm *);
92 static void arc_free (struct autorecode_pgm *);
94 /* Performs the AUTORECODE procedure. */
96 cmd_autorecode (struct lexer *lexer, struct dataset *ds)
98 struct autorecode_pgm arc;
99 struct casereader *input;
106 arc.dst_names = NULL;
108 arc.src_values = NULL;
110 arc.src_values_pool = NULL;
111 arc.direction = ASCENDING;
115 lex_match_id (lexer, "VARIABLES");
116 lex_match (lexer, '=');
117 if (!parse_variables_const (lexer, dataset_dict (ds), &arc.src_vars,
118 &arc.var_cnt, PV_NO_DUPLICATE))
120 if (!lex_force_match_id (lexer, "INTO"))
122 lex_match (lexer, '=');
123 if (!parse_DATA_LIST_vars (lexer, &arc.dst_names, &dst_cnt, PV_NONE))
125 if (dst_cnt != arc.var_cnt)
129 msg (SE, _("Source variable count (%zu) does not match "
130 "target variable count (%zu)."),
131 arc.var_cnt, dst_cnt);
133 for (i = 0; i < dst_cnt; i++)
134 free (arc.dst_names[i]);
135 free (arc.dst_names);
136 arc.dst_names = NULL;
140 while (lex_match (lexer, '/'))
141 if (lex_match_id (lexer, "DESCENDING"))
142 arc.direction = DESCENDING;
143 else if (lex_match_id (lexer, "PRINT"))
145 if (lex_token (lexer) != '.')
147 lex_error (lexer, _("expecting end of command"));
151 for (i = 0; i < arc.var_cnt; i++)
155 if (dict_lookup_var (dataset_dict (ds), arc.dst_names[i]) != NULL)
157 msg (SE, _("Target variable %s duplicates existing variable %s."),
158 arc.dst_names[i], arc.dst_names[i]);
161 for (j = 0; j < i; j++)
162 if (!strcasecmp (arc.dst_names[i], arc.dst_names[j]))
164 msg (SE, _("Duplicate variable name %s among target variables."),
170 arc.src_values_pool = pool_create ();
171 arc.dst_vars = xnmalloc (arc.var_cnt, sizeof *arc.dst_vars);
172 arc.src_values = xnmalloc (arc.var_cnt, sizeof *arc.src_values);
173 for (i = 0; i < dst_cnt; i++)
174 arc.src_values[i] = hsh_create (10, compare_value, hash_value, NULL,
178 input = proc_open (ds);
179 for (; (c = casereader_read (input)) != NULL; case_unref (c))
180 for (i = 0; i < arc.var_cnt; i++)
182 const union value *vp;
185 vp = case_data (c, arc.src_vars[i]);
186 vpp = (union value **) hsh_probe (arc.src_values[i], vp);
189 *vpp = pool_alloc (arc.src_values_pool, sizeof **vpp);
190 value_clone_pool (arc.src_values_pool, *vpp, vp,
191 var_get_width (arc.src_vars[i]));
194 ok = casereader_destroy (input);
195 ok = proc_commit (ds) && ok;
197 for (i = 0; i < arc.var_cnt; i++)
198 arc.dst_vars[i] = dict_create_var_assert (dataset_dict (ds),
199 arc.dst_names[i], 0);
203 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
207 return CMD_CASCADING_FAILURE;
211 arc_free (struct autorecode_pgm *arc)
213 free (arc->src_vars);
214 if (arc->dst_names != NULL)
218 for (i = 0; i < arc->var_cnt; i++)
219 free (arc->dst_names[i]);
220 free (arc->dst_names);
222 free (arc->dst_vars);
223 if (arc->src_values != NULL)
227 for (i = 0; i < arc->var_cnt; i++)
228 hsh_destroy (arc->src_values[i]);
229 free (arc->src_values);
231 pool_destroy (arc->src_values_pool);
235 /* AUTORECODE transformation. */
238 recode (struct dataset *ds, const struct autorecode_pgm *arc)
240 struct autorecode_trns *trns;
243 trns = pool_create_container (struct autorecode_trns, pool);
244 trns->specs = pool_nalloc (trns->pool, arc->var_cnt, sizeof *trns->specs);
245 trns->spec_cnt = arc->var_cnt;
246 for (i = 0; i < arc->var_cnt; i++)
248 struct arc_spec *spec = &trns->specs[i];
249 void *const *p = hsh_sort (arc->src_values[i]);
250 int count = hsh_count (arc->src_values[i]);
253 spec->src = arc->src_vars[i];
254 spec->dest = arc->dst_vars[i];
255 spec->items = hsh_create (2 * count, compare_value, hash_value,
256 NULL, arc->src_vars[i]);
258 for (j = 0; *p; p++, j++)
260 struct arc_item *item = pool_alloc (trns->pool, sizeof *item);
261 union value *vp = *p;
263 value_clone_pool (trns->pool, &item->from, vp,
264 var_get_width (arc->src_vars[i]));
265 item->to = arc->direction == ASCENDING ? j + 1 : count - j;
266 hsh_force_insert (spec->items, item);
269 add_transformation (ds,
270 autorecode_trns_proc, autorecode_trns_free, trns);
273 /* Executes an AUTORECODE transformation. */
275 autorecode_trns_proc (void *trns_, struct ccase **c,
276 casenumber case_idx UNUSED)
278 struct autorecode_trns *trns = trns_;
281 *c = case_unshare (*c);
282 for (i = 0; i < trns->spec_cnt; i++)
284 struct arc_spec *spec = &trns->specs[i];
285 struct arc_item *item;
287 item = hsh_force_find (spec->items, case_data (*c, spec->src));
289 case_data_rw (*c, spec->dest)->f = item->to;
291 return TRNS_CONTINUE;
294 /* Frees an AUTORECODE transformation. */
296 autorecode_trns_free (void *trns_)
298 struct autorecode_trns *trns = trns_;
301 for (i = 0; i < trns->spec_cnt; i++)
302 hsh_destroy (trns->specs[i].items);
303 pool_destroy (trns->pool);
/* Hash table callbacks. */

/* Three-way comparison of the `union value's at A_ and B_.  VAR_ is the
   variable the values belong to; it supplies the width used for the
   comparison.  Returns the result of value_compare_3way(). */
static int
compare_value (const void *a_, const void *b_, const void *var_)
{
  const union value *a = a_;
  const union value *b = b_;
  const struct variable *var = var_;

  return value_compare_3way (a, b, var_get_width (var));
}
/* Hashes the `union value' at VALUE_.  VAR_ is the variable the value
   belongs to; it supplies the width passed to value_hash().  The hash
   is computed with a basis of 0. */
static unsigned
hash_value (const void *value_, const void *var_)
{
  const union value *value = value_;
  const struct variable *var = var_;

  return value_hash (value, var_get_width (var), 0);
}