1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2011, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/case-map.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dictionary.h"
27 #include "data/variable.h"
28 #include "data/case.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/hash-functions.h"
31 #include "libpspp/hmap.h"
33 #include "gl/xalloc.h"
38 struct caseproto *proto; /* Prototype for output cases. */
39 int *map; /* For each destination index, the
40 corresponding source index. */
43 static struct ccase *translate_case (struct ccase *, void *map_);
44 static bool destroy_case_map (void *map_);
46 /* Creates and returns an empty map that outputs cases matching
48 static struct case_map *
49 create_case_map (const struct caseproto *proto)
51 size_t n_values = caseproto_get_n_widths (proto);
55 map = xmalloc (sizeof *map);
56 map->proto = caseproto_ref (proto);
57 map->map = xnmalloc (n_values, sizeof *map->map);
58 for (i = 0; i < n_values; i++)
64 /* Inserts into MAP a mapping of the value at index FROM in the
65 source case to the value at index TO in the destination
68 insert_mapping (struct case_map *map, size_t from, size_t to)
70 assert (to < caseproto_get_n_widths (map->proto));
71 assert (map->map[to] == -1);
75 /* Destroys case map MAP. */
77 case_map_destroy (struct case_map *map)
81 caseproto_unref (map->proto);
87 /* If MAP is nonnull, returns a new case that is the result of
88 applying case map MAP to SRC, and unrefs SRC.
90 If MAP is null, returns SRC unchanged. */
92 case_map_execute (const struct case_map *map, struct ccase *src)
96 size_t n_values = caseproto_get_n_widths (map->proto);
100 dst = case_create (map->proto);
101 for (dst_idx = 0; dst_idx < n_values; dst_idx++)
103 int src_idx = map->map[dst_idx];
105 value_copy (case_data_rw_idx (dst, dst_idx), case_data_idx (src, src_idx), caseproto_get_width (map->proto, dst_idx));
114 /* Returns the prototype for output cases created by MAP. The
115 caller must not unref the returned case prototype. */
116 const struct caseproto *
117 case_map_get_proto (const struct case_map *map)
122 /* Creates and returns a new casereader whose cases are produced
123 by reading from SUBREADER and executing the actions of MAP.
124 The casereader will have as many `union value's as MAP. When
125 the new casereader is destroyed, MAP will be destroyed too.
127 After this function is called, SUBREADER must not ever again
128 be referenced directly. It will be destroyed automatically
129 when the returned casereader is destroyed. */
131 case_map_create_input_translator (struct case_map *map,
132 struct casereader *subreader)
134 static const struct casereader_translator_class class = {
135 translate_case, destroy_case_map,
137 return casereader_create_translator (subreader,
138 case_map_get_proto (map),
142 /* Creates and returns a new casewriter. Cases written to the
143 new casewriter will be passed through MAP and written to
144 SUBWRITER. The casewriter will have as many `union value's as
145 MAP. When the new casewriter is destroyed, MAP will be
148 After this function is called, SUBWRITER must not ever again
149 be referenced directly. It will be destroyed automatically
150 when the returned casewriter is destroyed. */
152 case_map_create_output_translator (struct case_map *map,
153 struct casewriter *subwriter)
155 return casewriter_create_translator (subwriter,
156 case_map_get_proto (map),
162 /* Casereader/casewriter translation callback. */
163 static struct ccase *
164 translate_case (struct ccase *input, void *map_)
166 struct case_map *map = map_;
167 return case_map_execute (map, input);
170 /* Casereader/casewriter destruction callback. */
172 destroy_case_map (void *map_)
174 struct case_map *map = map_;
175 case_map_destroy (map);
179 /* Creates and returns a case_map that can be used to compact
180 cases for dictionary D.
182 Compacting a case eliminates "holes" between values and after
183 the last value. (Holes are created by deleting variables.)
185 All variables are compacted if EXCLUDE_CLASSES is 0, or it may
186 contain one or more of DC_ORDINARY, DC_SYSTEM, or DC_SCRATCH
187 to cause the corresponding type of variable to be deleted
188 during compaction. */
190 case_map_to_compact_dict (const struct dictionary *d,
191 unsigned int exclude_classes)
193 size_t n_vars = dict_get_n_vars (d);
194 struct caseproto *proto;
195 struct case_map *map;
199 /* Create the case mapping. */
200 proto = dict_get_compacted_proto (d, exclude_classes);
201 map = create_case_map (proto);
202 caseproto_unref (proto);
204 /* Add the values to the case mapping. */
206 for (i = 0; i < n_vars; i++)
208 struct variable *v = dict_get_var (d, i);
209 if (!(exclude_classes & var_get_dict_class (v)))
210 insert_mapping (map, var_get_case_index (v), n_values++);
218 struct hmap_node hmap_node; /* In struct case_map_stage's 'stage_vars'. */
219 const struct variable *var;
223 struct case_map_stage
225 const struct dictionary *dict;
226 struct hmap stage_vars;
229 /* Prepares and returns a "struct case_map_stage" for producing a case map for
230 DICT. Afterward, the caller may delete, reorder, or rename variables within
231 DICT at will before using case_map_stage_get_case_map() to produce the case
234 The caller must *not* add new variables to DICT. */
235 struct case_map_stage *
236 case_map_stage_create (const struct dictionary *dict)
238 size_t n_vars = dict_get_n_vars (dict);
239 struct case_map_stage *stage;
242 stage = xmalloc (sizeof *stage);
244 hmap_init (&stage->stage_vars);
246 for (i = 0; i < n_vars; i++)
248 const struct variable *var = dict_get_var (dict, i);
249 struct stage_var *stage_var;
251 stage_var = xmalloc (sizeof *stage_var);
252 stage_var->var = var;
253 stage_var->case_index = var_get_case_index (var);
254 hmap_insert (&stage->stage_vars, &stage_var->hmap_node,
255 hash_pointer (var, 0));
261 /* Destroys STAGE, which was created by case_map_stage_create(). */
263 case_map_stage_destroy (struct case_map_stage *stage)
267 struct stage_var *stage_var, *next_stage_var;
269 HMAP_FOR_EACH_SAFE (stage_var, next_stage_var,
270 struct stage_var, hmap_node, &stage->stage_vars)
272 hmap_delete (&stage->stage_vars, &stage_var->hmap_node);
275 hmap_destroy (&stage->stage_vars);
280 static const struct stage_var *
281 case_map_stage_find_var (const struct case_map_stage *stage,
282 const struct variable *var)
284 const struct stage_var *stage_var;
286 HMAP_FOR_EACH_IN_BUCKET (stage_var, struct stage_var, hmap_node,
287 hash_pointer (var, 0), &stage->stage_vars)
288 if (stage_var->var == var)
291 /* If the following assertion is reached, it indicates a bug in the
292 case_map_stage client: the client allowed a new variable to be added to
293 the dictionary. This is not allowed, because of the risk that the new
294 varaible might have the same address as an old variable that has been
299 /* Produces a case map from STAGE, which must have been previously created with
300 case_map_stage_create(). The case map maps from the original case index of
301 the variables in STAGE's dictionary to their current case indexes.
303 Returns the new case map, or a null pointer if no mapping is required (that
304 is, no data has changed position). */
306 case_map_stage_get_case_map (const struct case_map_stage *stage)
308 struct case_map *map;
309 size_t n_vars = dict_get_n_vars (stage->dict);
312 bool identity_map = true;
314 map = create_case_map (dict_get_proto (stage->dict));
315 for (i = 0; i < n_vars; i++)
317 const struct variable *var = dict_get_var (stage->dict, i);
318 const struct stage_var *stage_var = case_map_stage_find_var (stage, var);
320 if (var_get_case_index (var) != stage_var->case_index)
321 identity_map = false;
323 insert_mapping (map, stage_var->case_index, var_get_case_index (var));
328 case_map_destroy (map);
332 n_values = caseproto_get_n_widths (map->proto);
333 while (n_values > 0 && caseproto_get_width (map->proto, n_values - 1) == -1)
334 map->proto = caseproto_remove_widths (map->proto, --n_values, 1);
339 /* Creates and returns a case map for mapping variables in OLD to
340 variables in NEW based on their name. For every variable in
341 NEW, there must be a variable in OLD with the same name, type,
344 case_map_by_name (const struct dictionary *old,
345 const struct dictionary *new)
347 size_t n_vars = dict_get_n_vars (new);
348 struct case_map *map = create_case_map (dict_get_proto (new));
349 for (size_t i = 0; i < n_vars; i++)
351 struct variable *nv = dict_get_var (new, i);
352 struct variable *ov = dict_lookup_var_assert (old, var_get_name (nv));
353 assert (var_get_width (nv) == var_get_width (ov));
354 insert_mapping (map, var_get_case_index (ov), var_get_case_index (nv));
359 /* Prints the mapping represented by case map CM to stdout, for
360 debugging purposes. */
362 case_map_dump (const struct case_map *cm)
365 for (i = 0 ; i < caseproto_get_n_widths (cm->proto); ++i)
366 printf ("%d -> %d\n", i, cm->map[i]);