1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2011, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/case-map.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dictionary.h"
27 #include "data/variable.h"
28 #include "data/case.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/hash-functions.h"
31 #include "libpspp/hmap.h"
33 #include "gl/xalloc.h"
38 struct caseproto *proto; /* Prototype for output cases. */
39 int *map; /* For each destination index, the
40 corresponding source index. */
43 static struct ccase *translate_case (struct ccase *, void *map_);
44 static bool destroy_case_map (void *map_);
46 /* Creates and returns an empty map that outputs cases matching
48 static struct case_map *
49 create_case_map (const struct caseproto *proto)
51 size_t n_values = caseproto_get_n_widths (proto);
55 map = xmalloc (sizeof *map);
56 map->proto = caseproto_ref (proto);
57 map->map = xnmalloc (n_values, sizeof *map->map);
58 for (i = 0; i < n_values; i++)
64 /* Inserts into MAP a mapping of the value at index FROM in the
65 source case to the value at index TO in the destination
68 insert_mapping (struct case_map *map, size_t from, size_t to)
70 assert (to < caseproto_get_n_widths (map->proto));
71 assert (map->map[to] == -1);
75 /* Returns a copy of OLD, if OLD is nonnull, and otherwise returns NULL. */
77 case_map_clone (const struct case_map *old)
82 size_t n_values = caseproto_get_n_widths (old->proto);
83 struct case_map *new = xmalloc (sizeof *new);
84 *new = (struct case_map) {
85 .proto = caseproto_ref (old->proto),
86 .map = xmemdup (old->proto, n_values * sizeof *new->map),
91 /* Destroys case map MAP. */
93 case_map_destroy (struct case_map *map)
97 caseproto_unref (map->proto);
103 /* If MAP is nonnull, returns a new case that is the result of
104 applying case map MAP to SRC, and unrefs SRC.
106 If MAP is null, returns SRC unchanged. */
108 case_map_execute (const struct case_map *map, struct ccase *src)
112 size_t n_values = caseproto_get_n_widths (map->proto);
116 dst = case_create (map->proto);
117 for (dst_idx = 0; dst_idx < n_values; dst_idx++)
119 int src_idx = map->map[dst_idx];
121 value_copy (case_data_rw_idx (dst, dst_idx), case_data_idx (src, src_idx), caseproto_get_width (map->proto, dst_idx));
130 /* Returns the prototype for output cases created by MAP. The
131 caller must not unref the returned case prototype. */
132 const struct caseproto *
133 case_map_get_proto (const struct case_map *map)
138 /* Creates and returns a new casereader whose cases are produced
139 by reading from SUBREADER and executing the actions of MAP.
140 The casereader will have as many `union value's as MAP. When
141 the new casereader is destroyed, MAP will be destroyed too.
143 After this function is called, SUBREADER must not ever again
144 be referenced directly. It will be destroyed automatically
145 when the returned casereader is destroyed. */
147 case_map_create_input_translator (struct case_map *map,
148 struct casereader *subreader)
150 static const struct casereader_translator_class class = {
151 translate_case, destroy_case_map,
153 return casereader_translate_stateless (subreader,
154 case_map_get_proto (map),
158 /* Creates and returns a new casewriter. Cases written to the
159 new casewriter will be passed through MAP and written to
160 SUBWRITER. The casewriter will have as many `union value's as
161 MAP. When the new casewriter is destroyed, MAP will be
164 After this function is called, SUBWRITER must not ever again
165 be referenced directly. It will be destroyed automatically
166 when the returned casewriter is destroyed. */
168 case_map_create_output_translator (struct case_map *map,
169 struct casewriter *subwriter)
171 return casewriter_create_translator (subwriter,
172 case_map_get_proto (map),
178 /* Casereader/casewriter translation callback. */
179 static struct ccase *
180 translate_case (struct ccase *input, void *map_)
182 struct case_map *map = map_;
183 return case_map_execute (map, input);
186 /* Casereader/casewriter destruction callback. */
188 destroy_case_map (void *map_)
190 struct case_map *map = map_;
191 case_map_destroy (map);
195 /* Creates and returns a case_map that can be used to compact
196 cases for dictionary D.
198 Compacting a case eliminates "holes" between values and after
199 the last value. (Holes are created by deleting variables.)
201 All variables are compacted if EXCLUDE_CLASSES is 0, or it may
202 contain one or more of DC_ORDINARY, DC_SYSTEM, or DC_SCRATCH
203 to cause the corresponding type of variable to be deleted
204 during compaction. */
206 case_map_to_compact_dict (const struct dictionary *d,
207 unsigned int exclude_classes)
209 size_t n_vars = dict_get_n_vars (d);
210 struct caseproto *proto;
211 struct case_map *map;
215 /* Create the case mapping. */
216 proto = dict_get_compacted_proto (d, exclude_classes);
217 map = create_case_map (proto);
218 caseproto_unref (proto);
220 /* Add the values to the case mapping. */
222 for (i = 0; i < n_vars; i++)
224 struct variable *v = dict_get_var (d, i);
225 if (!(exclude_classes & var_get_dict_class (v)))
226 insert_mapping (map, var_get_case_index (v), n_values++);
234 struct hmap_node hmap_node; /* In struct case_map_stage's 'stage_vars'. */
235 const struct variable *var;
239 struct case_map_stage
241 const struct dictionary *dict;
242 struct hmap stage_vars;
245 /* Prepares and returns a "struct case_map_stage" for producing a case map for
246 DICT. Afterward, the caller may delete, reorder, or rename variables within
247 DICT at will before using case_map_stage_get_case_map() to produce the case
250 The caller must *not* add new variables to DICT. */
251 struct case_map_stage *
252 case_map_stage_create (const struct dictionary *dict)
254 size_t n_vars = dict_get_n_vars (dict);
255 struct case_map_stage *stage;
258 stage = xmalloc (sizeof *stage);
260 hmap_init (&stage->stage_vars);
262 for (i = 0; i < n_vars; i++)
264 const struct variable *var = dict_get_var (dict, i);
265 struct stage_var *stage_var;
267 stage_var = xmalloc (sizeof *stage_var);
268 stage_var->var = var;
269 stage_var->case_index = var_get_case_index (var);
270 hmap_insert (&stage->stage_vars, &stage_var->hmap_node,
271 hash_pointer (var, 0));
277 /* Destroys STAGE, which was created by case_map_stage_create(). */
279 case_map_stage_destroy (struct case_map_stage *stage)
283 struct stage_var *stage_var, *next_stage_var;
285 HMAP_FOR_EACH_SAFE (stage_var, next_stage_var,
286 struct stage_var, hmap_node, &stage->stage_vars)
288 hmap_delete (&stage->stage_vars, &stage_var->hmap_node);
291 hmap_destroy (&stage->stage_vars);
296 static const struct stage_var *
297 case_map_stage_find_var (const struct case_map_stage *stage,
298 const struct variable *var)
300 const struct stage_var *stage_var;
302 HMAP_FOR_EACH_IN_BUCKET (stage_var, struct stage_var, hmap_node,
303 hash_pointer (var, 0), &stage->stage_vars)
304 if (stage_var->var == var)
307 /* If the following assertion is reached, it indicates a bug in the
308 case_map_stage client: the client allowed a new variable to be added to
309 the dictionary. This is not allowed, because of the risk that the new
310 varaible might have the same address as an old variable that has been
315 /* Produces a case map from STAGE, which must have been previously created with
316 case_map_stage_create(). The case map maps from the original case index of
317 the variables in STAGE's dictionary to their current case indexes.
319 Returns the new case map, or a null pointer if no mapping is required (that
320 is, no data has changed position). */
322 case_map_stage_get_case_map (const struct case_map_stage *stage)
324 struct case_map *map;
325 size_t n_vars = dict_get_n_vars (stage->dict);
328 bool identity_map = true;
330 map = create_case_map (dict_get_proto (stage->dict));
331 for (i = 0; i < n_vars; i++)
333 const struct variable *var = dict_get_var (stage->dict, i);
334 const struct stage_var *stage_var = case_map_stage_find_var (stage, var);
336 if (var_get_case_index (var) != stage_var->case_index)
337 identity_map = false;
339 insert_mapping (map, stage_var->case_index, var_get_case_index (var));
344 case_map_destroy (map);
348 n_values = caseproto_get_n_widths (map->proto);
349 while (n_values > 0 && caseproto_get_width (map->proto, n_values - 1) == -1)
350 map->proto = caseproto_remove_widths (map->proto, --n_values, 1);
355 /* Creates and returns a case map for mapping variables in OLD to
356 variables in NEW based on their name. For every variable in
357 NEW, there must be a variable in OLD with the same name, type,
360 case_map_by_name (const struct dictionary *old,
361 const struct dictionary *new)
363 size_t n_vars = dict_get_n_vars (new);
364 struct case_map *map = create_case_map (dict_get_proto (new));
365 for (size_t i = 0; i < n_vars; i++)
367 struct variable *nv = dict_get_var (new, i);
368 struct variable *ov = dict_lookup_var_assert (old, var_get_name (nv));
369 assert (var_get_width (nv) == var_get_width (ov));
370 insert_mapping (map, var_get_case_index (ov), var_get_case_index (nv));
375 /* Prints the mapping represented by case map CM to stdout, for
376 debugging purposes. */
378 case_map_dump (const struct case_map *cm)
381 for (i = 0 ; i < caseproto_get_n_widths (cm->proto); ++i)
382 printf ("%d -> %d\n", i, cm->map[i]);