1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/casereader-provider.h"
22 #include "data/casereader.h"
23 #include "data/val-type.h"
24 #include "data/variable.h"
25 #include "libpspp/taint.h"
27 #include "gl/xalloc.h"
29 /* Casereader that applies a user-supplied function to translate
30 each case into another in an arbitrary fashion. */
32 /* A translating casereader.  Instances of this structure are the auxiliary
   state for both the sequential and the stateless translators created in
   this file. */
33 struct casereader_translator
35 struct casereader *subreader; /* Source of input cases. */
36 const struct casereader_translator_class *class; /* Supplies the translate and destroy callbacks. */
/* Forward declaration; the initializer appears after the read and destroy
   functions that it lists. */
40 static const struct casereader_class casereader_translator_class;
42 /* Creates and returns a new casereader whose cases are produced
43 by reading from SUBREADER and passing through TRANSLATE, which
44 must return the translated case, and populate it based on
45 INPUT and auxiliary data AUX. TRANSLATE must destroy its input case.
48 TRANSLATE may be stateful, that is, the output for a given
49 case may depend on previous cases. If TRANSLATE is stateless,
50 then you may want to use casereader_translate_stateless
51 instead, since it sometimes performs better.
53 The cases returned by TRANSLATE must match OUTPUT_PROTO.
55 When the translating casereader is destroyed, DESTROY will be
56 called to allow any state maintained by TRANSLATE to be freed.
58 After this function is called, SUBREADER must not ever again
59 be referenced directly. It will be destroyed automatically
60 when the translating casereader is destroyed. */
62 casereader_create_translator (struct casereader *subreader,
63 const struct caseproto *output_proto,
64 const struct casereader_translator_class *class,
/* Heap-allocated translator state.  It is handed to the new reader below and
   comes back as the CT_ argument of the read and destroy functions. */
67 struct casereader_translator *ct = xmalloc (sizeof *ct);
68 *ct = (struct casereader_translator) {
/* The translator reads from the result of casereader_rename, not from the
   SUBREADER pointer the caller passed in. */
69 .subreader = casereader_rename (subreader),
/* Build the sequential reader that exposes the translated cases, passing
   OUTPUT_PROTO and the subreader's case count. */
74 struct casereader *reader = casereader_create_sequential (
75 NULL, output_proto, casereader_get_n_cases (ct->subreader),
76 &casereader_translator_class, ct);
/* Propagate the subreader's taint to the new reader. */
77 taint_propagate (casereader_get_taint (ct->subreader),
78 casereader_get_taint (reader));
82 /* Internal read function for translating casereader.  CT_ is the
   struct casereader_translator supplied when the reader was created. */
84 casereader_translator_read (struct casereader *reader UNUSED,
87 struct casereader_translator *ct = ct_;
/* Pull the next case from the subreader. */
88 struct ccase *tmp = casereader_read (ct->subreader);
/* Hand the case and the auxiliary data to the class's translate callback;
   the callback's result replaces TMP. */
90 tmp = ct->class->translate (tmp, ct->aux);
94 /* Internal destroy function for translating casereader.  CT_ is the
   struct casereader_translator supplied when the reader was created. */
96 casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_)
98 struct casereader_translator *ct = ct_;
/* Destroy the subreader first, then let the class's destroy callback release
   the auxiliary data. */
99 casereader_destroy (ct->subreader);
100 ct->class->destroy (ct->aux);
104 /* Casereader class for translating casereader.  This is the class passed
   to casereader_create_sequential by casereader_create_translator. */
105 static const struct casereader_class casereader_translator_class =
107 casereader_translator_read, /* Reads one case and translates it. */
108 casereader_translator_destroy, /* Destroys the subreader and auxiliary data. */
113 /* Casereader that applies a user-supplied function to translate
114 each case into another in a stateless fashion. */
/* Forward declaration; the initializer appears after the stateless read and
   destroy functions that it lists. */
116 static const struct casereader_random_class
117 casereader_stateless_translator_class;
119 /* Creates and returns a new casereader whose cases are produced by reading
120 from SUBREADER and passing through the TRANSLATE function. TRANSLATE
121 takes ownership of its input case and returns a translated case, populating
122 the translated case based on INPUT and auxiliary data AUX.
124 TRANSLATE must be stateless, that is, the output for a given case must not
125 depend on previous cases. This is because cases may be retrieved in
126 arbitrary order, and some cases may be retrieved multiple times, and some
127 cases may be skipped and never retrieved at all. If TRANSLATE is stateful,
128 use casereader_create_translator instead.
130 The cases returned by TRANSLATE must match OUTPUT_PROTO.
132 When the stateless translating casereader is destroyed, DESTROY will be
133 called to allow any auxiliary data maintained by TRANSLATE to be freed.
135 After this function is called, SUBREADER must not ever again be referenced
136 directly. It will be destroyed automatically when the translating
137 casereader is destroyed. */
139 casereader_translate_stateless (
140 struct casereader *subreader,
141 const struct caseproto *output_proto,
142 const struct casereader_translator_class *class,
/* Heap-allocated translator state.  It is handed to the new reader below and
   comes back as the CT_ argument of the stateless read and destroy
   functions. */
145 struct casereader_translator *ct = xmalloc (sizeof *ct);
146 *ct = (struct casereader_translator) {
/* The translator reads from the result of casereader_rename, not from the
   SUBREADER pointer the caller passed in. */
147 .subreader = casereader_rename (subreader),
/* Build the reader from the random-access class defined in this file, passing
   OUTPUT_PROTO and the subreader's case count. */
152 struct casereader *reader = casereader_create_random (
153 output_proto, casereader_get_n_cases (ct->subreader),
154 &casereader_stateless_translator_class, ct);
/* Propagate the subreader's taint to the new reader. */
155 taint_propagate (casereader_get_taint (ct->subreader),
156 casereader_get_taint (reader));
160 /* Internal read function for stateless translating casereader.  CT_ is the
   struct casereader_translator supplied at creation; IDX selects the case
   to fetch from the subreader. */
161 static struct ccase *
162 casereader_stateless_translator_read (struct casereader *reader UNUSED,
163 void *ct_, casenumber idx)
165 struct casereader_translator *ct = ct_;
/* Look the case up by index rather than reading the next one in sequence. */
166 struct ccase *tmp = casereader_peek (ct->subreader, idx);
/* Hand the case and the auxiliary data to the class's translate callback;
   the callback's result replaces TMP. */
168 tmp = ct->class->translate (tmp, ct->aux);
172 /* Internal destroy function for stateless translating casereader. */
174 casereader_stateless_translator_destroy (struct casereader *reader UNUSED,
177 struct casereader_translator *ct = ct_;
/* Destroy the subreader first, then let the class's destroy callback release
   the auxiliary data. */
178 casereader_destroy (ct->subreader);
179 ct->class->destroy (ct->aux);
183 /* Casereader class for stateless translating casereader.  This is the class
   passed to casereader_create_random by casereader_translate_stateless. */
184 static const struct casereader_random_class
185 casereader_stateless_translator_class =
187 casereader_stateless_translator_read, /* Fetches one case by index and translates it. */
188 casereader_stateless_translator_destroy, /* Destroys the subreader and auxiliary data. */
/* Auxiliary state for the casereader that appends one numeric value to each
   case (see casereader_create_append_numeric). */
193 struct casereader_append_numeric
195 struct caseproto *proto; /* Output prototype: the subreader's prototype plus one width-0 entry. */
197 new_value_func *func; /* Computes the value to append to each case. */
199 void (*destroy) (void *aux); /* Destructor invoked on the auxiliary data. */
/* Translator callbacks for the append-numeric reader; both are defined after
   casereader_create_append_numeric, which refers to them. */
202 static bool can_destroy (void *can_);
204 static struct ccase *can_translate (struct ccase *, void *can_);
206 /* Creates and returns a new casereader whose cases are produced
207 by reading from SUBREADER and appending an additional value,
208 generated by FUNC. AUX is an optional parameter which
209 gets passed to FUNC. FUNC will also receive N, which is
210 the ordinal number of the case in the reader. DESTROY is an
211 optional parameter used to destroy AUX.
213 After this function is called, SUBREADER must not ever again
214 be referenced directly. It will be destroyed automatically
215 when the translating casereader is destroyed. */
217 casereader_create_append_numeric (struct casereader *subreader,
218 new_value_func func, void *aux,
219 void (*destroy) (void *aux))
221 struct casereader_append_numeric *can = xmalloc (sizeof *can);
/* Start from a reference to the subreader's prototype and extend it with one
   width-0 entry to hold the appended value. */
222 can->proto = caseproto_ref (casereader_get_proto (subreader));
223 can->proto = caseproto_add_width (can->proto, 0);
227 can->destroy = destroy;
/* Callback table for the translator; CAN is passed along as its auxiliary
   data and is what can_translate and can_destroy receive. */
229 static const struct casereader_translator_class class = {
230 can_translate, can_destroy,
232 return casereader_create_translator (subreader, can->proto, &class, can);
/* Translator callback for the append-numeric reader: stores the value
   computed by CAN's function in the last slot of case C. */
236 static struct ccase *
237 can_translate (struct ccase *c, void *can_)
239 struct casereader_append_numeric *can = can_;
/* Compute the value before the case is resized.  The counter N is
   post-incremented, so FUNC receives its value from before this call. */
240 double new_value = can->func (c, can->n++, can->aux);
/* Unshare the case and resize it to the output prototype. */
241 c = case_unshare_and_resize (c, can->proto);
/* The appended value lives at the last index of the output prototype. */
242 *case_num_rw_idx (c, caseproto_get_n_widths (can->proto) - 1) = new_value;
/* Destroy callback for the append-numeric reader. */
247 can_destroy (void *can_)
249 struct casereader_append_numeric *can = can_;
/* Let the caller-supplied destructor release the auxiliary data. */
251 can->destroy (can->aux);
/* Drop the prototype reference held in CAN. */
252 caseproto_unref (can->proto);
/* Parameters of an arithmetic sequence.  next_arithmetic reads its `first'
   and `increment' members. */
259 struct arithmetic_sequence
/* Value function for the arithmetic-sequence reader.  The case itself is
   unused; AUX points to a struct arithmetic_sequence. */
266 next_arithmetic (const struct ccase *c UNUSED,
270 struct arithmetic_sequence *as = aux;
/* Term N of the sequence: first + n * increment. */
271 return n * as->increment + as->first;
274 /* Creates and returns a new casereader whose cases are produced
275 by reading from SUBREADER and appending an additional value,
276 which takes the value FIRST in the first case, FIRST +
277 INCREMENT in the second case, FIRST + INCREMENT * 2 in the
278 third case, and so on.
280 After this function is called, SUBREADER must not ever again
281 be referenced directly. It will be destroyed automatically
282 when the translating casereader is destroyed. */
284 casereader_create_arithmetic_sequence (struct casereader *subreader,
285 double first, double increment)
/* Zero-initialized parameters of the sequence. */
287 struct arithmetic_sequence *as = XZALLOC (struct arithmetic_sequence);
289 as->increment = increment;
/* Delegate to the generic append-numeric reader, with next_arithmetic as the
   value function. */
290 return casereader_create_append_numeric (subreader, next_arithmetic,
/* Auxiliary state for the rank-appending casereader (see
   casereader_create_append_rank). */
297 struct casereader_append_rank
299 struct casereader *clone; /* Clone of the subreader; car_translate peeks at later cases through it. */
301 const struct variable *var; /* Variable whose values are ranked. */
302 const struct variable *weight; /* Weight variable, or null if there is none. */
303 struct caseproto *proto; /* Output prototype: the subreader's prototype plus one width-0 entry. */
307 distinct_func *distinct; /* Callback invoked for distinct values, or null. */
309 enum rank_error *err; /* Where rank errors are recorded, or null. */
/* Translator callbacks for the rank-appending reader; both are defined after
   casereader_create_append_rank, which refers to them. */
313 static bool car_destroy (void *car_);
315 static struct ccase *car_translate (struct ccase *input, void *car_);
317 /* Creates and returns a new casereader whose cases are produced
318 by reading from SUBREADER and appending an additional value,
319 which is the rank of the observation. W is the weight variable
320 of the dictionary containing V, or NULL if there is no weight variable.
323 The following preconditions must be met:
325 1. SUBREADER must be sorted on V.
327 2. The values of the weight variable must be non-negative.
329 If either of these preconditions is not satisfied, then the rank
330 variables may not be correct. In this case, if ERR is non-null,
331 it will be set according to the erroneous conditions encountered.
333 If DISTINCT_CALLBACK is non-null, then it will be called exactly
334 once for every case containing a distinct value of V. AUX is
335 an auxiliary pointer passed to DISTINCT_CALLBACK.
337 After this function is called, SUBREADER must not ever again
338 be referenced directly. It will be destroyed automatically
339 when the translating casereader is destroyed. */
341 casereader_create_append_rank (struct casereader *subreader,
342 const struct variable *v,
343 const struct variable *w,
344 enum rank_error *err,
345 distinct_func *distinct_callback,
349 struct casereader_append_rank *car = xmalloc (sizeof *car);
/* Output prototype: a reference to the subreader's prototype, extended with
   one width-0 entry to hold the rank. */
350 car->proto = caseproto_ref (casereader_get_proto (subreader));
351 car->proto = caseproto_add_width (car->proto, 0);
/* Separate clone of the subreader; car_translate uses it to peek at the cases
   that follow the one being translated. */
357 car->clone = casereader_clone (subreader);
358 car->distinct = distinct_callback;
/* SYSMIS marks "no previous value", so the first case skips the sortedness
   check in car_translate. */
361 car->prev_value = SYSMIS;
/* Callback table for the translator; CAR is passed along as its auxiliary
   data. */
363 static const struct casereader_translator_class class = {
364 car_translate, car_destroy
366 return casereader_create_translator (subreader, car->proto, &class, car);
/* Destroy callback for the rank-appending reader. */
371 car_destroy (void *car_)
373 struct casereader_append_rank *car = car_;
/* Destroy the look-ahead clone and drop the prototype reference held in
   CAR. */
374 casereader_destroy (car->clone);
375 caseproto_unref (car->proto);
/* Translator callback for the rank-appending reader: stores CAR's mean rank
   in the last slot of INPUT, after resizing INPUT to the output prototype. */
380 static struct ccase *
381 car_translate (struct ccase *input, void *car_)
383 struct casereader_append_rank *car = car_;
385 const double value = case_num (input, car->var);
/* Flag out-of-order input, but only once a previous value has been recorded
   and the caller supplied somewhere to report errors. */
387 if (car->prev_value != SYSMIS)
389 if (car->err && value < car->prev_value)
390 *car->err |= RANK_ERR_UNSORTED;
393 if (car->n_common == 1)
/* Weight of the current case; a negative weight is reported through ERR. */
400 weight = case_num (input, car->weight);
401 if (car->err && weight < 0)
402 *car->err |= RANK_ERR_NEGATIVE_WEIGHT;
/* Look ahead through the clone.  K is incremented before each peek, so each
   pass examines the next case after the previous one. */
407 struct ccase *c = casereader_peek (car->clone, car->n + ++k);
410 vxx = case_num (c, car->var);
/* Weight of the peeked case; a negative weight is reported through ERR. */
416 double w = case_num (c, car->weight);
418 if (car->err && w < 0)
419 *car->err |= RANK_ERR_NEGATIVE_WEIGHT;
/* Keep scanning while the peeked case has the same value as INPUT. */
429 while (vxx == value);
/* The mean rank is the running total CC plus half of (WEIGHT + 1). */
430 car->mean_rank = car->cc + (weight + 1) / 2.0;
/* Report the value, N_COMMON and WEIGHT to the caller's callback. */
434 car->distinct (value, car->n_common, weight, car->aux);
/* Unshare INPUT and resize it to the output prototype; the rank lives at the
   last index. */
441 input = case_unshare_and_resize (input, car->proto);
442 *case_num_rw_idx (input, caseproto_get_n_widths (car->proto) - 1)
444 car->prev_value = value; /* Remembered for the next call's sortedness check. */
453 const struct variable *key; /* Variable whose values uniquify compares between adjacent cases. */
454 const struct variable *weight; /* Weight variable, or null. */
459 struct casereader *clone; /* Clone of the input; uniquify peeks at the following case through it. */
460 struct caseproto *proto; /* Prototype of the output cases. */
/* Filter function that casereader_create_distinct passes to
   casereader_create_filter_func.  AUX is the struct consolidator. */
465 uniquify (const struct ccase *c, void *aux)
467 struct consolidator *cdr = aux;
468 const union value *current_value = case_data (c, cdr->key);
469 const int key_width = var_get_width (cdr->key);
/* With no weight variable, each case counts with weight 1. */
470 const double weight = cdr->weight ? case_num (c, cdr->weight) : 1.0;
/* Peek through the clone at the case at index N + 1. */
471 struct ccase *next_case = casereader_peek (cdr->clone, cdr->n + 1);
/* casereader_peek returned null: there is no case at that position. */
477 if (NULL == next_case)
/* Compare the next case's key with the current case's key. */
480 dir = value_compare_3way (case_data (next_case, cdr->key),
481 current_value, key_width);
487 case_unref (next_case);
490 /* Insist that the data are sorted */
491 assert (cdr->direction == 0 || dir == cdr->direction);
492 cdr->direction = dir;
/* Save the accumulated weight; consolodate_weight writes PREV_CC into the
   output case. */
499 cdr->prev_cc = cdr->cc;
/* Translator callback for casereader_create_distinct: stores the weight
   total that uniquify saved in PREV_CC into the case.  AUX is the
   struct consolidator. */
506 static struct ccase *
507 consolodate_weight (struct ccase *input, void *aux)
509 struct consolidator *cdr = aux;
/* One form: unshare the case and overwrite the existing weight variable. */
514 c = case_unshare (input);
515 *case_num_rw (c, cdr->weight) = cdr->prev_cc;
/* Other form: unshare the case, resize it to the output prototype, and store
   the total at the last index. */
519 c = case_unshare_and_resize (input, cdr->proto);
520 *case_num_rw_idx (c, caseproto_get_n_widths (cdr->proto) - 1) = cdr->prev_cc;
/* Destroy callback for the reader built by casereader_create_distinct. */
528 uniquify_destroy (void *aux)
530 struct consolidator *cdr = aux;
/* Destroy the look-ahead clone and drop the prototype reference held in
   CDR. */
532 casereader_destroy (cdr->clone);
533 caseproto_unref (cdr->proto);
541 /* Returns a new casereader which is based upon INPUT, but which contains a maximum
542 of one case for each distinct value of KEY.
543 If WEIGHT is non-null, then the new casereader's values for this variable
544 will be the sum of all values matching KEY.
545 If WEIGHT is null, then the new casereader will have an additional numeric
546 value appended, which will contain the total number of cases containing that value of KEY.
548 INPUT must be sorted on KEY.
551 casereader_create_distinct (struct casereader *input,
552 const struct variable *key,
553 const struct variable *weight)
555 struct casereader *u ;
/* Take a reference to INPUT's prototype; it is the basis of the output
   prototype. */
556 struct caseproto *output_proto = caseproto_ref (casereader_get_proto (input));
558 struct consolidator *cdr = xmalloc (sizeof (*cdr));
561 cdr->weight = weight;
/* Clone INPUT so that uniquify can peek at the following case. */
563 cdr->clone = casereader_clone (input);
/* With no weight variable, the output needs one extra width-0 entry at the
   end of each case to hold the total. */
566 if (NULL == cdr->weight)
567 output_proto = caseproto_add_width (output_proto, 0);
569 cdr->proto = output_proto;
/* First stage: filter INPUT through uniquify. */
571 u = casereader_create_filter_func (input, uniquify,
/* Second stage: translate the filtered cases with consolodate_weight.  CDR is
   the translator's auxiliary data. */
574 static const struct casereader_translator_class class = {
575 consolodate_weight, uniquify_destroy,
577 return casereader_create_translator (u, output_proto, &class, cdr);