1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 #include <data/val-type.h>
19 #include <data/casereader.h>
22 #include <data/variable.h>
23 #include <data/casereader-provider.h>
24 #include <libpspp/taint.h>
28 /* Casereader that applies a user-supplied function to translate
29 each case into another in an arbitrary fashion. */
31 /* A translating casereader: a source reader plus the callbacks that transform its cases and release their state. */
32 struct casereader_translator
34 struct casereader *subreader; /* Source of input cases. */
36 struct ccase *(*translate) (struct ccase *input, void *aux); /* Applied to each case read from SUBREADER. */
37 bool (*destroy) (void *aux); /* Called with the auxiliary data when the translator is destroyed. */
41 static const struct casereader_class casereader_translator_class; /* Forward declaration; defined after the read and destroy functions. */
43 /* Creates and returns a new casereader whose cases are produced
44 by reading from SUBREADER and passing through TRANSLATE, which
45 must return the translated case, populated from
46 INPUT and auxiliary data AUX. TRANSLATE must destroy its
49 The cases returned by TRANSLATE must match OUTPUT_PROTO.
51 When the translating casereader is destroyed, DESTROY will be
52 called with AUX to allow any state maintained by TRANSLATE to be freed.
54 After this function is called, SUBREADER must not ever again
55 be referenced directly. It will be destroyed automatically
56 when the translating casereader is destroyed. */
58 casereader_create_translator (struct casereader *subreader,
59 const struct caseproto *output_proto,
60 struct ccase *(*translate) (struct ccase *input,
62 bool (*destroy) (void *aux),
65 struct casereader_translator *ct = xmalloc (sizeof *ct);
66 struct casereader *reader;
67 ct->subreader = casereader_rename (subreader); /* The translator keeps the reader returned by casereader_rename, not SUBREADER itself. */
68 ct->translate = translate;
69 ct->destroy = destroy;
71 reader = casereader_create_sequential (
72 NULL, output_proto, casereader_get_case_cnt (ct->subreader), /* The case count is taken from the subreader. */
73 &casereader_translator_class, ct); /* CT is handed over as the new reader's private data. */
74 taint_propagate (casereader_get_taint (ct->subreader), /* NOTE(review): presumably propagates taint from the subreader to READER -- confirm argument order. */
75 casereader_get_taint (reader));
79 /* Internal read function for translating casereader: reads a case from the subreader and hands it, with the auxiliary data, to the translate callback. */
81 casereader_translator_read (struct casereader *reader UNUSED,
84 struct casereader_translator *ct = ct_;
85 struct ccase *tmp = casereader_read (ct->subreader);
87 tmp = ct->translate (tmp, ct->aux); /* TMP now refers to whatever case TRANSLATE returned. */
91 /* Internal destroy function for translating casereader: destroys the subreader, then passes the auxiliary data to the destroy callback. */
93 casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_)
95 struct casereader_translator *ct = ct_;
96 casereader_destroy (ct->subreader);
97 ct->destroy (ct->aux); /* Called without a null check on the callback. */
101 /* Casereader class for translating casereader, naming the internal read and destroy functions defined above. */
102 static const struct casereader_class casereader_translator_class =
104 casereader_translator_read,
105 casereader_translator_destroy,
112 struct casereader_append_numeric /* State for a reader that appends one generated value to each case. */
114 struct caseproto *proto; /* Subreader's prototype with one width-0 value added. */
116 new_value_func *func; /* Generates the value to append to each case. */
118 void (*destroy) (void *aux); /* Called with the auxiliary data by can_destroy. */
121 static bool can_destroy (void *can_); /* Destroy callback handed to casereader_create_translator. */
123 static struct ccase *can_translate (struct ccase *, void *can_); /* Translate callback handed to casereader_create_translator. */
125 /* Creates and returns a new casereader whose cases are produced
126 by reading from SUBREADER and appending an additional value,
127 generated by FUNC. AUX is an optional parameter which
128 gets passed to FUNC. FUNC will also receive N, which is
129 the ordinal number of the case in the reader. DESTROY is an
130 optional parameter used to destroy AUX.
132 After this function is called, SUBREADER must not ever again
133 be referenced directly. It will be destroyed automatically
134 when the translating casereader is destroyed. */
136 casereader_create_append_numeric (struct casereader *subreader,
137 new_value_func func, void *aux,
138 void (*destroy) (void *aux))
140 struct casereader_append_numeric *can = xmalloc (sizeof *can);
141 can->proto = caseproto_ref (casereader_get_proto (subreader)); /* Take a reference to the subreader's prototype; can_destroy unrefs it. */
142 can->proto = caseproto_add_width (can->proto, 0); /* Extra width-0 slot that can_translate fills in. */
146 can->destroy = destroy;
147 return casereader_create_translator (subreader, can->proto, /* CAN becomes the translator's auxiliary data. */
148 can_translate, can_destroy, can);
/* Translate callback for the append-numeric reader: asks FUNC for a value and stores it in the last slot of the resized case. */
152 static struct ccase *
153 can_translate (struct ccase *c, void *can_)
155 struct casereader_append_numeric *can = can_;
156 double new_value = can->func (c, can->n++, can->aux); /* N is post-incremented, so each call passes the next ordinal. */
157 c = case_unshare_and_resize (c, can->proto); /* Make C match the output prototype before writing to it. */
158 case_data_rw_idx (c, caseproto_get_n_widths (can->proto) - 1)->f = new_value; /* Last index is the appended slot. */
/* Destroy callback for the append-numeric reader: passes the auxiliary data to the user's DESTROY and drops the prototype reference. */
163 can_destroy (void *can_)
165 struct casereader_append_numeric *can = can_;
167 can->destroy (can->aux); /* Lets the caller release AUX. */
168 caseproto_unref (can->proto); /* Balances the caseproto_ref taken in casereader_create_append_numeric. */
175 struct arithmetic_sequence /* Parameters read by next_arithmetic: the FIRST and INCREMENT of the sequence. */
/* Value generator for the arithmetic-sequence reader: ignores the case and computes the term for ordinal N. */
182 next_arithmetic (const struct ccase *c UNUSED,
186 struct arithmetic_sequence *as = aux;
187 return n * as->increment + as->first; /* FIRST + N * INCREMENT. */
190 /* Creates and returns a new casereader whose cases are produced
191 by reading from SUBREADER and appending an additional value,
192 which takes the value FIRST in the first case, FIRST +
193 INCREMENT in the second case, FIRST + INCREMENT * 2 in the
194 third case, and so on.
196 After this function is called, SUBREADER must not ever again
197 be referenced directly. It will be destroyed automatically
198 when the translating casereader is destroyed. */
200 casereader_create_arithmetic_sequence (struct casereader *subreader,
201 double first, double increment)
203 struct arithmetic_sequence *as = xzalloc (sizeof *as); /* Sequence parameters, read later by next_arithmetic. */
205 as->increment = increment;
206 return casereader_create_append_numeric (subreader, next_arithmetic, /* next_arithmetic supplies each appended value. */
213 struct casereader_append_rank /* State for a reader that appends a rank value to each case. */
215 struct casereader *clone; /* Clone of the subreader, used to peek ahead at later cases. */
217 const struct variable *var; /* Variable whose values are read and compared in car_translate. */
218 const struct variable *weight; /* Variable from which car_translate reads case weights. */
219 struct caseproto *proto; /* Subreader's prototype with one width-0 value added. */
223 distinct_func *distinct; /* Callback invoked with (value, n_common, weight, aux). */
225 enum rank_error *err; /* If non-null, RANK_ERR_* flags are OR'd into it. */
229 static bool car_destroy (void *car_); /* Destroy callback handed to casereader_create_translator. */
231 static struct ccase *car_translate (struct ccase *input, void *car_); /* Translate callback handed to casereader_create_translator. */
233 /* Creates and returns a new casereader whose cases are produced
234 by reading from SUBREADER and appending an additional value,
235 which is the rank of the observation. W is the weight variable
236 of the dictionary containing V, or NULL if there is no weight
239 The following preconditions must be met:
241 1. SUBREADER must be sorted on V.
243 2. The values of the weight variable must be non-negative.
245 If either of these preconditions is not satisfied, then the rank
246 variables may not be correct. In this case, if ERR is non-null,
247 the flags for the erroneous conditions encountered will be set in it.
249 If DISTINCT_CALLBACK is non-null, then it will be called exactly
250 once for every case containing a distinct value of V. AUX is
251 an auxiliary pointer passed to DISTINCT_CALLBACK.
253 After this function is called, SUBREADER must not ever again
254 be referenced directly. It will be destroyed automatically
255 when the translating casereader is destroyed. */
257 casereader_create_append_rank (struct casereader *subreader,
258 const struct variable *v,
259 const struct variable *w,
260 enum rank_error *err,
261 distinct_func *distinct_callback,
265 struct casereader_append_rank *car = xmalloc (sizeof *car);
266 car->proto = caseproto_ref (casereader_get_proto (subreader)); /* Take a reference to the subreader's prototype; car_destroy unrefs it. */
267 car->proto = caseproto_add_width (car->proto, 0); /* Extra width-0 slot that car_translate writes to. */
273 car->clone = casereader_clone (subreader); /* Separate reader so car_translate can peek ahead. */
274 car->distinct = distinct_callback;
277 car->prev_value = SYSMIS; /* Marks "no previous value yet" for the check in car_translate. */
279 return casereader_create_translator (subreader, car->proto, /* CAR becomes the translator's auxiliary data. */
280 car_translate, car_destroy, car);
/* Destroy callback for the append-rank reader: destroys the look-ahead clone and drops the prototype reference. */
285 car_destroy (void *car_)
287 struct casereader_append_rank *car = car_;
288 casereader_destroy (car->clone); /* The clone was created in casereader_create_append_rank. */
289 caseproto_unref (car->proto); /* Balances the caseproto_ref taken in casereader_create_append_rank. */
/* Translate callback for the append-rank reader.  Reads the ranked variable's value from INPUT, records
   unsorted-input and negative-weight conditions in *ERR when ERR is non-null, looks ahead in the clone at
   following cases whose value equals this one, and writes to the last slot of the resized case. */
294 static struct ccase *
295 car_translate (struct ccase *input, void *car_)
297 struct casereader_append_rank *car = car_;
299 const double value = case_data (input, car->var)->f; /* Value of the ranked variable in this case. */
301 if ( car->prev_value != SYSMIS) /* Only when an earlier value has been recorded. */
303 if (car->err && value < car->prev_value) /* A decrease from the previous value means the input is not sorted. */
304 *car->err |= RANK_ERR_UNSORTED;
307 if ( car->n_common == 1)
314 weight = case_data (input, car->weight)->f;
315 if ( car->err && weight < 0 ) /* Negative weights violate the documented precondition. */
316 *car->err |= RANK_ERR_NEGATIVE_WEIGHT;
321 struct ccase *c = casereader_peek (car->clone, car->n + ++k); /* Look ahead in the clone; K is incremented before use. */
324 vxx = case_data (c, car->var)->f; /* Ranked variable's value in the peeked case. */
330 double w = case_data (c, car->weight)->f;
332 if ( car->err && w < 0 )
333 *car->err |= RANK_ERR_NEGATIVE_WEIGHT;
343 while (vxx == value); /* Keep scanning while the peeked value ties with VALUE. */
344 car->mean_rank = car->cc + (weight + 1) / 2.0;
348 car->distinct (value, car->n_common, weight, car->aux);
355 input = case_unshare_and_resize (input, car->proto); /* Make INPUT match the output prototype before writing to it. */
356 case_data_rw_idx (input, caseproto_get_n_widths (car->proto) - 1)->f
358 car->prev_value = value; /* Remembered for the sortedness check on the next call. */
367 const struct variable *key; /* Variable whose values uniquify compares between neighbouring cases. */
368 const struct variable *weight; /* Weight variable; when null, uniquify counts each case as 1.0. */
373 struct casereader *clone; /* Clone of the input reader, used to peek at the following case. */
374 struct caseproto *proto; /* Prototype of the cases produced by consolodate_weight. */
/* Filter function that casereader_create_distinct passes to casereader_create_filter_func.  Compares the key
   of case C with the key of the following case, peeked from the clone, and asserts that the comparison
   direction never changes. */
379 uniquify (const struct ccase *c, void *aux)
381 struct consolidator *cdr = aux;
382 const union value *current_value = case_data (c, cdr->key);
383 const int key_width = var_get_width (cdr->key);
384 const double weight = cdr->weight ? case_data (c, cdr->weight)->f : 1.0; /* Each case counts as 1.0 when there is no weight variable. */
385 const struct ccase *next_case = casereader_peek (cdr->clone, cdr->n + 1); /* The case after this one, if any. */
391 if ( NULL == next_case) /* No following case to compare against. */
394 dir = value_compare_3way (case_data (next_case, cdr->key),
395 current_value, key_width);
398 /* Insist that the data are sorted: once a nonzero direction has been recorded, later comparisons must yield the same one. */
399 assert (cdr->direction == 0 || dir == cdr->direction);
400 cdr->direction = dir;
407 cdr->prev_cc = cdr->cc; /* Saved so consolodate_weight can store it in the output case. */
/* Translate callback that stores the total saved by uniquify (PREV_CC) in the output case: either in the
   weight variable of an unshared copy, or in the last slot of a case resized to the output prototype. */
414 static struct ccase *
415 consolodate_weight (struct ccase *input, void *aux)
417 struct consolidator *cdr = aux;
422 c = case_unshare (input);
423 case_data_rw (c, cdr->weight)->f = cdr->prev_cc; /* Overwrites the weight variable's value with the saved total. */
427 c = case_unshare_and_resize (input, cdr->proto);
428 case_data_rw_idx (c, caseproto_get_n_widths (cdr->proto) - 1)->f = cdr->prev_cc; /* Last index is the appended slot. */
/* Destroy function for the consolidator state: destroys the look-ahead clone and drops the prototype reference. */
436 uniquify_destroy (void *aux)
438 struct consolidator *cdr = aux;
440 casereader_destroy (cdr->clone); /* The clone was created in casereader_create_distinct. */
441 caseproto_unref (cdr->proto); /* Balances the caseproto_ref taken in casereader_create_distinct. */
449 /* Returns a new casereader which is based upon INPUT, but which contains a maximum
450 of one case for each distinct value of KEY.
451 If WEIGHT is non-null, then the new casereader's values for this variable
452 will be the sum of all values matching KEY.
453 If WEIGHT is null, then the new casereader will have an additional numeric
454 value appended, which will contain the total number of cases containing
456 INPUT must be sorted on KEY
459 casereader_create_distinct (struct casereader *input,
460 const struct variable *key,
461 const struct variable *weight)
463 struct casereader *u ;
464 struct casereader *ud ;
465 struct caseproto *output_proto = caseproto_ref (casereader_get_proto (input));
467 struct consolidator *cdr = xmalloc (sizeof (*cdr));
470 cdr->weight = weight;
472 cdr->clone = casereader_clone (input);
475 if ( NULL == cdr->weight )
476 output_proto = caseproto_add_width (output_proto, 0);
478 cdr->proto = output_proto;
480 u = casereader_create_filter_func (input, uniquify,
483 ud = casereader_create_translator (u,