1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/casereader-provider.h"
22 #include "data/casereader.h"
23 #include "data/val-type.h"
24 #include "data/variable.h"
25 #include "libpspp/taint.h"
27 #include "gl/xalloc.h"
29 /* Casereader that applies a user-supplied function to translate
30 each case into another in an arbitrary fashion. */
32 /* A translating casereader. */
/* Bundles the wrapped subreader with the caller-supplied TRANSLATE and
   DESTROY callbacks that casereader_translator_read and
   casereader_translator_destroy invoke.
   NOTE(review): this copy of the file has lost lines (the embedded
   numbering jumps 33, 35, 37, 38).  The braces are absent and no aux
   member is visible, although the callbacks are invoked with ct->aux.
   Confirm against the upstream file. */
33 struct casereader_translator
35 struct casereader *subreader; /* Source of input cases. */
/* Applied to each case read from SUBREADER. */
37 struct ccase *(*translate) (struct ccase *input, void *aux);
/* Invoked with the auxiliary data when the reader is destroyed. */
38 bool (*destroy) (void *aux);
/* Forward declaration: casereader_create_translator takes the address of
   this class before its definition further down the file. */
42 static const struct casereader_class casereader_translator_class;
44 /* Creates and returns a new casereader whose cases are produced
45 by reading from SUBREADER and passing through TRANSLATE, which
46 must return the translated case, and populate it based on
47 INPUT and auxiliary data AUX. TRANSLATE must destroy its
50 TRANSLATE may be stateful, that is, the output for a given
51 case may depend on previous cases. If TRANSLATE is stateless,
52 then you may want to use casereader_translate_stateless
53 instead, since it sometimes performs better.
55 The cases returned by TRANSLATE must match OUTPUT_PROTO.
57 When the translating casereader is destroyed, DESTROY will be
58 called to allow any state maintained by TRANSLATE to be freed.
60 After this function is called, SUBREADER must not ever again
61 be referenced directly. It will be destroyed automatically
62 when the translating casereader is destroyed. */
/* NOTE(review): this copy has lost lines.  The header sentence that ends
   with the words TRANSLATE must destroy its is cut off, the parameter
   list stops after DESTROY, and the return type, braces, any assignment
   of ct->aux and the return statement are not visible.  Restore them
   from upstream before building. */
64 casereader_create_translator (struct casereader *subreader,
65 const struct caseproto *output_proto,
66 struct ccase *(*translate) (struct ccase *input,
68 bool (*destroy) (void *aux),
/* Per-reader state; it is handed to casereader_create_sequential below
   and comes back to the class callbacks as their CT_ argument. */
71 struct casereader_translator *ct = xmalloc (sizeof *ct);
72 struct casereader *reader;
/* The translator keeps whatever casereader_rename returns as its source;
   presumably this is what retires the SUBREADER handle of the caller, as
   the header comment requires -- confirm. */
73 ct->subreader = casereader_rename (subreader);
74 ct->translate = translate;
75 ct->destroy = destroy;
/* Build the sequential reader from OUTPUT_PROTO, the case count of the
   subreader, and the translator class of this file. */
77 reader = casereader_create_sequential (
78 NULL, output_proto, casereader_get_case_cnt (ct->subreader),
79 &casereader_translator_class, ct);
/* Link the taint of the subreader to the taint of the new reader. */
80 taint_propagate (casereader_get_taint (ct->subreader),
81 casereader_get_taint (reader));
85 /* Internal read function for translating casereader. */
/* Reads one case from the subreader and replaces it with the result of
   the TRANSLATE callback, which receives the case and ct->aux.
   NOTE(review): lines 92 and 94 are absent from this copy; check that a
   null result from casereader_read is guarded before TRANSLATE is called
   and that TMP is what gets returned. */
87 casereader_translator_read (struct casereader *reader UNUSED,
90 struct casereader_translator *ct = ct_;
91 struct ccase *tmp = casereader_read (ct->subreader);
93 tmp = ct->translate (tmp, ct->aux);
97 /* Internal destroy function for translating casereader. */
/* Destroys the subreader first, then calls the DESTROY callback with
   ct->aux.
   NOTE(review): CT is allocated with xmalloc in
   casereader_create_translator, but no release of CT is visible here;
   lines are missing from this copy, so confirm that it is freed. */
99 casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_)
101 struct casereader_translator *ct = ct_;
102 casereader_destroy (ct->subreader);
103 ct->destroy (ct->aux);
107 /* Casereader class for translating casereader. */
/* Lists the callbacks positionally: read first, then destroy.  Any
   further members are not visible in this copy. */
108 static const struct casereader_class casereader_translator_class =
110 casereader_translator_read,
111 casereader_translator_destroy,
116 /* Casereader that applies a user-supplied function to translate
117 each case into another in a stateless fashion. */
119 /* A statelessly translating casereader. */
/* NOTE(review): the braces, the tail of the TRANSLATE prototype and the
   aux member used elsewhere as cst->aux are not visible in this copy. */
120 struct casereader_stateless_translator
122 struct casereader *subreader; /* Source of input cases. */
/* Running sum of the values returned by casereader_advance on the
   subreader; the read callback adds it to the requested index to form
   the case number passed to TRANSLATE. */
124 casenumber case_offset;
/* Applied to each case peeked from SUBREADER, together with its case
   number. */
125 struct ccase *(*translate) (struct ccase *input, casenumber,
/* Invoked with the auxiliary data when the reader is destroyed. */
127 bool (*destroy) (void *aux);
/* Forward declaration: casereader_translate_stateless takes the address
   of this class before its definition further down the file. */
131 static const struct casereader_random_class
132 casereader_stateless_translator_class;
134 /* Creates and returns a new casereader whose cases are produced by reading
135 from SUBREADER and passing through the TRANSLATE function. TRANSLATE must
136 take ownership of its input case and return a translated case, populating
137 the translated case based on INPUT and auxiliary data AUX.
139 TRANSLATE must be stateless, that is, the output for a given case must not
140 depend on previous cases. This is because cases may be retrieved in
141 arbitrary order, and some cases may be retrieved multiple times, and some
142 cases may be skipped and never retrieved at all. If TRANSLATE is stateful,
143 use casereader_create_translator instead.
145 The casenumber argument to the TRANSLATE function is the absolute case
146 number in SUBREADER, that is, 0 when the first case in SUBREADER is being
147 translated, 1 when the second case is being translated, and so on.
149 The cases returned by TRANSLATE must match OUTPUT_PROTO.
151 When the stateless translating casereader is destroyed, DESTROY will be
152 called to allow any auxiliary data maintained by TRANSLATE to be freed.
154 After this function is called, SUBREADER must not ever again be referenced
155 directly. It will be destroyed automatically when the translating
156 casereader is destroyed. */
/* NOTE(review): this copy has lost lines; the return type, the end of the
   parameter list, the braces and the return statement are not visible. */
158 casereader_translate_stateless (
159 struct casereader *subreader,
160 const struct caseproto *output_proto,
161 struct ccase *(*translate) (struct ccase *input, casenumber,
163 bool (*destroy) (void *aux),
/* Per-reader state, later received by the class callbacks as CST_. */
166 struct casereader_stateless_translator *cst = xmalloc (sizeof *cst);
167 struct casereader *reader;
168 cst->subreader = casereader_rename (subreader);
169 cst->translate = translate;
170 cst->destroy = destroy;
/* NOTE(review): only one source line (171) is missing between the
   assignment above and the call below, yet both cst->aux and
   cst->case_offset are read by the callbacks.  xmalloc presumably does
   not zero the block, so at least one of them looks uninitialized;
   confirm that case_offset starts at 0. */
/* Build the random-access reader from OUTPUT_PROTO, the case count of the
   subreader, and the stateless translator class of this file. */
172 reader = casereader_create_random (
173 output_proto, casereader_get_case_cnt (cst->subreader),
174 &casereader_stateless_translator_class, cst);
/* Link the taint of the subreader to the taint of the new reader. */
175 taint_propagate (casereader_get_taint (cst->subreader),
176 casereader_get_taint (reader));
180 /* Internal read function for stateless translating casereader. */
/* Peeks at index IDX in the subreader and hands the case to TRANSLATE
   along with the case number case_offset + IDX and cst->aux.
   NOTE(review): lines 187 and 189 are absent from this copy; check that
   a null result from casereader_peek is guarded and that TMP is what is
   returned. */
181 static struct ccase *
182 casereader_stateless_translator_read (struct casereader *reader UNUSED,
183 void *cst_, casenumber idx)
185 struct casereader_stateless_translator *cst = cst_;
186 struct ccase *tmp = casereader_peek (cst->subreader, idx);
188 tmp = cst->translate (tmp, cst->case_offset + idx, cst->aux);
192 /* Internal destroy function for stateless translating casereader. */
/* Destroys the subreader first, then calls the DESTROY callback with
   cst->aux.
   NOTE(review): CST is allocated with xmalloc in
   casereader_translate_stateless, but no release of CST is visible
   here; lines are missing from this copy, so confirm that it is freed. */
194 casereader_stateless_translator_destroy (struct casereader *reader UNUSED,
197 struct casereader_stateless_translator *cst = cst_;
198 casereader_destroy (cst->subreader);
199 cst->destroy (cst->aux);
/* Internal advance function for stateless translating casereader.
   Advances the subreader by CNT and adds the value that
   casereader_advance returns to case_offset, which keeps the case
   numbers passed to TRANSLATE relative to the start of the subreader.
   NOTE(review): the return type and return statement are not visible in
   this copy. */
204 casereader_stateless_translator_advance (struct casereader *reader UNUSED,
205 void *cst_, casenumber cnt)
207 struct casereader_stateless_translator *cst = cst_;
208 cst->case_offset += casereader_advance (cst->subreader, cnt);
211 /* Casereader class for stateless translating casereader. */
/* Lists the callbacks positionally: read, destroy, then advance. */
212 static const struct casereader_random_class
213 casereader_stateless_translator_class =
215 casereader_stateless_translator_read,
216 casereader_stateless_translator_destroy,
217 casereader_stateless_translator_advance,
/* Auxiliary data for the reader built by casereader_create_append_numeric.
   NOTE(review): the braces and the n and aux members that can_translate
   reads are not visible in this copy. */
221 struct casereader_append_numeric
/* Prototype of the output cases: the prototype of the subreader with one
   extra value of width 0 added. */
223 struct caseproto *proto;
/* Called by can_translate to compute the appended value. */
225 new_value_func *func;
/* Called with the auxiliary data by can_destroy. */
227 void (*destroy) (void *aux);
/* Forward declarations of the callbacks that
   casereader_create_append_numeric passes to
   casereader_create_translator. */
230 static bool can_destroy (void *can_);
232 static struct ccase *can_translate (struct ccase *, void *can_);
234 /* Creates and returns a new casereader whose cases are produced
235 by reading from SUBREADER and appending an additional value,
236 generated by FUNC. AUX is an optional parameter which
237 gets passed to FUNC. FUNC will also receive N, which is
238 the ordinal number of the case in the reader. DESTROY is an
239 optional parameter used to destroy AUX.
241 After this function is called, SUBREADER must not ever again
242 be referenced directly. It will be destroyed automatically
243 when the translating casereader is destroyed. */
/* NOTE(review): this copy has lost lines 252-254; the assignments of
   can->n, can->aux and can->func that can_translate depends on are not
   visible, nor are the return type and braces. */
245 casereader_create_append_numeric (struct casereader *subreader,
246 new_value_func func, void *aux,
247 void (*destroy) (void *aux))
249 struct casereader_append_numeric *can = xmalloc (sizeof *can);
/* Take a reference to the prototype of the subreader, then extend it by
   one value of width 0 for the appended column. */
250 can->proto = caseproto_ref (casereader_get_proto (subreader));
251 can->proto = caseproto_add_width (can->proto, 0);
255 can->destroy = destroy;
/* Delegate to the stateful translator; CAN becomes the auxiliary data
   seen by can_translate and can_destroy. */
256 return casereader_create_translator (subreader, can->proto,
257 can_translate, can_destroy, can);
/* Translate callback for casereader_create_append_numeric: asks FUNC for
   a value, resizes case C to the extended prototype and stores the value
   in its last slot.
   NOTE(review): the braces and the return statement are not visible in
   this copy. */
261 static struct ccase *
262 can_translate (struct ccase *c, void *can_)
264 struct casereader_append_numeric *can = can_;
/* FUNC sees the case before it is resized; it receives the current
   ordinal, which is incremented afterwards. */
265 double new_value = can->func (c, can->n++, can->aux);
266 c = case_unshare_and_resize (c, can->proto);
/* The appended value lives at the last index of the prototype. */
267 case_data_rw_idx (c, caseproto_get_n_widths (can->proto) - 1)->f = new_value;
/* Destroy callback for casereader_create_append_numeric: calls the
   DESTROY hook of the user with the auxiliary data and drops the
   reference to the extended prototype.
   NOTE(review): the creator documents DESTROY as optional, but line 275
   is missing from this copy, so no null guard is visible before the
   call; the release of CAN itself and the return value are not visible
   either.  Confirm against upstream. */
272 can_destroy (void *can_)
274 struct casereader_append_numeric *can = can_;
276 can->destroy (can->aux);
277 caseproto_unref (can->proto);
/* Parameters of the sequence generated by next_arithmetic.
   NOTE(review): the body is missing from this copy; next_arithmetic reads
   members named first and increment, so it presumably declares those. */
284 struct arithmetic_sequence
/* Value callback for casereader_create_arithmetic_sequence: returns
   first + N * increment, taking the parameters from AUX.  The case
   argument is unused.
   NOTE(review): the return type and the remaining parameters (N and AUX
   are used in the body) are not visible in this copy. */
291 next_arithmetic (const struct ccase *c UNUSED,
295 struct arithmetic_sequence *as = aux;
296 return n * as->increment + as->first;
299 /* Creates and returns a new casereader whose cases are produced
300 by reading from SUBREADER and appending an additional value,
301 which takes the value FIRST in the first case, FIRST +
302 INCREMENT in the second case, FIRST + INCREMENT * 2 in the
303 third case, and so on.
305 After this function is called, SUBREADER must not ever again
306 be referenced directly. It will be destroyed automatically
307 when the translating casereader is destroyed. */
/* NOTE(review): this copy has lost lines; the assignment of as->first
   (line 313) and the remaining arguments of the final call are not
   visible, nor are the return type and braces. */
309 casereader_create_arithmetic_sequence (struct casereader *subreader,
310 double first, double increment)
/* Sequence parameters, read back by next_arithmetic through its AUX
   argument. */
312 struct arithmetic_sequence *as = xzalloc (sizeof *as);
314 as->increment = increment;
/* The appended column is produced by next_arithmetic. */
315 return casereader_create_append_numeric (subreader, next_arithmetic,
/* Auxiliary data for the reader built by casereader_create_append_rank.
   NOTE(review): the braces and several members used by car_translate
   (n, n_common, cc, mean_rank, prev_value, aux) are not visible in this
   copy. */
322 struct casereader_append_rank
/* Clone of the subreader; car_translate peeks ahead in it. */
324 struct casereader *clone;
/* Variable whose value car_translate reads from each case. */
326 const struct variable *var;
/* Variable read as the case weight in car_translate. */
327 const struct variable *weight;
/* Prototype of the output cases: the prototype of the subreader with one
   extra value of width 0 added. */
328 struct caseproto *proto;
/* Callback that car_translate invokes with the value, n_common, the
   weight and aux. */
332 distinct_func *distinct;
/* When non-null, car_translate ORs RANK_ERR_UNSORTED and
   RANK_ERR_NEGATIVE_WEIGHT flags into the pointed-to value. */
334 enum rank_error *err;
/* Forward declarations of the callbacks that
   casereader_create_append_rank passes to casereader_create_translator. */
338 static bool car_destroy (void *car_);
340 static struct ccase *car_translate (struct ccase *input, void *car_);
342 /* Creates and returns a new casereader whose cases are produced
343 by reading from SUBREADER and appending an additional value,
344 which is the rank of the observation. W is the weight variable
345 of the dictionary containing V, or NULL if there is no weight
348 The following preconditions must be met:
350 1. SUBREADER must be sorted on V.
352 2. The weights must be non-negative.
354 If either of these preconditions is not satisfied, then the rank
355 variables may not be correct. In this case, if ERR is non-null,
356 it will be set according to the erroneous conditions encountered.
358 If DISTINCT_CALLBACK is non-null, then it will be called exactly
359 once for every case containing a distinct value of V. AUX is
360 an auxiliary pointer passed to DISTINCT_CALLBACK.
362 After this function is called, SUBREADER must not ever again
363 be referenced directly. It will be destroyed automatically
364 when the translating casereader is destroyed. */
/* NOTE(review): this copy has lost lines; the end of the parameter list,
   the assignments at lines 377-381 and 384-385, the return type and the
   braces are not visible. */
366 casereader_create_append_rank (struct casereader *subreader,
367 const struct variable *v,
368 const struct variable *w,
369 enum rank_error *err,
370 distinct_func *distinct_callback,
374 struct casereader_append_rank *car = xmalloc (sizeof *car);
/* Take a reference to the prototype of the subreader, then extend it by
   one value of width 0 for the appended rank. */
375 car->proto = caseproto_ref (casereader_get_proto (subreader));
376 car->proto = caseproto_add_width (car->proto, 0);
/* A clone of SUBREADER, kept for the look-ahead peeks in car_translate. */
382 car->clone = casereader_clone (subreader);
383 car->distinct = distinct_callback;
/* SYSMIS marks that there is no previous value yet; car_translate only
   runs its ordering check when prev_value differs from SYSMIS. */
386 car->prev_value = SYSMIS;
/* Delegate to the stateful translator; CAR becomes the auxiliary data
   seen by car_translate and car_destroy. */
388 return casereader_create_translator (subreader, car->proto,
389 car_translate, car_destroy, car);
/* Destroy callback for casereader_create_append_rank: destroys the
   look-ahead clone and drops the reference to the extended prototype.
   NOTE(review): CAR is allocated with xmalloc by the creator, but no
   release of CAR and no return value are visible in this copy; confirm
   against upstream. */
394 car_destroy (void *car_)
396 struct casereader_append_rank *car = car_;
397 casereader_destroy (car->clone);
398 caseproto_unref (car->proto);
/* Translate callback for casereader_create_append_rank.  It reads the
   value of car->var from INPUT, records ordering and negative-weight
   problems through car->err when that pointer is non-null, looks ahead
   in the clone at following cases, and writes into the last slot of the
   resized case.
   NOTE(review): a large share of this function is missing from this copy
   (braces, declarations of weight, vxx and k, the loop header, the
   updates of cc, n and n_common, the right-hand side of the final store
   and the return).  The comments below describe only the statements that
   are visible. */
403 static struct ccase *
404 car_translate (struct ccase *input, void *car_)
406 struct casereader_append_rank *car = car_;
408 const double value = case_data (input, car->var)->f;
/* Ordering check, skipped while prev_value is still SYSMIS: a value
   smaller than the previous one is flagged as RANK_ERR_UNSORTED. */
410 if ( car->prev_value != SYSMIS)
412 if (car->err && value < car->prev_value)
413 *car->err |= RANK_ERR_UNSORTED;
416 if ( car->n_common == 1)
/* Weight of the current case; a negative weight is flagged. */
423 weight = case_data (input, car->weight)->f;
424 if ( car->err && weight < 0 )
425 *car->err |= RANK_ERR_NEGATIVE_WEIGHT;
/* Look ahead in the clone; K is pre-incremented, so the first peek is at
   position car->n + 1 when K starts at 0 (its initial value is not
   visible here). */
430 struct ccase *c = casereader_peek (car->clone, car->n + ++k);
433 vxx = case_data (c, car->var)->f;
/* Weight of the peeked case; a negative weight is flagged. */
439 double w = case_data (c, car->weight)->f;
441 if ( car->err && w < 0 )
442 *car->err |= RANK_ERR_NEGATIVE_WEIGHT;
/* The look-ahead repeats while the peeked value equals the current one. */
452 while (vxx == value);
453 car->mean_rank = car->cc + (weight + 1) / 2.0;
457 car->distinct (value, car->n_common, weight, car->aux);
/* Resize to the extended prototype; the store below targets its last
   index. */
464 input = case_unshare_and_resize (input, car->proto);
465 case_data_rw_idx (input, caseproto_get_n_widths (car->proto) - 1)->f
/* Remember this value for the ordering check of the next call. */
467 car->prev_value = value;
/* Members of the consolidator state used by uniquify, consolodate_weight
   and uniquify_destroy.
   NOTE(review): the struct header line, the braces and the members n, cc,
   prev_cc and direction that those functions use are not visible in this
   copy. */
/* Variable whose values uniquify compares between adjacent cases. */
476 const struct variable *key;
/* Weight variable; uniquify uses 1.0 per case when this is null. */
477 const struct variable *weight;
/* Clone of the input reader; uniquify peeks ahead in it. */
482 struct casereader *clone;
/* Prototype of the output cases, used by consolodate_weight. */
483 struct caseproto *proto;
/* Callback that casereader_create_distinct passes to
   casereader_create_filter_func.  It compares the key of the current
   case with the key of the following case, peeked from the clone.
   NOTE(review): the return type, the braces, the declaration of dir, the
   accumulation of cc, the branch taken when there is no next case and
   every return statement are missing from this copy.  The comments below
   describe only the statements that are visible. */
488 uniquify (const struct ccase *c, void *aux)
490 struct consolidator *cdr = aux;
491 const union value *current_value = case_data (c, cdr->key);
492 const int key_width = var_get_width (cdr->key);
/* Without a weight variable every case counts as 1.0. */
493 const double weight = cdr->weight ? case_data (c, cdr->weight)->f : 1.0;
/* Peek at the case that follows the current one in the clone. */
494 struct ccase *next_case = casereader_peek (cdr->clone, cdr->n + 1);
500 if ( NULL == next_case)
/* Three-way comparison of the next key against the current key. */
503 dir = value_compare_3way (case_data (next_case, cdr->key),
504 current_value, key_width);
/* Release the reference obtained from the peek. */
505 case_unref (next_case);
508 /* Insist that the data are sorted */
/* Once a non-zero direction has been recorded, every later comparison
   must agree with it. */
509 assert (cdr->direction == 0 || dir == cdr->direction);
510 cdr->direction = dir;
/* Save the running total so that consolodate_weight can store it. */
517 cdr->prev_cc = cdr->cc;
/* Translate callback that stores cdr->prev_cc into the output case.  Two
   paths are visible: one overwrites the weight variable in an unshared
   copy of INPUT, the other resizes INPUT to cdr->proto and writes the
   last slot.
   NOTE(review): the condition choosing between the two paths, the
   declaration of C, the braces and the return statement are missing from
   this copy; presumably the first path is taken when cdr->weight is
   non-null -- confirm against upstream. */
524 static struct ccase *
525 consolodate_weight (struct ccase *input, void *aux)
527 struct consolidator *cdr = aux;
/* Path 1: keep the shape of the case and overwrite the weight variable. */
532 c = case_unshare (input);
533 case_data_rw (c, cdr->weight)->f = cdr->prev_cc;
/* Path 2: grow the case to the output prototype and fill the last
   value. */
537 c = case_unshare_and_resize (input, cdr->proto);
538 case_data_rw_idx (c, caseproto_get_n_widths (cdr->proto) - 1)->f = cdr->prev_cc;
/* Destroy callback for the consolidator: destroys the look-ahead clone
   and drops the reference to the output prototype.
   NOTE(review): CDR is allocated with xmalloc in
   casereader_create_distinct, but no release of CDR and no return value
   are visible in this copy; confirm against upstream. */
546 uniquify_destroy (void *aux)
548 struct consolidator *cdr = aux;
550 casereader_destroy (cdr->clone);
551 caseproto_unref (cdr->proto);
559 /* Returns a new casereader which is based upon INPUT, but which contains a maximum
560 of one case for each distinct value of KEY.
561 If WEIGHT is non-null, then the new casereader's values for this variable
562 will be the sum of all values matching KEY.
563 If WEIGHT is null, then the new casereader will have an additional numeric
564 value appended, which will contain the total number of cases containing that value of KEY.
566 INPUT must be sorted on KEY. */
569 casereader_create_distinct (struct casereader *input,
570 const struct variable *key,
571 const struct variable *weight)
573 struct casereader *u ;
574 struct casereader *ud ;
575 struct caseproto *output_proto = caseproto_ref (casereader_get_proto (input));
577 struct consolidator *cdr = xmalloc (sizeof (*cdr));
580 cdr->weight = weight;
582 cdr->clone = casereader_clone (input);
585 if ( NULL == cdr->weight )
586 output_proto = caseproto_add_width (output_proto, 0);
588 cdr->proto = output_proto;
590 u = casereader_create_filter_func (input, uniquify,
593 ud = casereader_create_translator (u,