1 /* PSPP - computes sample statistics.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <data/casereader.h>

#include <stdlib.h>

#include <data/casereader-provider.h>
#include <data/casewriter.h>
#include <data/variable.h>
#include <data/dictionary.h>
#include <libpspp/taint.h>
#include <libpspp/message.h>
35 #define _(msgid) gettext (msgid)
37 /* A casereader that filters data coming from another
39 struct casereader_filter
41 struct casereader *subreader; /* The reader to filter. */
42 bool (*include) (const struct ccase *, void *aux);
43 bool (*destroy) (void *aux);
45 struct casewriter *exclude; /* Writer that gets filtered cases, or NULL. */
48 static struct casereader_class casereader_filter_class;
50 /* Creates and returns a casereader whose content is a filtered
51 version of the data in SUBREADER. Only the cases for which
52 INCLUDE returns true will appear in the returned casereader,
53 in the original order.
55 If EXCLUDE is non-null, then cases for which INCLUDE returns
56 false are written to EXCLUDE. These cases will not
57 necessarily be fully written to EXCLUDE until the filtering casereader's
58 cases have been fully read or, if that never occurs, until the
59 filtering casereader is destroyed.
61 When the filtering casereader is destroyed, DESTROY will be
62 called to allow any state maintained by INCLUDE to be freed.
64 After this function is called, SUBREADER must not ever again
65 be referenced directly. It will be destroyed automatically
66 when the filtering casereader is destroyed. */
68 casereader_create_filter_func (struct casereader *subreader,
69 bool (*include) (const struct ccase *,
71 bool (*destroy) (void *aux),
73 struct casewriter *exclude)
75 struct casereader_filter *filter = xmalloc (sizeof *filter);
76 struct casereader *reader;
77 filter->subreader = casereader_rename (subreader);
78 filter->include = include;
79 filter->destroy = destroy;
81 filter->exclude = exclude;
82 reader = casereader_create_sequential (
83 NULL, casereader_get_value_cnt (filter->subreader), CASENUMBER_MAX,
84 &casereader_filter_class, filter);
85 taint_propagate (casereader_get_taint (filter->subreader),
86 casereader_get_taint (reader));
90 /* Internal read function for filtering casereader. */
92 casereader_filter_read (struct casereader *reader UNUSED, void *filter_,
96 struct casereader_filter *filter = filter_;
99 if (!casereader_read (filter->subreader, c))
101 else if (filter->include (c, filter->aux))
103 else if (filter->exclude != NULL)
104 casewriter_write (filter->exclude, c);
110 /* Internal destruction function for filtering casereader. */
112 casereader_filter_destroy (struct casereader *reader, void *filter_)
114 struct casereader_filter *filter = filter_;
116 /* Make sure we've written everything to the excluded cases
117 casewriter, if there is one. */
118 if (filter->exclude != NULL)
121 while (casereader_read (filter->subreader, &c))
122 if (filter->include (&c, filter->aux))
125 casewriter_write (filter->exclude, &c);
128 casereader_destroy (filter->subreader);
129 if (filter->destroy != NULL && !filter->destroy (filter->aux))
130 casereader_force_error (reader);
134 /* Filtering casereader class. */
135 static struct casereader_class casereader_filter_class =
137 casereader_filter_read,
138 casereader_filter_destroy,
140 /* We could in fact delegate clone to the subreader, if the
141 filter function is required to have no memory and if we
142 added reference counting. But it might be useful to have
143 filter functions with memory and in any case this would
144 require a little extra work. */
150 /* Casereader for filtering valid weights. */
/* Weight-filtering data. */
struct casereader_filter_weight
  {
    const struct variable *weight_var; /* Weight variable. */

    /* Points to a flag that is true while the invalid-weight
       warning has not yet been issued and is cleared once it
       has. */
    bool *warn_on_invalid;
    bool local_warn_on_invalid; /* warn_on_invalid might point here. */
  };

static bool casereader_filter_weight_include (const struct ccase *, void *);
static bool casereader_filter_weight_destroy (void *);
163 /* Creates and returns a casereader that filters cases from
164 READER by valid weights, that is, any cases with user- or
165 system-missing, zero, or negative weights are dropped. The
166 weight variable's information is taken from DICT. If DICT
167 does not have a weight variable, then no cases are filtered
170 When a case with an invalid weight is encountered,
171 *WARN_ON_INVALID is checked. If it is true, then an error
172 message is issued and *WARN_ON_INVALID is set false. If
173 WARN_ON_INVALID is a null pointer, then an internal bool that
174 is initially true is used instead of a caller-supplied bool.
176 If EXCLUDE is non-null, then dropped cases are written to
177 EXCLUDE. These cases will not necessarily be fully written to
178 EXCLUDE until the filtering casereader's cases have been fully
179 read or, if that never occurs, until the filtering casereader
182 After this function is called, READER must not ever again be
183 referenced directly. It will be destroyed automatically when
184 the filtering casereader is destroyed. */
186 casereader_create_filter_weight (struct casereader *reader,
187 const struct dictionary *dict,
188 bool *warn_on_invalid,
189 struct casewriter *exclude)
191 struct variable *weight_var = dict_get_weight (dict);
192 if (weight_var != NULL)
194 struct casereader_filter_weight *cfw = xmalloc (sizeof *cfw);
195 cfw->weight_var = weight_var;
196 cfw->warn_on_invalid = (warn_on_invalid
198 : &cfw->local_warn_on_invalid);
199 cfw->local_warn_on_invalid = true;
200 reader = casereader_create_filter_func (reader,
201 casereader_filter_weight_include,
202 casereader_filter_weight_destroy,
206 reader = casereader_rename (reader);
210 /* Internal "include" function for weight-filtering
213 casereader_filter_weight_include (const struct ccase *c, void *cfw_)
215 struct casereader_filter_weight *cfw = cfw_;
216 double value = case_num (c, cfw->weight_var);
217 if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY))
221 if (*cfw->warn_on_invalid)
223 msg (SW, _("At least one case in the data read had a weight value "
224 "that was user-missing, system-missing, zero, or "
225 "negative. These case(s) were ignored."));
226 *cfw->warn_on_invalid = false;
/* Internal "destroy" function for weight-filtering
   casereader.  Frees the weight-filtering data CFW_ allocated by
   casereader_create_filter_weight.  Always returns true. */
static bool
casereader_filter_weight_destroy (void *cfw_)
{
  struct casereader_filter_weight *cfw = cfw_;
  free (cfw);
  return true;
}
242 /* Casereader for filtering missing values. */
244 /* Missing-value filtering data. */
245 struct casereader_filter_missing
247 struct variable **vars; /* Variables whose values to filter. */
248 size_t var_cnt; /* Number of variables. */
249 enum mv_class class; /* Types of missing values to filter. */
252 static bool casereader_filter_missing_include (const struct ccase *, void *);
253 static bool casereader_filter_missing_destroy (void *);
255 /* Creates and returns a casereader that filters out cases from
256 READER that have a missing value in the given CLASS for any of
257 the VAR_CNT variables in VARS. Only cases that have
258 non-missing values for all of these variables are passed
261 Ownership of VARS is retained by the caller.
263 If EXCLUDE is non-null, then dropped cases are written to
264 EXCLUDE. These cases will not necessarily be fully written to
265 EXCLUDE until the filtering casereader's cases have been fully
266 read or, if that never occurs, until the filtering casereader
269 After this function is called, READER must not ever again
270 be referenced directly. It will be destroyed automatically
271 when the filtering casereader is destroyed. */
273 casereader_create_filter_missing (struct casereader *reader,
274 const struct variable **vars, size_t var_cnt,
276 struct casewriter *exclude)
278 if (var_cnt > 0 && class != MV_NEVER)
280 struct casereader_filter_missing *cfm = xmalloc (sizeof *cfm);
281 cfm->vars = xmemdup (vars, sizeof *vars * var_cnt);
282 cfm->var_cnt = var_cnt;
284 return casereader_create_filter_func (reader,
285 casereader_filter_missing_include,
286 casereader_filter_missing_destroy,
291 return casereader_rename (reader);
294 /* Internal "include" function for missing value-filtering
297 casereader_filter_missing_include (const struct ccase *c, void *cfm_)
299 const struct casereader_filter_missing *cfm = cfm_;
302 for (i = 0; i < cfm->var_cnt; i++)
304 struct variable *var = cfm->vars[i];
305 const union value *value = case_data (c, var);
306 if (var_is_value_missing (var, value, cfm->class))
312 /* Internal "destroy" function for missing value-filtering
315 casereader_filter_missing_destroy (void *cfm_)
317 struct casereader_filter_missing *cfm = cfm_;
323 /* Case-counting casereader. */
325 static bool casereader_counter_include (const struct ccase *, void *);
327 /* Creates and returns a new casereader that counts the number of
328 cases that have been read from it. *COUNTER is initially set
329 to INITIAL_VALUE, then incremented by 1 each time a case is read.
331 Counting casereaders must be used very cautiously: if a
332 counting casereader is cloned or if the casereader_peek
333 function is used on it, then the counter's value can be higher
334 than expected because of the buffering that goes on behind the
337 The counter is only incremented as cases are actually read
338 from the casereader. In particular, if the casereader is
339 destroyed before all cases have been read from the casereader,
340 cases never read will not be included in the count.
342 After this function is called, READER must not ever again
343 be referenced directly. It will be destroyed automatically
344 when the filtering casereader is destroyed. */
346 casereader_create_counter (struct casereader *reader, casenumber *counter,
347 casenumber initial_value)
349 *counter = initial_value;
350 return casereader_create_filter_func (reader, casereader_counter_include,
351 NULL, counter, NULL);
354 /* Internal "include" function for counting casereader. */
356 casereader_counter_include (const struct ccase *c UNUSED, void *counter_)
358 casenumber *counter = counter_;