/* PSPP - a program for statistical analysis.
   Copyright (C) 2007 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
#include <data/casereader.h>

#include <stdlib.h>

#include <data/casereader-provider.h>
#include <data/casewriter.h>
#include <data/variable.h>
#include <data/dictionary.h>
#include <libpspp/taint.h>
#include <libpspp/message.h>

#define _(msgid) gettext (msgid)
35 /* A casereader that filters data coming from another
37 struct casereader_filter
39 struct casereader *subreader; /* The reader to filter. */
40 bool (*include) (const struct ccase *, void *aux);
41 bool (*destroy) (void *aux);
43 struct casewriter *exclude; /* Writer that gets filtered cases, or NULL. */
46 static struct casereader_class casereader_filter_class;
48 /* Creates and returns a casereader whose content is a filtered
49 version of the data in SUBREADER. Only the cases for which
50 INCLUDE returns true will appear in the returned casereader,
51 in the original order.
53 If EXCLUDE is non-null, then cases for which INCLUDE returns
54 false are written to EXCLUDE. These cases will not
55 necessarily be fully written to EXCLUDE until the filtering casereader's
56 cases have been fully read or, if that never occurs, until the
57 filtering casereader is destroyed.
59 When the filtering casereader is destroyed, DESTROY will be
60 called to allow any state maintained by INCLUDE to be freed.
62 After this function is called, SUBREADER must not ever again
63 be referenced directly. It will be destroyed automatically
64 when the filtering casereader is destroyed. */
66 casereader_create_filter_func (struct casereader *subreader,
67 bool (*include) (const struct ccase *,
69 bool (*destroy) (void *aux),
71 struct casewriter *exclude)
73 struct casereader_filter *filter = xmalloc (sizeof *filter);
74 struct casereader *reader;
75 filter->subreader = casereader_rename (subreader);
76 filter->include = include;
77 filter->destroy = destroy;
79 filter->exclude = exclude;
80 reader = casereader_create_sequential (
81 NULL, casereader_get_value_cnt (filter->subreader), CASENUMBER_MAX,
82 &casereader_filter_class, filter);
83 taint_propagate (casereader_get_taint (filter->subreader),
84 casereader_get_taint (reader));
88 /* Internal read function for filtering casereader. */
90 casereader_filter_read (struct casereader *reader UNUSED, void *filter_,
94 struct casereader_filter *filter = filter_;
97 if (!casereader_read (filter->subreader, c))
99 else if (filter->include (c, filter->aux))
101 else if (filter->exclude != NULL)
102 casewriter_write (filter->exclude, c);
108 /* Internal destruction function for filtering casereader. */
110 casereader_filter_destroy (struct casereader *reader, void *filter_)
112 struct casereader_filter *filter = filter_;
114 /* Make sure we've written everything to the excluded cases
115 casewriter, if there is one. */
116 if (filter->exclude != NULL)
119 while (casereader_read (filter->subreader, &c))
120 if (filter->include (&c, filter->aux))
123 casewriter_write (filter->exclude, &c);
126 casereader_destroy (filter->subreader);
127 if (filter->destroy != NULL && !filter->destroy (filter->aux))
128 casereader_force_error (reader);
132 /* Filtering casereader class. */
133 static struct casereader_class casereader_filter_class =
135 casereader_filter_read,
136 casereader_filter_destroy,
138 /* We could in fact delegate clone to the subreader, if the
139 filter function is required to have no memory and if we
140 added reference counting. But it might be useful to have
141 filter functions with memory and in any case this would
142 require a little extra work. */
/* Casereader for filtering valid weights. */

/* Weight-filtering data. */
struct casereader_filter_weight
  {
    const struct variable *weight_var; /* Weight variable. */

    /* Points to a flag that is true until the invalid-weight
       warning has been issued, after which it is set false.
       Either caller-supplied or the address of
       local_warn_on_invalid below. */
    bool *warn_on_invalid;
    bool local_warn_on_invalid; /* warn_on_invalid might point here. */
  };

static bool casereader_filter_weight_include (const struct ccase *, void *);
static bool casereader_filter_weight_destroy (void *);
161 /* Creates and returns a casereader that filters cases from
162 READER by valid weights, that is, any cases with user- or
163 system-missing, zero, or negative weights are dropped. The
164 weight variable's information is taken from DICT. If DICT
165 does not have a weight variable, then no cases are filtered
168 When a case with an invalid weight is encountered,
169 *WARN_ON_INVALID is checked. If it is true, then an error
170 message is issued and *WARN_ON_INVALID is set false. If
171 WARN_ON_INVALID is a null pointer, then an internal bool that
172 is initially true is used instead of a caller-supplied bool.
174 If EXCLUDE is non-null, then dropped cases are written to
175 EXCLUDE. These cases will not necessarily be fully written to
176 EXCLUDE until the filtering casereader's cases have been fully
177 read or, if that never occurs, until the filtering casereader
180 After this function is called, READER must not ever again be
181 referenced directly. It will be destroyed automatically when
182 the filtering casereader is destroyed. */
184 casereader_create_filter_weight (struct casereader *reader,
185 const struct dictionary *dict,
186 bool *warn_on_invalid,
187 struct casewriter *exclude)
189 struct variable *weight_var = dict_get_weight (dict);
190 if (weight_var != NULL)
192 struct casereader_filter_weight *cfw = xmalloc (sizeof *cfw);
193 cfw->weight_var = weight_var;
194 cfw->warn_on_invalid = (warn_on_invalid
196 : &cfw->local_warn_on_invalid);
197 cfw->local_warn_on_invalid = true;
198 reader = casereader_create_filter_func (reader,
199 casereader_filter_weight_include,
200 casereader_filter_weight_destroy,
204 reader = casereader_rename (reader);
208 /* Internal "include" function for weight-filtering
211 casereader_filter_weight_include (const struct ccase *c, void *cfw_)
213 struct casereader_filter_weight *cfw = cfw_;
214 double value = case_num (c, cfw->weight_var);
215 if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY))
219 if (*cfw->warn_on_invalid)
221 msg (SW, _("At least one case in the data read had a weight value "
222 "that was user-missing, system-missing, zero, or "
223 "negative. These case(s) were ignored."));
224 *cfw->warn_on_invalid = false;
/* Internal "destroy" function for weight-filtering
   casereader.

   Frees the weight-filtering data CFW_ allocated by
   casereader_create_filter_weight.  Always returns true. */
static bool
casereader_filter_weight_destroy (void *cfw_)
{
  struct casereader_filter_weight *cfw = cfw_;
  free (cfw);
  return true;
}
240 /* Casereader for filtering missing values. */
242 /* Missing-value filtering data. */
243 struct casereader_filter_missing
245 struct variable **vars; /* Variables whose values to filter. */
246 size_t var_cnt; /* Number of variables. */
247 enum mv_class class; /* Types of missing values to filter. */
250 static bool casereader_filter_missing_include (const struct ccase *, void *);
251 static bool casereader_filter_missing_destroy (void *);
253 /* Creates and returns a casereader that filters out cases from
254 READER that have a missing value in the given CLASS for any of
255 the VAR_CNT variables in VARS. Only cases that have
256 non-missing values for all of these variables are passed
259 Ownership of VARS is retained by the caller.
261 If EXCLUDE is non-null, then dropped cases are written to
262 EXCLUDE. These cases will not necessarily be fully written to
263 EXCLUDE until the filtering casereader's cases have been fully
264 read or, if that never occurs, until the filtering casereader
267 After this function is called, READER must not ever again
268 be referenced directly. It will be destroyed automatically
269 when the filtering casereader is destroyed. */
271 casereader_create_filter_missing (struct casereader *reader,
272 const struct variable **vars, size_t var_cnt,
274 struct casewriter *exclude)
276 if (var_cnt > 0 && class != MV_NEVER)
278 struct casereader_filter_missing *cfm = xmalloc (sizeof *cfm);
279 cfm->vars = xmemdup (vars, sizeof *vars * var_cnt);
280 cfm->var_cnt = var_cnt;
282 return casereader_create_filter_func (reader,
283 casereader_filter_missing_include,
284 casereader_filter_missing_destroy,
289 return casereader_rename (reader);
292 /* Internal "include" function for missing value-filtering
295 casereader_filter_missing_include (const struct ccase *c, void *cfm_)
297 const struct casereader_filter_missing *cfm = cfm_;
300 for (i = 0; i < cfm->var_cnt; i++)
302 struct variable *var = cfm->vars[i];
303 const union value *value = case_data (c, var);
304 if (var_is_value_missing (var, value, cfm->class))
310 /* Internal "destroy" function for missing value-filtering
313 casereader_filter_missing_destroy (void *cfm_)
315 struct casereader_filter_missing *cfm = cfm_;
321 /* Case-counting casereader. */
323 static bool casereader_counter_include (const struct ccase *, void *);
325 /* Creates and returns a new casereader that counts the number of
326 cases that have been read from it. *COUNTER is initially set
327 to INITIAL_VALUE, then incremented by 1 each time a case is read.
329 Counting casereaders must be used very cautiously: if a
330 counting casereader is cloned or if the casereader_peek
331 function is used on it, then the counter's value can be higher
332 than expected because of the buffering that goes on behind the
335 The counter is only incremented as cases are actually read
336 from the casereader. In particular, if the casereader is
337 destroyed before all cases have been read from the casereader,
338 cases never read will not be included in the count.
340 After this function is called, READER must not ever again
341 be referenced directly. It will be destroyed automatically
342 when the filtering casereader is destroyed. */
344 casereader_create_counter (struct casereader *reader, casenumber *counter,
345 casenumber initial_value)
347 *counter = initial_value;
348 return casereader_create_filter_func (reader, casereader_counter_include,
349 NULL, counter, NULL);
352 /* Internal "include" function for counting casereader. */
354 casereader_counter_include (const struct ccase *c UNUSED, void *counter_)
356 casenumber *counter = counter_;