1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <libpspp/message.h>
20 #include <data/case.h>
21 #include <data/casereader.h>
22 #include <data/dictionary.h>
23 #include "group-proc.h"
24 #include <libpspp/hash.h>
25 #include <libpspp/str.h>
26 #include <data/variable.h>
27 #include <data/procedure.h>
28 #include <libpspp/alloc.h>
29 #include <libpspp/misc.h>
36 /* This module calculates the Levene statistic for variables.
38 Just for reference, the Levene Statistic is a defines as follows:
40 W = \frac{ (n-k)\sum_{i=1}^k n_i(Z_{iL} - Z_{LL})^2}
41 { (k-1)\sum_{i=1}^k \sum_{j=1}^{n_i} (Z_{ij} - Z_{iL})^2}
44 k is the number of groups
45 n is the total number of samples
46 n_i is the number of samples in the ith group
47 Z_{ij} is | Y_{ij} - Y_{iL} | where Y_{iL} is the mean of the ith group
48 Z_{iL} is the mean of Z_{ij} over the ith group
49 Z_{LL} is the grand mean of Z_{ij}
51 Imagine calculating that with pencil and paper!
59 /* Per group statistics */
60 struct t_test_proc **group_stats;
62 /* The independent variable */
63 const struct variable *v_indep;
65 /* Number of dependent variables */
68 /* The dependent variables */
69 const struct variable **v_dep;
71 /* Filter for missing values */
72 enum mv_class exclude;
74 /* An array of lz_stats for each variable */
77 /* The denominator for the expression for the Levene */
78 double *lz_denominator;
82 /* Per variable statistics */
91 /* The total number of cases */
94 /* Number of groups */
99 static void levene_precalc (const struct levene_info *l);
100 static int levene_calc (const struct dictionary *dict, const struct ccase *,
101 const struct levene_info *l);
102 static void levene_postcalc (struct levene_info *);
106 static void levene2_precalc (struct levene_info *l);
107 static int levene2_calc (const struct dictionary *, const struct ccase *,
108 struct levene_info *l);
109 static void levene2_postcalc (struct levene_info *);
113 levene(const struct dictionary *dict,
114 struct casereader *reader,
115 const struct variable *v_indep, size_t n_dep,
116 const struct variable **v_dep,
117 enum mv_class exclude)
119 struct casereader *pass1, *pass2;
121 struct levene_info l;
127 l.lz = xnmalloc (l.n_dep, sizeof *l.lz);
128 l.lz_denominator = xnmalloc (l.n_dep, sizeof *l.lz_denominator);
130 casereader_split (reader, &pass1, &pass2);
133 for (; casereader_read (pass1, &c); case_destroy (&c))
134 levene_calc (dict, &c, &l);
135 casereader_destroy (pass1);
136 levene_postcalc (&l);
139 for (; casereader_read (pass2, &c); case_destroy (&c))
140 levene2_calc (dict, &c, &l);
141 casereader_destroy (pass2);
142 levene2_postcalc (&l);
144 free (l.lz_denominator);
149 levene_precalc (const struct levene_info *l)
153 for(i = 0; i < l->n_dep ; ++i )
155 const struct variable *var = l->v_dep[i];
156 struct group_proc *gp = group_proc_get (var);
157 struct group_statistics *gs;
158 struct hsh_iterator hi;
160 l->lz[i].grand_total = 0;
161 l->lz[i].total_n = 0;
162 l->lz[i].n_groups = gp->n_groups ;
165 for ( gs = hsh_first(gp->group_hash, &hi);
167 gs = hsh_next(gp->group_hash, &hi))
177 levene_calc (const struct dictionary *dict, const struct ccase *c,
178 const struct levene_info *l)
182 const union value *gv = case_data (c, l->v_indep);
183 struct group_statistics key;
184 double weight = dict_get_case_weight (dict, c, &warn);
188 for (i = 0; i < l->n_dep; ++i)
190 const struct variable *var = l->v_dep[i];
191 struct group_proc *gp = group_proc_get (var);
193 const union value *v = case_data (c, var);
194 struct group_statistics *gs;
196 gs = hsh_find(gp->group_hash,(void *) &key );
201 if ( !var_is_value_missing (var, v, l->exclude))
203 levene_z= fabs(v->f - gs->mean);
204 l->lz[i].grand_total += levene_z * weight;
205 l->lz[i].total_n += weight;
207 gs->lz_total += levene_z * weight;
215 levene_postcalc (struct levene_info *l)
219 for (v = 0; v < l->n_dep; ++v)
222 l->lz[v].grand_mean = l->lz[v].grand_total / l->lz[v].total_n ;
231 levene2_precalc (struct levene_info *l)
236 /* This stuff could go in the first post calc . . . */
241 struct hsh_iterator hi;
242 struct group_statistics *g;
244 const struct variable *var = l->v_dep[v] ;
245 struct hsh_table *hash = group_proc_get (var)->group_hash;
248 for(g = (struct group_statistics *) hsh_first(hash,&hi);
250 g = (struct group_statistics *) hsh_next(hash,&hi) )
252 g->lz_mean = g->lz_total / g->n ;
254 l->lz_denominator[v] = 0;
259 levene2_calc (const struct dictionary *dict, const struct ccase *c,
260 struct levene_info *l)
265 double weight = dict_get_case_weight (dict, c, &warn);
267 const union value *gv = case_data (c, l->v_indep);
268 struct group_statistics key;
272 for (i = 0; i < l->n_dep; ++i)
275 const struct variable *var = l->v_dep[i] ;
276 const union value *v = case_data (c, var);
277 struct group_statistics *gs;
279 gs = hsh_find(group_proc_get (var)->group_hash,(void *) &key );
284 if ( !var_is_value_missing (var, v, l->exclude))
286 levene_z = fabs(v->f - gs->mean);
287 l->lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean);
296 levene2_postcalc (struct levene_info *l)
300 for (v = 0; v < l->n_dep; ++v)
302 double lz_numerator = 0;
303 struct hsh_iterator hi;
304 struct group_statistics *g;
306 const struct variable *var = l->v_dep[v] ;
307 struct group_proc *gp = group_proc_get (var);
308 struct hsh_table *hash = gp->group_hash;
310 for(g = (struct group_statistics *) hsh_first(hash,&hi);
312 g = (struct group_statistics *) hsh_next(hash,&hi) )
314 lz_numerator += g->n * pow2(g->lz_mean - l->lz[v].grand_mean );
316 lz_numerator *= ( gp->ugs.n - gp->n_groups );
318 l->lz_denominator[v] *= (gp->n_groups - 1);
320 gp->levene = lz_numerator / l->lz_denominator[v] ;