1 /* This file is part of GNU PSPP
2 Computes Levene test statistic.
4 Copyright (C) 2004 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 #include <libpspp/message.h>
24 #include <data/case.h>
25 #include <data/casereader.h>
26 #include <data/dictionary.h>
27 #include "group-proc.h"
28 #include <libpspp/hash.h>
29 #include <libpspp/str.h>
30 #include <data/variable.h>
31 #include <data/procedure.h>
32 #include <libpspp/alloc.h>
33 #include <libpspp/misc.h>
40 /* This module calculates the Levene statistic for variables.
42 Just for reference, the Levene Statistic is a defines as follows:
44 W = \frac{ (n-k)\sum_{i=1}^k n_i(Z_{iL} - Z_{LL})^2}
45 { (k-1)\sum_{i=1}^k \sum_{j=1}^{n_i} (Z_{ij} - Z_{iL})^2}
48 k is the number of groups
49 n is the total number of samples
50 n_i is the number of samples in the ith group
51 Z_{ij} is | Y_{ij} - Y_{iL} | where Y_{iL} is the mean of the ith group
52 Z_{iL} is the mean of Z_{ij} over the ith group
53 Z_{LL} is the grand mean of Z_{ij}
55 Imagine calculating that with pencil and paper!
63 /* Per group statistics */
64 struct t_test_proc **group_stats;
66 /* The independent variable */
67 const struct variable *v_indep;
69 /* Number of dependent variables */
72 /* The dependent variables */
73 const struct variable **v_dep;
75 /* Filter for missing values */
76 enum mv_class exclude;
78 /* An array of lz_stats for each variable */
81 /* The denominator for the expression for the Levene */
82 double *lz_denominator;
86 /* Per variable statistics */
95 /* The total number of cases */
98 /* Number of groups */
103 static void levene_precalc (const struct levene_info *l);
104 static int levene_calc (const struct dictionary *dict, const struct ccase *,
105 const struct levene_info *l);
106 static void levene_postcalc (struct levene_info *);
110 static void levene2_precalc (struct levene_info *l);
111 static int levene2_calc (const struct dictionary *, const struct ccase *,
112 struct levene_info *l);
113 static void levene2_postcalc (struct levene_info *);
117 levene(const struct dictionary *dict,
118 struct casereader *reader,
119 const struct variable *v_indep, size_t n_dep,
120 const struct variable **v_dep,
121 enum mv_class exclude)
123 struct casereader *pass1, *pass2;
125 struct levene_info l;
131 l.lz = xnmalloc (l.n_dep, sizeof *l.lz);
132 l.lz_denominator = xnmalloc (l.n_dep, sizeof *l.lz_denominator);
134 casereader_split (reader, &pass1, &pass2);
137 for (; casereader_read (pass1, &c); case_destroy (&c))
138 levene_calc (dict, &c, &l);
139 casereader_destroy (pass1);
140 levene_postcalc (&l);
143 for (; casereader_read (pass2, &c); case_destroy (&c))
144 levene2_calc (dict, &c, &l);
145 casereader_destroy (pass2);
146 levene2_postcalc (&l);
148 free (l.lz_denominator);
153 levene_precalc (const struct levene_info *l)
157 for(i = 0; i < l->n_dep ; ++i )
159 const struct variable *var = l->v_dep[i];
160 struct group_proc *gp = group_proc_get (var);
161 struct group_statistics *gs;
162 struct hsh_iterator hi;
164 l->lz[i].grand_total = 0;
165 l->lz[i].total_n = 0;
166 l->lz[i].n_groups = gp->n_groups ;
169 for ( gs = hsh_first(gp->group_hash, &hi);
171 gs = hsh_next(gp->group_hash, &hi))
181 levene_calc (const struct dictionary *dict, const struct ccase *c,
182 const struct levene_info *l)
186 const union value *gv = case_data (c, l->v_indep);
187 struct group_statistics key;
188 double weight = dict_get_case_weight (dict, c, &warn);
192 for (i = 0; i < l->n_dep; ++i)
194 const struct variable *var = l->v_dep[i];
195 struct group_proc *gp = group_proc_get (var);
197 const union value *v = case_data (c, var);
198 struct group_statistics *gs;
200 gs = hsh_find(gp->group_hash,(void *) &key );
205 if ( !var_is_value_missing (var, v, l->exclude))
207 levene_z= fabs(v->f - gs->mean);
208 l->lz[i].grand_total += levene_z * weight;
209 l->lz[i].total_n += weight;
211 gs->lz_total += levene_z * weight;
219 levene_postcalc (struct levene_info *l)
223 for (v = 0; v < l->n_dep; ++v)
226 l->lz[v].grand_mean = l->lz[v].grand_total / l->lz[v].total_n ;
235 levene2_precalc (struct levene_info *l)
240 /* This stuff could go in the first post calc . . . */
245 struct hsh_iterator hi;
246 struct group_statistics *g;
248 const struct variable *var = l->v_dep[v] ;
249 struct hsh_table *hash = group_proc_get (var)->group_hash;
252 for(g = (struct group_statistics *) hsh_first(hash,&hi);
254 g = (struct group_statistics *) hsh_next(hash,&hi) )
256 g->lz_mean = g->lz_total / g->n ;
258 l->lz_denominator[v] = 0;
263 levene2_calc (const struct dictionary *dict, const struct ccase *c,
264 struct levene_info *l)
269 double weight = dict_get_case_weight (dict, c, &warn);
271 const union value *gv = case_data (c, l->v_indep);
272 struct group_statistics key;
276 for (i = 0; i < l->n_dep; ++i)
279 const struct variable *var = l->v_dep[i] ;
280 const union value *v = case_data (c, var);
281 struct group_statistics *gs;
283 gs = hsh_find(group_proc_get (var)->group_hash,(void *) &key );
288 if ( !var_is_value_missing (var, v, l->exclude))
290 levene_z = fabs(v->f - gs->mean);
291 l->lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean);
300 levene2_postcalc (struct levene_info *l)
304 for (v = 0; v < l->n_dep; ++v)
306 double lz_numerator = 0;
307 struct hsh_iterator hi;
308 struct group_statistics *g;
310 const struct variable *var = l->v_dep[v] ;
311 struct group_proc *gp = group_proc_get (var);
312 struct hsh_table *hash = gp->group_hash;
314 for(g = (struct group_statistics *) hsh_first(hash,&hi);
316 g = (struct group_statistics *) hsh_next(hash,&hi) )
318 lz_numerator += g->n * pow2(g->lz_mean - l->lz[v].grand_mean );
320 lz_numerator *= ( gp->ugs.n - gp->n_groups );
322 l->lz_denominator[v] *= (gp->n_groups - 1);
324 gp->levene = lz_numerator / l->lz_denominator[v] ;