1 /* This file is part of GNU PSPP
2 Computes Levene test statistic.
4 Copyright (C) 2004 Free Software Foundation, Inc.
5 Written by John Darrington <john@darrington.wattle.id.au>
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
39 /* This module calculates the Levene statistic for variables.
41 Just for reference, the Levene Statistic is a defines as follows:
43 W = \frac{ (n-k)\sum_{i=1}^k n_i(Z_{iL} - Z_{LL})^2}
44 { (k-1)\sum_{i=1}^k \sum_{j=1}^{n_i} (Z_{ij} - Z_{iL})^2}
47 k is the number of groups
48 n is the total number of samples
49 n_i is the number of samples in the ith group
50 Z_{ij} is | Y_{ij} - Y_{iL} | where Y_{iL} is the mean of the ith group
51 Z_{iL} is the mean of Z_{ij} over the ith group
52 Z_{LL} is the grand mean of Z_{ij}
54 Imagine calculating that with pencil and paper!
62 /* Per group statistics */
63 struct t_test_proc **group_stats;
65 /* The independent variable */
66 struct variable *v_indep;
68 /* Number of dependent variables */
71 /* The dependent variables */
72 struct variable **v_dep;
74 /* How to treat missing values */
75 enum lev_missing missing;
77 /* Function to test for missing values */
78 is_missing_func is_missing;
83 static void levene_precalc (const struct levene_info *l);
84 static int levene_calc (const struct ccase *, void *);
85 static void levene_postcalc (void *);
89 static void levene2_precalc (void *);
90 static int levene2_calc (const struct ccase *, void *);
91 static void levene2_postcalc (void *);
95 levene(const struct casefile *cf,
96 struct variable *v_indep, int n_dep, struct variable **v_dep,
97 enum lev_missing missing, is_missing_func value_is_missing)
101 struct levene_info l;
107 l.is_missing = value_is_missing;
112 for(r = casefile_get_reader (cf);
113 casereader_read (r, &c) ;
118 casereader_destroy (r);
122 for(r = casefile_get_reader (cf);
123 casereader_read (r, &c) ;
128 casereader_destroy (r);
129 levene2_postcalc(&l);
133 /* Internal variables used in calculating the Levene statistic */
135 /* Per variable statistics */
138 /* Total of all lz */
144 /* The total number of cases */
147 /* Number of groups */
151 /* An array of lz_stats for each variable */
152 static struct lz_stats *lz;
156 levene_precalc (const struct levene_info *l)
160 lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
162 for(i = 0; i < l->n_dep ; ++i )
164 struct variable *var = l->v_dep[i];
165 struct group_statistics *gs;
166 struct hsh_iterator hi;
168 lz[i].grand_total = 0;
170 lz[i].n_groups = var->p.grp_data.n_groups ;
173 for ( gs = hsh_first(var->p.grp_data.group_hash, &hi);
175 gs = hsh_next(var->p.grp_data.group_hash, &hi))
185 levene_calc (const struct ccase *c, void *_l)
189 struct levene_info *l = (struct levene_info *) _l;
190 const union value *gv = case_data (c, l->v_indep->fv);
191 struct group_statistics key;
192 double weight = dict_get_case_weight(default_dict,c,&warn);
194 /* Skip the entire case if /MISSING=LISTWISE is set */
195 if ( l->missing == LEV_LISTWISE )
197 for (i = 0; i < l->n_dep; ++i)
199 struct variable *v = l->v_dep[i];
200 const union value *val = case_data (c, v->fv);
202 if (l->is_missing(val,v) )
212 for (i = 0; i < l->n_dep; ++i)
214 struct variable *var = l->v_dep[i];
216 const union value *v = case_data (c, var->fv);
217 struct group_statistics *gs;
219 gs = hsh_find(var->p.grp_data.group_hash,(void *) &key );
224 if ( ! l->is_missing(v,var))
226 levene_z= fabs(v->f - gs->mean);
227 lz[i].grand_total += levene_z * weight;
228 lz[i].total_n += weight;
230 gs->lz_total += levene_z * weight;
239 levene_postcalc (void *_l)
243 struct levene_info *l = (struct levene_info *) _l;
245 for (v = 0; v < l->n_dep; ++v)
248 lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
255 /* The denominator for the expression for the Levene */
256 static double *lz_denominator;
259 levene2_precalc (void *_l)
263 struct levene_info *l = (struct levene_info *) _l;
265 lz_denominator = (double *) xmalloc(sizeof(double) * l->n_dep);
267 /* This stuff could go in the first post calc . . . */
268 for (v = 0; v < l->n_dep; ++v)
270 struct hsh_iterator hi;
271 struct group_statistics *g;
273 struct variable *var = l->v_dep[v] ;
274 struct hsh_table *hash = var->p.grp_data.group_hash;
277 for(g = (struct group_statistics *) hsh_first(hash,&hi);
279 g = (struct group_statistics *) hsh_next(hash,&hi) )
281 g->lz_mean = g->lz_total / g->n ;
283 lz_denominator[v] = 0;
288 levene2_calc (const struct ccase *c, void *_l)
293 struct levene_info *l = (struct levene_info *) _l;
295 double weight = dict_get_case_weight(default_dict,c,&warn);
297 const union value *gv = case_data (c, l->v_indep->fv);
298 struct group_statistics key;
300 /* Skip the entire case if /MISSING=LISTWISE is set */
301 if ( l->missing == LEV_LISTWISE )
303 for (i = 0; i < l->n_dep; ++i)
305 struct variable *v = l->v_dep[i];
306 const union value *val = case_data (c, v->fv);
308 if (l->is_missing(val,v) )
317 for (i = 0; i < l->n_dep; ++i)
320 struct variable *var = l->v_dep[i] ;
321 const union value *v = case_data (c, var->fv);
322 struct group_statistics *gs;
324 gs = hsh_find(var->p.grp_data.group_hash,(void *) &key );
329 if ( ! l->is_missing(v,var) )
331 levene_z = fabs(v->f - gs->mean);
332 lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean);
341 levene2_postcalc (void *_l)
345 struct levene_info *l = (struct levene_info *) _l;
347 for (v = 0; v < l->n_dep; ++v)
349 double lz_numerator = 0;
350 struct hsh_iterator hi;
351 struct group_statistics *g;
353 struct variable *var = l->v_dep[v] ;
354 struct hsh_table *hash = var->p.grp_data.group_hash;
356 for(g = (struct group_statistics *) hsh_first(hash,&hi);
358 g = (struct group_statistics *) hsh_next(hash,&hi) )
360 lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
362 lz_numerator *= ( l->v_dep[v]->p.grp_data.ugs.n -
363 l->v_dep[v]->p.grp_data.n_groups );
365 lz_denominator[v] *= (l->v_dep[v]->p.grp_data.n_groups - 1);
367 l->v_dep[v]->p.grp_data.levene = lz_numerator / lz_denominator[v] ;
371 /* Now clear up after ourselves */
372 free(lz_denominator);