1 /* This file is part of GNU PSPP
2 Computes Levene test statistic.
4 Copyright (C) 2004 Free Software Foundation, Inc.
5 Written by John Darrington <john@darrington.wattle.id.au>
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include "dictionary.h"
28 #include "group_proc.h"
41 /* This module calculates the Levene statistic for variables.
43 Just for reference, the Levene Statistic is a defines as follows:
45 W = \frac{ (n-k)\sum_{i=1}^k n_i(Z_{iL} - Z_{LL})^2}
46 { (k-1)\sum_{i=1}^k \sum_{j=1}^{n_i} (Z_{ij} - Z_{iL})^2}
49 k is the number of groups
50 n is the total number of samples
51 n_i is the number of samples in the ith group
52 Z_{ij} is | Y_{ij} - Y_{iL} | where Y_{iL} is the mean of the ith group
53 Z_{iL} is the mean of Z_{ij} over the ith group
54 Z_{LL} is the grand mean of Z_{ij}
56 Imagine calculating that with pencil and paper!
64 /* Per group statistics */
65 struct t_test_proc **group_stats;
67 /* The independent variable */
68 struct variable *v_indep;
70 /* Number of dependent variables */
73 /* The dependent variables */
74 struct variable **v_dep;
76 /* How to treat missing values */
77 enum lev_missing missing;
79 /* Function to test for missing values */
80 is_missing_func *is_missing;
84 static void levene_precalc (const struct levene_info *l);
85 static int levene_calc (const struct ccase *, void *);
86 static void levene_postcalc (void *);
90 static void levene2_precalc (void *);
91 static int levene2_calc (const struct ccase *, void *);
92 static void levene2_postcalc (void *);
96 levene(const struct casefile *cf,
97 struct variable *v_indep, int n_dep, struct variable **v_dep,
98 enum lev_missing missing, is_missing_func value_is_missing)
100 struct casereader *r;
102 struct levene_info l;
108 l.is_missing = value_is_missing;
113 for(r = casefile_get_reader (cf);
114 casereader_read (r, &c) ;
119 casereader_destroy (r);
123 for(r = casefile_get_reader (cf);
124 casereader_read (r, &c) ;
129 casereader_destroy (r);
130 levene2_postcalc(&l);
134 /* Internal variables used in calculating the Levene statistic */
136 /* Per variable statistics */
139 /* Total of all lz */
145 /* The total number of cases */
148 /* Number of groups */
152 /* An array of lz_stats for each variable */
153 static struct lz_stats *lz;
157 levene_precalc (const struct levene_info *l)
161 lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
163 for(i = 0; i < l->n_dep ; ++i )
165 struct variable *var = l->v_dep[i];
166 struct group_proc *gp = group_proc_get (var);
167 struct group_statistics *gs;
168 struct hsh_iterator hi;
170 lz[i].grand_total = 0;
172 lz[i].n_groups = gp->n_groups ;
175 for ( gs = hsh_first(gp->group_hash, &hi);
177 gs = hsh_next(gp->group_hash, &hi))
187 levene_calc (const struct ccase *c, void *_l)
191 struct levene_info *l = (struct levene_info *) _l;
192 const union value *gv = case_data (c, l->v_indep->fv);
193 struct group_statistics key;
194 double weight = dict_get_case_weight(default_dict,c,&warn);
196 /* Skip the entire case if /MISSING=LISTWISE is set */
197 if ( l->missing == LEV_LISTWISE )
199 for (i = 0; i < l->n_dep; ++i)
201 struct variable *v = l->v_dep[i];
202 const union value *val = case_data (c, v->fv);
204 if (l->is_missing (&v->miss, val) )
214 for (i = 0; i < l->n_dep; ++i)
216 struct variable *var = l->v_dep[i];
217 struct group_proc *gp = group_proc_get (var);
219 const union value *v = case_data (c, var->fv);
220 struct group_statistics *gs;
222 gs = hsh_find(gp->group_hash,(void *) &key );
227 if ( ! l->is_missing(&var->miss, v))
229 levene_z= fabs(v->f - gs->mean);
230 lz[i].grand_total += levene_z * weight;
231 lz[i].total_n += weight;
233 gs->lz_total += levene_z * weight;
242 levene_postcalc (void *_l)
246 struct levene_info *l = (struct levene_info *) _l;
248 for (v = 0; v < l->n_dep; ++v)
251 lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
258 /* The denominator for the expression for the Levene */
259 static double *lz_denominator;
262 levene2_precalc (void *_l)
266 struct levene_info *l = (struct levene_info *) _l;
268 lz_denominator = (double *) xmalloc(sizeof(double) * l->n_dep);
270 /* This stuff could go in the first post calc . . . */
271 for (v = 0; v < l->n_dep; ++v)
273 struct hsh_iterator hi;
274 struct group_statistics *g;
276 struct variable *var = l->v_dep[v] ;
277 struct hsh_table *hash = group_proc_get (var)->group_hash;
280 for(g = (struct group_statistics *) hsh_first(hash,&hi);
282 g = (struct group_statistics *) hsh_next(hash,&hi) )
284 g->lz_mean = g->lz_total / g->n ;
286 lz_denominator[v] = 0;
291 levene2_calc (const struct ccase *c, void *_l)
296 struct levene_info *l = (struct levene_info *) _l;
298 double weight = dict_get_case_weight(default_dict,c,&warn);
300 const union value *gv = case_data (c, l->v_indep->fv);
301 struct group_statistics key;
303 /* Skip the entire case if /MISSING=LISTWISE is set */
304 if ( l->missing == LEV_LISTWISE )
306 for (i = 0; i < l->n_dep; ++i)
308 struct variable *v = l->v_dep[i];
309 const union value *val = case_data (c, v->fv);
311 if (l->is_missing(&v->miss, val) )
320 for (i = 0; i < l->n_dep; ++i)
323 struct variable *var = l->v_dep[i] ;
324 const union value *v = case_data (c, var->fv);
325 struct group_statistics *gs;
327 gs = hsh_find(group_proc_get (var)->group_hash,(void *) &key );
332 if ( ! l->is_missing (&var->miss, v) )
334 levene_z = fabs(v->f - gs->mean);
335 lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean);
344 levene2_postcalc (void *_l)
348 struct levene_info *l = (struct levene_info *) _l;
350 for (v = 0; v < l->n_dep; ++v)
352 double lz_numerator = 0;
353 struct hsh_iterator hi;
354 struct group_statistics *g;
356 struct variable *var = l->v_dep[v] ;
357 struct group_proc *gp = group_proc_get (var);
358 struct hsh_table *hash = gp->group_hash;
360 for(g = (struct group_statistics *) hsh_first(hash,&hi);
362 g = (struct group_statistics *) hsh_next(hash,&hi) )
364 lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
366 lz_numerator *= ( gp->ugs.n - gp->n_groups );
368 lz_denominator[v] *= (gp->n_groups - 1);
370 gp->levene = lz_numerator / lz_denominator[v] ;
374 /* Now clear up after ourselves */
375 free(lz_denominator);