1 /* This file is part of GNU PSPP
2 Computes Levene test statistic.
4 Copyright (C) 2004 Free Software Foundation, Inc.
5 Written by John Darrington <john@darrington.wattle.id.au>
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
34 /* This module calculates the Levene statistic for variables.
36 Just for reference, the Levene Statistic is a defines as follows:
38 W = \frac{ (n-k)\sum_{i=1}^k n_i(Z_{iL} - Z_{LL})^2}
39 { (k-1)\sum_{i=1}^k \sum_{j=1}^{n_i} (Z_{ij} - Z_{iL})^2}
42 k is the number of groups
43 n is the total number of samples
44 n_i is the number of samples in the ith group
45 Z_{ij} is | Y_{ij} - Y_{iL} | where Y_{iL} is the mean of the ith group
46 Z_{iL} is the mean of Z_{ij} over the ith group
47 Z_{LL} is the grand mean of Z_{ij}
49 Imagine calculating that with pencil and paper!
53 static void levene_precalc (void *);
54 static int levene_calc (struct ccase *, void *);
55 static void levene_postcalc (void *);
59 static void levene2_precalc (void *);
60 static int levene2_calc (struct ccase *, void *);
61 static void levene2_postcalc (void *);
67 /* The number of groups */
70 /* Per group statistics */
71 struct t_test_proc **group_stats;
73 /* The independent variable */
74 struct variable *v_indep;
76 /* Number of dependent variables */
79 /* The dependent variables */
80 struct variable **v_dep;
87 levene(struct variable *v_indep, int n_dep, struct variable **v_dep)
95 procedure(levene_precalc, levene_calc, levene_postcalc, &l);
96 procedure(levene2_precalc,levene2_calc,levene2_postcalc,&l);
100 static struct hsh_table **hash;
103 compare_group_id(const void *a_, const void *b_, void *aux)
105 const struct group_statistics *a = (struct group_statistics *) a_;
106 const struct group_statistics *b = (struct group_statistics *) b_;
108 int width = (int) aux;
110 return compare_values(&a->id, &b->id, width);
115 hash_group_id(const void *g_, void *aux)
117 const struct group_statistics *g = (struct group_statistics *) g_;
119 int width = (int) aux;
122 return hsh_hash_double (g->id.f);
124 return hsh_hash_bytes (g->id.s, width);
128 /* Internal variables used in calculating the Levene statistic */
130 /* Per variable statistics */
133 /* Total of all lz */
139 /* The total number of cases */
142 /* Number of groups */
146 /* An array of lz_stats for each variable */
147 static struct lz_stats *lz;
152 levene_precalc (void *_l)
155 struct levene_info *l = (struct levene_info *) _l;
157 lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
159 hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep );
161 for(i=0; i < l->n_dep ; ++i )
163 struct variable *v = l->v_dep[i];
165 int number_of_groups = v->p.t_t.n_groups ;
167 hash[i] = hsh_create (l->n_dep * number_of_groups,
168 compare_group_id, hash_group_id,
169 0,(void *) l->v_indep->width);
171 lz[i].grand_total = 0;
173 lz[i].n_groups = number_of_groups;
175 for (g = 0 ; g < v->p.t_t.n_groups ; ++g )
177 struct group_statistics *gs = &v->p.t_t.gs[g];
179 hsh_insert(hash[i],gs);
186 levene_calc (struct ccase *c, void *_l)
189 struct levene_info *l = (struct levene_info *) _l;
190 union value *gv = &c->data[l->v_indep->fv];
191 struct group_statistics key;
192 double weight = dict_get_case_weight(default_dict,c);
196 for (var = 0; var < l->n_dep; ++var)
199 union value *v = &c->data[l->v_dep[var]->fv];
200 struct group_statistics *gs;
201 gs = hsh_find(hash[var],&key);
202 assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width));
204 /* FIXME: handle SYSMIS properly */
206 levene_z= fabs(v->f - gs->mean);
207 lz[var].grand_total += levene_z * weight;
208 lz[var].total_n += weight;
210 gs->lz_total += levene_z * weight;
218 levene_postcalc (void *_l)
222 struct levene_info *l = (struct levene_info *) _l;
224 for (v = 0; v < l->n_dep; ++v)
226 lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
233 /* The denominator for the expression for the Levene */
234 static double *lz_denominator;
237 levene2_precalc (void *_l)
241 struct levene_info *l = (struct levene_info *) _l;
243 lz_denominator = (double *) xmalloc(sizeof(double) * l->n_dep);
245 /* This stuff could go in the first post calc . . . */
246 for (v = 0; v < l->n_dep; ++v)
248 struct hsh_iterator hi;
249 struct group_statistics *g;
250 for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
252 g = (struct group_statistics *) hsh_next(hash[v],&hi) )
254 g->lz_mean = g->lz_total/g->n ;
256 lz_denominator[v] = 0;
261 levene2_calc (struct ccase *c, void *_l)
265 struct levene_info *l = (struct levene_info *) _l;
267 double weight = dict_get_case_weight(default_dict,c);
269 union value *gv = &c->data[l->v_indep->fv];
270 struct group_statistics key;
274 for (var = 0; var < l->n_dep; ++var)
277 union value *v = &c->data[l->v_dep[var]->fv];
278 struct group_statistics *gs;
279 gs = hsh_find(hash[var],&key);
281 assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width));
283 /* FIXME: handle SYSMIS properly */
285 levene_z = fabs(v->f - gs->mean);
287 lz_denominator[var] += weight * sqr(levene_z - gs->lz_mean);
295 levene2_postcalc (void *_l)
299 struct levene_info *l = (struct levene_info *) _l;
301 for (v = 0; v < l->n_dep; ++v)
303 double lz_numerator = 0;
304 struct hsh_iterator hi;
305 struct group_statistics *g;
306 for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
308 g = (struct group_statistics *) hsh_next(hash[v],&hi) )
311 lz_numerator += g->n * sqr(g->lz_mean - lz[v].grand_mean );
315 lz_numerator *= ( l->v_dep[v]->p.t_t.ugs.n -
316 l->v_dep[v]->p.t_t.n_groups );
318 lz_denominator[v] /= (l->v_dep[v]->p.t_t.n_groups - 1);
320 l->v_dep[v]->p.t_t.levene = lz_numerator/lz_denominator[v] ;
323 /* Now clear up after ourselves */
324 free(lz_denominator);
325 for (v = 0; v < l->n_dep; ++v)
327 hsh_destroy(hash[v]);