1 /* This file is part of GNU PSPP
2 Computes Levene test statistic.
4 Copyright (C) 2004 Free Software Foundation, Inc.
5 Written by John Darrington <john@darrington.wattle.id.au>
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 /* This module calculates the Levene statistic for variables.
38 Just for reference, the Levene Statistic is a defines as follows:
40 W = \frac{ (n-k)\sum_{i=1}^k n_i(Z_{iL} - Z_{LL})^2}
41 { (k-1)\sum_{i=1}^k \sum_{j=1}^{n_i} (Z_{ij} - Z_{iL})^2}
44 k is the number of groups
45 n is the total number of samples
46 n_i is the number of samples in the ith group
47 Z_{ij} is | Y_{ij} - Y_{iL} | where Y_{iL} is the mean of the ith group
48 Z_{iL} is the mean of Z_{ij} over the ith group
49 Z_{LL} is the grand mean of Z_{ij}
51 Imagine calculating that with pencil and paper!
55 static struct group_statistics *get_group(int v, struct group_statistics *key);
58 static void levene_precalc (void *);
59 static int levene_calc (struct ccase *, void *);
60 static void levene_postcalc (void *);
64 static void levene2_precalc (void *);
65 static int levene2_calc (struct ccase *, void *);
66 static void levene2_postcalc (void *);
72 /* The number of groups */
75 /* Per group statistics */
76 struct t_test_proc **group_stats;
78 /* The independent variable */
79 struct variable *v_indep;
81 /* Number of dependent variables */
84 /* The dependent variables */
85 struct variable **v_dep;
87 /* How to treat missing values */
88 enum lev_missing missing;
90 /* Function to test for missing values */
91 is_missing_func is_missing;
98 levene(struct variable *v_indep, int n_dep, struct variable **v_dep,
99 enum lev_missing missing, is_missing_func value_is_missing)
101 struct levene_info l;
107 l.is_missing = value_is_missing;
109 procedure_with_splits (levene_precalc, levene_calc, levene_postcalc, &l);
110 procedure_with_splits (levene2_precalc, levene2_calc, levene2_postcalc, &l);
114 static struct hsh_table **hash;
116 /* Return -1 if the id of a is less than b; +1 if greater than and
119 compare_group(const struct group_statistics *a,
120 const struct group_statistics *b,
123 int id_cmp = compare_values(&a->id, &b->id, width);
128 c= memcmp(&a->criterion,&b->criterion,sizeof(enum comparison));
137 hash_group(const struct group_statistics *g, int width)
142 id_hash = hsh_hash_double (g->id.f);
144 id_hash = hsh_hash_bytes (g->id.s, width);
149 /* Internal variables used in calculating the Levene statistic */
151 /* Per variable statistics */
154 /* Total of all lz */
160 /* The total number of cases */
163 /* Number of groups */
167 /* An array of lz_stats for each variable */
168 static struct lz_stats *lz;
170 /* Set to 1 if the groups require inequality comparisions */
171 static int inequality_compare;
175 levene_precalc (void *_l)
178 struct levene_info *l = (struct levene_info *) _l;
180 lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
182 hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep );
184 for(i=0; i < l->n_dep ; ++i )
186 struct variable *v = l->v_dep[i];
188 int number_of_groups = v->p.t_t.n_groups ;
190 hash[i] = hsh_create (l->n_dep * number_of_groups,
191 (hsh_compare_func *) compare_group,
192 (hsh_hash_func *) hash_group,
193 0,(void *) l->v_indep->width);
195 lz[i].grand_total = 0;
197 lz[i].n_groups = number_of_groups;
199 for (g = 0 ; g < v->p.t_t.n_groups ; ++g )
201 struct group_statistics *gs = &v->p.t_t.gs[g];
203 hsh_insert(hash[i], gs);
204 if ( gs->criterion != CMP_EQ )
206 inequality_compare = 1;
214 levene_calc (struct ccase *c, void *_l)
217 struct levene_info *l = (struct levene_info *) _l;
218 union value *gv = &c->data[l->v_indep->fv];
219 struct group_statistics key;
220 double weight = dict_get_case_weight(default_dict,c);
223 /* Skip the entire case if /MISSING=LISTWISE is set */
224 if ( l->missing == LEV_LISTWISE )
226 for (i = 0; i < l->n_dep; ++i)
228 struct variable *v = l->v_dep[i];
229 union value *val = &c->data[v->fv];
231 if (l->is_missing(val,v) )
240 key.criterion = CMP_EQ;
242 for (i = 0; i < l->n_dep; ++i)
244 struct variable *var = l->v_dep[i];
246 union value *v = &c->data[var->fv];
247 struct group_statistics *gs;
248 gs = get_group(i,&key);
252 if ( ! l->is_missing(v,var))
254 levene_z= fabs(v->f - gs->mean);
255 lz[i].grand_total += levene_z * weight;
256 lz[i].total_n += weight;
258 gs->lz_total += levene_z * weight;
266 levene_postcalc (void *_l)
270 struct levene_info *l = (struct levene_info *) _l;
272 for (v = 0; v < l->n_dep; ++v)
274 lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
281 /* The denominator for the expression for the Levene */
282 static double *lz_denominator;
285 levene2_precalc (void *_l)
289 struct levene_info *l = (struct levene_info *) _l;
291 lz_denominator = (double *) xmalloc(sizeof(double) * l->n_dep);
293 /* This stuff could go in the first post calc . . . */
294 for (v = 0; v < l->n_dep; ++v)
296 struct hsh_iterator hi;
297 struct group_statistics *g;
298 for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
300 g = (struct group_statistics *) hsh_next(hash[v],&hi) )
302 g->lz_mean = g->lz_total/g->n ;
304 lz_denominator[v] = 0;
309 levene2_calc (struct ccase *c, void *_l)
313 struct levene_info *l = (struct levene_info *) _l;
315 double weight = dict_get_case_weight(default_dict,c);
317 union value *gv = &c->data[l->v_indep->fv];
318 struct group_statistics key;
320 /* Skip the entire case if /MISSING=LISTWISE is set */
321 if ( l->missing == LEV_LISTWISE )
323 for (i = 0; i < l->n_dep; ++i)
325 struct variable *v = l->v_dep[i];
326 union value *val = &c->data[v->fv];
328 if (l->is_missing(val,v) )
336 key.criterion = CMP_EQ;
338 for (i = 0; i < l->n_dep; ++i)
341 struct variable *var = l->v_dep[i] ;
342 union value *v = &c->data[var->fv];
343 struct group_statistics *gs;
344 gs = get_group(i,&key);
348 if ( ! l->is_missing(v,var) )
350 levene_z = fabs(v->f - gs->mean);
351 lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean);
360 levene2_postcalc (void *_l)
364 struct levene_info *l = (struct levene_info *) _l;
366 for (v = 0; v < l->n_dep; ++v)
368 double lz_numerator = 0;
369 struct hsh_iterator hi;
370 struct group_statistics *g;
371 for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
373 g = (struct group_statistics *) hsh_next(hash[v],&hi) )
376 lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
380 lz_numerator *= ( l->v_dep[v]->p.t_t.ugs.n -
381 l->v_dep[v]->p.t_t.n_groups );
383 lz_denominator[v] /= (l->v_dep[v]->p.t_t.n_groups - 1);
385 l->v_dep[v]->p.t_t.levene = lz_numerator/lz_denominator[v] ;
388 /* Now clear up after ourselves */
389 free(lz_denominator);
390 for (v = 0; v < l->n_dep; ++v)
392 hsh_destroy(hash[v]);
400 /* Return the group belonging to the v_th dependent variable
401 which matches the key */
402 static struct group_statistics *
403 get_group(int v, struct group_statistics *key)
405 struct group_statistics *gs;
406 gs = hsh_find(hash[v],key);
409 if ( ( !gs ) && inequality_compare)
411 /* Here we degrade to a linear search.
412 This would seem inefficient. However, it should only ever happen
413 with the T-TEST, for which there are exactly two groups */
415 struct hsh_iterator hi;
417 assert( hsh_count(hash[v]) == 2 ) ;
418 for(gs = (struct group_statistics *) hsh_first(hash[v],&hi);
420 gs = (struct group_statistics *) hsh_next(hash[v],&hi) )
424 cmp = compare_values(&gs->id, &key->id, 0);
426 assert( cmp != 0 ); /* or else the hash would have found something */
429 ( gs->criterion == CMP_GT || gs->criterion == CMP_GE )
434 ( gs->criterion == CMP_LT || gs->criterion == CMP_LE )