1 /* This file is part of GNU PSPP
2 Computes Levene test statistic.
4 Copyright (C) 2004 Free Software Foundation, Inc.
5 Written by John Darrington <john@darrington.wattle.id.au>
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
39 /* This module calculates the Levene statistic for variables.
41 Just for reference, the Levene Statistic is a defines as follows:
43 W = \frac{ (n-k)\sum_{i=1}^k n_i(Z_{iL} - Z_{LL})^2}
44 { (k-1)\sum_{i=1}^k \sum_{j=1}^{n_i} (Z_{ij} - Z_{iL})^2}
47 k is the number of groups
48 n is the total number of samples
49 n_i is the number of samples in the ith group
50 Z_{ij} is | Y_{ij} - Y_{iL} | where Y_{iL} is the mean of the ith group
51 Z_{iL} is the mean of Z_{ij} over the ith group
52 Z_{LL} is the grand mean of Z_{ij}
54 Imagine calculating that with pencil and paper!
58 static struct group_statistics *get_group(int v, struct group_statistics *key);
64 /* Per group statistics */
65 struct t_test_proc **group_stats;
67 /* The independent variable */
68 struct variable *v_indep;
70 /* Number of dependent variables */
73 /* The dependent variables */
74 struct variable **v_dep;
76 /* How to treat missing values */
77 enum lev_missing missing;
79 /* Function to test for missing values */
80 is_missing_func is_missing;
85 static void levene_precalc (const struct levene_info *l);
86 static int levene_calc (const struct ccase *, void *);
87 static void levene_postcalc (void *);
91 static void levene2_precalc (void *);
92 static int levene2_calc (const struct ccase *, void *);
93 static void levene2_postcalc (void *);
97 levene(const struct casefile *cf,
98 struct variable *v_indep, int n_dep, struct variable **v_dep,
99 enum lev_missing missing, is_missing_func value_is_missing)
101 struct casereader *r;
103 struct levene_info l;
109 l.is_missing = value_is_missing;
114 for(r = casefile_get_reader (cf);
115 casereader_read (r, &c) ;
120 casereader_destroy (r);
124 for(r = casefile_get_reader (cf);
125 casereader_read (r, &c) ;
130 casereader_destroy (r);
131 levene2_postcalc(&l);
135 static struct hsh_table **hash;
137 /* Internal variables used in calculating the Levene statistic */
139 /* Per variable statistics */
142 /* Total of all lz */
148 /* The total number of cases */
151 /* Number of groups */
155 /* An array of lz_stats for each variable */
156 static struct lz_stats *lz;
158 /* Set to 1 if the groups require inequality comparisions */
159 static int inequality_compare;
163 levene_precalc (const struct levene_info *l)
167 lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
169 hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep );
171 for(i=0; i < l->n_dep ; ++i )
173 struct variable *v = l->v_dep[i];
175 int number_of_groups = v->p.grp_data.n_groups ;
177 hash[i] = hsh_create (l->n_dep * number_of_groups,
178 (hsh_compare_func *) compare_group,
179 (hsh_hash_func *) hash_group,
180 0,(void *) l->v_indep->width);
182 lz[i].grand_total = 0;
184 lz[i].n_groups = number_of_groups;
186 for (g = 0 ; g < v->p.grp_data.n_groups ; ++g )
188 struct group_statistics *gs = &v->p.grp_data.gs[g];
190 hsh_insert(hash[i], gs);
191 if ( gs->criterion != CMP_EQ )
193 inequality_compare = 1;
201 levene_calc (const struct ccase *c, void *_l)
205 struct levene_info *l = (struct levene_info *) _l;
206 const union value *gv = case_data (c, l->v_indep->fv);
207 struct group_statistics key;
208 double weight = dict_get_case_weight(default_dict,c,&warn);
211 /* Skip the entire case if /MISSING=LISTWISE is set */
212 if ( l->missing == LEV_LISTWISE )
214 for (i = 0; i < l->n_dep; ++i)
216 struct variable *v = l->v_dep[i];
217 const union value *val = case_data (c, v->fv);
219 if (l->is_missing(val,v) )
228 key.criterion = CMP_EQ;
230 for (i = 0; i < l->n_dep; ++i)
232 struct variable *var = l->v_dep[i];
234 const union value *v = case_data (c, var->fv);
235 struct group_statistics *gs;
236 gs = get_group(i,&key);
240 if ( ! l->is_missing(v,var))
242 levene_z= fabs(v->f - gs->mean);
243 lz[i].grand_total += levene_z * weight;
244 lz[i].total_n += weight;
246 gs->lz_total += levene_z * weight;
254 levene_postcalc (void *_l)
258 struct levene_info *l = (struct levene_info *) _l;
260 for (v = 0; v < l->n_dep; ++v)
262 lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
269 /* The denominator for the expression for the Levene */
270 static double *lz_denominator;
273 levene2_precalc (void *_l)
277 struct levene_info *l = (struct levene_info *) _l;
279 lz_denominator = (double *) xmalloc(sizeof(double) * l->n_dep);
281 /* This stuff could go in the first post calc . . . */
282 for (v = 0; v < l->n_dep; ++v)
284 struct hsh_iterator hi;
285 struct group_statistics *g;
286 for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
288 g = (struct group_statistics *) hsh_next(hash[v],&hi) )
290 g->lz_mean = g->lz_total/g->n ;
292 lz_denominator[v] = 0;
297 levene2_calc (const struct ccase *c, void *_l)
302 struct levene_info *l = (struct levene_info *) _l;
304 double weight = dict_get_case_weight(default_dict,c,&warn);
306 const union value *gv = case_data (c, l->v_indep->fv);
307 struct group_statistics key;
309 /* Skip the entire case if /MISSING=LISTWISE is set */
310 if ( l->missing == LEV_LISTWISE )
312 for (i = 0; i < l->n_dep; ++i)
314 struct variable *v = l->v_dep[i];
315 const union value *val = case_data (c, v->fv);
317 if (l->is_missing(val,v) )
325 key.criterion = CMP_EQ;
327 for (i = 0; i < l->n_dep; ++i)
330 struct variable *var = l->v_dep[i] ;
331 const union value *v = case_data (c, var->fv);
332 struct group_statistics *gs;
333 gs = get_group(i,&key);
337 if ( ! l->is_missing(v,var) )
339 levene_z = fabs(v->f - gs->mean);
340 lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean);
349 levene2_postcalc (void *_l)
353 struct levene_info *l = (struct levene_info *) _l;
355 for (v = 0; v < l->n_dep; ++v)
357 double lz_numerator = 0;
358 struct hsh_iterator hi;
359 struct group_statistics *g;
360 for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
362 g = (struct group_statistics *) hsh_next(hash[v],&hi) )
365 lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
369 lz_numerator *= ( l->v_dep[v]->p.grp_data.ugs.n -
370 l->v_dep[v]->p.grp_data.n_groups );
372 lz_denominator[v] /= (l->v_dep[v]->p.grp_data.n_groups - 1);
374 l->v_dep[v]->p.grp_data.levene = lz_numerator/lz_denominator[v] ;
377 /* Now clear up after ourselves */
378 free(lz_denominator);
379 for (v = 0; v < l->n_dep; ++v)
381 hsh_destroy(hash[v]);
389 /* Return the group belonging to the v_th dependent variable
390 which matches the key */
391 static struct group_statistics *
392 get_group(int v, struct group_statistics *key)
394 struct group_statistics *gs;
395 gs = hsh_find(hash[v],key);
398 if ( ( !gs ) && inequality_compare)
400 /* Here we degrade to a linear search.
401 This would seem inefficient. However, it should only ever happen
402 with the T-TEST, for which there are exactly two groups */
404 struct hsh_iterator hi;
406 assert( hsh_count(hash[v]) == 2 ) ;
407 for(gs = (struct group_statistics *) hsh_first(hash[v],&hi);
409 gs = (struct group_statistics *) hsh_next(hash[v],&hi) )
413 cmp = compare_values(&gs->id, &key->id, 0);
415 assert( cmp != 0 ); /* or else the hash would have found something */
418 ( gs->criterion == CMP_GT || gs->criterion == CMP_GE )
423 ( gs->criterion == CMP_LT || gs->criterion == CMP_LE )