1 /* This file is part of GNU PSPP
2 Computes Levene test statistic.
4 Copyright (C) 2004 Free Software Foundation, Inc.
5 Written by John Darrington <john@darrington.wattle.id.au>
7 This program is free software; you can redistribute it and/or
8 modify it under the terms of the GNU General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include <libpspp/message.h>
25 #include <data/case.h>
26 #include <data/casefile.h>
27 #include <data/dictionary.h>
28 #include "group-proc.h"
29 #include <libpspp/hash.h>
30 #include <libpspp/str.h>
31 #include <data/variable.h>
32 #include <data/procedure.h>
33 #include <data/casefilter.h>
34 #include <libpspp/alloc.h>
35 #include <libpspp/misc.h>
42 /* This module calculates the Levene statistic for variables.
44 Just for reference, the Levene Statistic is a defines as follows:
46 W = \frac{ (n-k)\sum_{i=1}^k n_i(Z_{iL} - Z_{LL})^2}
47 { (k-1)\sum_{i=1}^k \sum_{j=1}^{n_i} (Z_{ij} - Z_{iL})^2}
50 k is the number of groups
51 n is the total number of samples
52 n_i is the number of samples in the ith group
53 Z_{ij} is | Y_{ij} - Y_{iL} | where Y_{iL} is the mean of the ith group
54 Z_{iL} is the mean of Z_{ij} over the ith group
55 Z_{LL} is the grand mean of Z_{ij}
57 Imagine calculating that with pencil and paper!
65 /* Per group statistics */
66 struct t_test_proc **group_stats;
68 /* The independent variable */
69 struct variable *v_indep;
71 /* Number of dependent variables */
74 /* The dependent variables */
75 struct variable **v_dep;
77 /* Filter for missing values */
78 struct casefilter *filter;
82 static void levene_precalc (const struct levene_info *l);
83 static int levene_calc (const struct dictionary *dict, const struct ccase *,
84 const struct levene_info *l);
85 static void levene_postcalc (void *);
89 static void levene2_precalc (struct levene_info *l);
90 static int levene2_calc (const struct dictionary *, const struct ccase *,
91 struct levene_info *l);
92 static void levene2_postcalc (void *);
96 levene(const struct dictionary *dict,
97 const struct casefile *cf,
98 struct variable *v_indep, size_t n_dep, struct variable **v_dep,
99 struct casefilter *filter)
101 struct casereader *r;
103 struct levene_info l;
112 for(r = casefile_get_reader (cf, filter);
113 casereader_read (r, &c) ;
116 levene_calc (dict, &c, &l);
118 casereader_destroy (r);
119 levene_postcalc (&l);
122 for(r = casefile_get_reader (cf, filter);
123 casereader_read (r, &c) ;
126 levene2_calc (dict, &c,&l);
128 casereader_destroy (r);
129 levene2_postcalc (&l);
132 /* Internal variables used in calculating the Levene statistic */
134 /* Per variable statistics */
137 /* Total of all lz */
143 /* The total number of cases */
146 /* Number of groups */
150 /* An array of lz_stats for each variable */
151 static struct lz_stats *lz;
155 levene_precalc (const struct levene_info *l)
159 lz = xnmalloc (l->n_dep, sizeof *lz);
161 for(i = 0; i < l->n_dep ; ++i )
163 struct variable *var = l->v_dep[i];
164 struct group_proc *gp = group_proc_get (var);
165 struct group_statistics *gs;
166 struct hsh_iterator hi;
168 lz[i].grand_total = 0;
170 lz[i].n_groups = gp->n_groups ;
173 for ( gs = hsh_first(gp->group_hash, &hi);
175 gs = hsh_next(gp->group_hash, &hi))
185 levene_calc (const struct dictionary *dict, const struct ccase *c,
186 const struct levene_info *l)
190 const union value *gv = case_data (c, l->v_indep->fv);
191 struct group_statistics key;
192 double weight = dict_get_case_weight (dict, c, &warn);
196 for (i = 0; i < l->n_dep; ++i)
198 struct variable *var = l->v_dep[i];
199 struct group_proc *gp = group_proc_get (var);
201 const union value *v = case_data (c, var->fv);
202 struct group_statistics *gs;
204 gs = hsh_find(gp->group_hash,(void *) &key );
209 if ( ! casefilter_variable_missing (l->filter, c, var))
211 levene_z= fabs(v->f - gs->mean);
212 lz[i].grand_total += levene_z * weight;
213 lz[i].total_n += weight;
215 gs->lz_total += levene_z * weight;
223 levene_postcalc (void *_l)
227 struct levene_info *l = (struct levene_info *) _l;
229 for (v = 0; v < l->n_dep; ++v)
232 lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
240 /* The denominator for the expression for the Levene */
241 static double *lz_denominator = 0;
244 levene2_precalc (struct levene_info *l)
248 lz_denominator = xnmalloc (l->n_dep, sizeof *lz_denominator);
250 /* This stuff could go in the first post calc . . . */
255 struct hsh_iterator hi;
256 struct group_statistics *g;
258 struct variable *var = l->v_dep[v] ;
259 struct hsh_table *hash = group_proc_get (var)->group_hash;
262 for(g = (struct group_statistics *) hsh_first(hash,&hi);
264 g = (struct group_statistics *) hsh_next(hash,&hi) )
266 g->lz_mean = g->lz_total / g->n ;
268 lz_denominator[v] = 0;
273 levene2_calc (const struct dictionary *dict, const struct ccase *c,
274 struct levene_info *l)
279 double weight = dict_get_case_weight (dict, c, &warn);
281 const union value *gv = case_data (c, l->v_indep->fv);
282 struct group_statistics key;
286 for (i = 0; i < l->n_dep; ++i)
289 struct variable *var = l->v_dep[i] ;
290 const union value *v = case_data (c, var->fv);
291 struct group_statistics *gs;
293 gs = hsh_find(group_proc_get (var)->group_hash,(void *) &key );
298 if ( ! casefilter_variable_missing (l->filter, c, var))
301 levene_z = fabs(v->f - gs->mean);
302 lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean);
311 levene2_postcalc (void *_l)
315 struct levene_info *l = (struct levene_info *) _l;
317 for (v = 0; v < l->n_dep; ++v)
319 double lz_numerator = 0;
320 struct hsh_iterator hi;
321 struct group_statistics *g;
323 struct variable *var = l->v_dep[v] ;
324 struct group_proc *gp = group_proc_get (var);
325 struct hsh_table *hash = gp->group_hash;
327 for(g = (struct group_statistics *) hsh_first(hash,&hi);
329 g = (struct group_statistics *) hsh_next(hash,&hi) )
331 lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
333 lz_numerator *= ( gp->ugs.n - gp->n_groups );
335 lz_denominator[v] *= (gp->n_groups - 1);
337 gp->levene = lz_numerator / lz_denominator[v] ;
341 /* Now clear up after ourselves */
342 free(lz_denominator);