1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "libpspp/misc.h"
23 #include "libpspp/hmap.h"
24 #include "data/value.h"
25 #include "data/val-type.h"
27 #include <gl/xalloc.h>
32 struct hmap_node node;
40 typedef unsigned int hash_func (const struct levene *, const union value *v);
41 typedef bool cmp_func (const struct levene *, const union value *v0, const union value *v1);
45 /* Width of the categorical variable */
48 /* The value dividing the groups. Valid only for dichotomous categorical variable.*/
49 const union value *cutpoint;
52 /* A hashtable of struct lev objects indexed by union value */
59 /* A state variable indicating how many passes have been done */
70 unique_hash (const struct levene *nl, const union value *val)
72 return value_hash (val, nl->gvw, 0);
76 unique_cmp (const struct levene *nl, const union value *val0, const union value *val1)
78 return value_equal (val0, val1, nl->gvw);
82 cutpoint_hash (const struct levene *nl, const union value *val)
84 int x = value_compare_3way (val, nl->cutpoint, nl->gvw);
90 cutpoint_cmp (const struct levene *nl, const union value *val0, const union value *val1)
92 int x = value_compare_3way (val0, nl->cutpoint, nl->gvw);
94 int y = value_compare_3way (val1, nl->cutpoint, nl->gvw);
105 find_group (const struct levene *nl, const union value *target)
107 struct lev *l = NULL;
109 HMAP_FOR_EACH_WITH_HASH (l, struct lev, node, nl->hash (nl, target), &nl->hmap)
111 if (nl->cmp (nl, &l->group, target))
119 levene_create (int indep_width, const union value *cutpoint)
121 struct levene *nl = XZALLOC (struct levene);
123 hmap_init (&nl->hmap);
125 nl->gvw = indep_width;
126 nl->cutpoint = cutpoint;
128 nl->hash = cutpoint ? cutpoint_hash : unique_hash;
129 nl->cmp = cutpoint ? cutpoint_cmp : unique_cmp;
135 /* Data accumulation. First pass */
137 levene_pass_one (struct levene *nl, double value, double weight, const union value *gv)
139 struct lev *lev = find_group (nl, gv);
145 assert (nl->pass == 1);
149 struct lev *l = XZALLOC (struct lev);
150 value_clone (&l->group, gv, nl->gvw);
151 hmap_insert (&nl->hmap, &l->node, nl->hash (nl, &l->group));
156 lev->t_bar += value * weight;
158 nl->grand_n += weight;
161 /* Data accumulation. Second pass */
163 levene_pass_two (struct levene *nl, double value, double weight, const union value *gv)
165 struct lev *lev = NULL;
174 HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
179 assert (nl->pass == 2);
181 lev = find_group (nl, gv);
183 lev->z_mean += fabs (value - lev->t_bar) * weight;
184 nl->z_grand_mean += fabs (value - lev->t_bar) * weight;
187 /* Data accumulation. Third pass */
189 levene_pass_three (struct levene *nl, double value, double weight, const union value *gv)
192 struct lev *lev = NULL;
201 HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
206 nl->z_grand_mean /= nl->grand_n;
209 assert (nl->pass == 3);
210 lev = find_group (nl, gv);
212 z = fabs (value - lev->t_bar);
213 nl->denominator += pow2 (z - lev->z_mean) * weight;
217 /* Return the value of the levene statistic */
219 levene_calculate (struct levene *nl)
224 double numerator = 0.0;
227 /* The Levene calculation requires three passes.
228 Normally this should have been done prior to calling this function.
229 However, in abnormal circumstances (eg. the dataset is empty) there
230 will have been no passes.
232 assert (nl->pass == 0 || nl->pass == 3);
237 nl->denominator *= hmap_count (&nl->hmap) - 1;
239 HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
241 numerator += l->n * pow2 (l->z_mean - nl->z_grand_mean);
245 numerator *= nn - hmap_count (&nl->hmap);
247 return numerator / nl->denominator;
251 levene_destroy (struct levene *nl)
256 HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
258 value_destroy (&l->group, nl->gvw);
262 hmap_destroy (&nl->hmap);