X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Flevene.c;h=7bd582105f940c28ffeb26506d4c5fe941ff728d;hb=b5c82cc9aabe7e641011130240ae1b2e84348e23;hp=86a32e7ab92a380763ececcc1984955eff768a10;hpb=81fff61a96bece351e381ad3fef8ab1248a952ba;p=pspp-builds.git diff --git a/src/math/levene.c b/src/math/levene.c index 86a32e7a..7bd58210 100644 --- a/src/math/levene.c +++ b/src/math/levene.c @@ -1,42 +1,38 @@ -/* This file is part of GNU PSPP - Computes Levene test statistic. +/* PSPP - a program for statistical analysis. + Copyright (C) 2004, 2009 Free Software Foundation, Inc. - Copyright (C) 2004 Free Software Foundation, Inc. - Written by John Darrington + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include #include "levene.h" #include #include -#include +#include #include #include "group-proc.h" #include #include #include #include -#include #include #include "group.h" #include #include +#include "xalloc.h" + /* This module calculates the Levene statistic for variables. @@ -65,73 +61,24 @@ struct levene_info struct t_test_proc **group_stats; /* The independent variable */ - struct variable *v_indep; + const struct variable *v_indep; /* Number of dependent variables */ size_t n_dep; /* The dependent variables */ - struct variable **v_dep; + const struct variable **v_dep; - /* How to treat missing values */ - enum lev_missing missing; + /* Filter for missing values */ + enum mv_class exclude; - /* Function to test for missing values */ - is_missing_func *is_missing; -}; - -/* First pass */ -static void levene_precalc (const struct levene_info *l); -static int levene_calc (const struct ccase *, void *); -static void levene_postcalc (void *); - - -/* Second pass */ -static void levene2_precalc (void *); -static int levene2_calc (const struct ccase *, void *); -static void levene2_postcalc (void *); - - -void -levene(const struct casefile *cf, - struct variable *v_indep, size_t n_dep, struct variable **v_dep, - enum lev_missing missing, is_missing_func value_is_missing) -{ - struct casereader *r; - struct ccase c; - struct levene_info l; - - l.n_dep = n_dep; - l.v_indep = v_indep; - l.v_dep = v_dep; - l.missing = missing; - l.is_missing = value_is_missing; + /* An array of lz_stats for each variable */ + struct lz_stats *lz; + /* The denominator for the expression for the Levene */ + double *lz_denominator; - - levene_precalc(&l); - for(r = casefile_get_reader (cf); - casereader_read (r, &c) ; - case_destroy (&c)) - { - levene_calc(&c,&l); - } - casereader_destroy (r); - levene_postcalc(&l); - - levene2_precalc(&l); - for(r = casefile_get_reader (cf); - casereader_read (r, &c) ; - case_destroy (&c)) - { - levene2_calc(&c,&l); - } - casereader_destroy (r); - levene2_postcalc(&l); - -} - -/* Internal variables used in calculating the Levene statistic */ +}; /* Per variable statistics */ struct lz_stats @@ -143,137 +90,159 @@ struct lz_stats double grand_mean; /* The total number of cases */ - double total_n ; + double total_n ; /* Number of groups */ int n_groups; }; -/* An array of lz_stats for each variable */ -static struct lz_stats *lz; +/* First pass */ +static void levene_precalc (const struct levene_info *l); +static int levene_calc (const struct dictionary *dict, const struct ccase *, + const struct levene_info *l); +static void levene_postcalc (struct levene_info *); + + +/* Second pass */ +static void levene2_precalc (struct levene_info *l); +static int levene2_calc (const struct dictionary *, const struct ccase *, + struct levene_info *l); +static void levene2_postcalc (struct levene_info *); + + +void +levene(const struct dictionary *dict, + struct casereader *reader, + const struct variable *v_indep, size_t n_dep, + const struct variable **v_dep, + enum mv_class exclude) +{ + struct casereader *pass1, *pass2; + struct ccase *c; + struct levene_info l; + + l.n_dep = n_dep; + l.v_indep = v_indep; + l.v_dep = v_dep; + l.exclude = exclude; + l.lz = xnmalloc (l.n_dep, sizeof *l.lz); + l.lz_denominator = xnmalloc (l.n_dep, sizeof *l.lz_denominator); + + casereader_split (reader, &pass1, &pass2); + levene_precalc (&l); + for (; (c = casereader_read (pass1)) != NULL; case_unref (c)) + levene_calc (dict, c, &l); + casereader_destroy (pass1); + levene_postcalc (&l); + + levene2_precalc(&l); + for (; (c = casereader_read (pass2)) != NULL; case_unref (c)) + levene2_calc (dict, c, &l); + casereader_destroy (pass2); + levene2_postcalc (&l); -static void + free (l.lz_denominator); + free (l.lz); +} + +static void levene_precalc (const struct levene_info *l) { size_t i; - lz = xnmalloc (l->n_dep, sizeof *lz); - - for(i = 0; i < l->n_dep ; ++i ) + for(i = 0; i < l->n_dep ; ++i ) { - struct variable *var = l->v_dep[i]; + const struct variable *var = l->v_dep[i]; struct group_proc *gp = group_proc_get (var); struct group_statistics *gs; struct hsh_iterator hi; - lz[i].grand_total = 0; - lz[i].total_n = 0; - lz[i].n_groups = gp->n_groups ; + l->lz[i].grand_total = 0; + l->lz[i].total_n = 0; + l->lz[i].n_groups = gp->n_groups ; + - for ( gs = hsh_first(gp->group_hash, &hi); gs != 0; gs = hsh_next(gp->group_hash, &hi)) { gs->lz_total = 0; } - + } } -static int -levene_calc (const struct ccase *c, void *_l) +static int +levene_calc (const struct dictionary *dict, const struct ccase *c, + const struct levene_info *l) { size_t i; - int warn = 0; - struct levene_info *l = (struct levene_info *) _l; - const union value *gv = case_data (c, l->v_indep->fv); + bool warn = false; + const union value *gv = case_data (c, l->v_indep); struct group_statistics key; - double weight = dict_get_case_weight(default_dict,c,&warn); - - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( l->missing == LEV_LISTWISE ) - { - for (i = 0; i < l->n_dep; ++i) - { - struct variable *v = l->v_dep[i]; - const union value *val = case_data (c, v->fv); + double weight = dict_get_case_weight (dict, c, &warn); - if (l->is_missing (&v->miss, val) ) - { - return 0; - } - } - } - - key.id = *gv; - for (i = 0; i < l->n_dep; ++i) + for (i = 0; i < l->n_dep; ++i) { - struct variable *var = l->v_dep[i]; + const struct variable *var = l->v_dep[i]; struct group_proc *gp = group_proc_get (var); double levene_z; - const union value *v = case_data (c, var->fv); + const union value *v = case_data (c, var); struct group_statistics *gs; gs = hsh_find(gp->group_hash,(void *) &key ); - if ( 0 == gs ) + if ( 0 == gs ) continue ; - if ( ! l->is_missing(&var->miss, v)) + if ( !var_is_value_missing (var, v, l->exclude)) { levene_z= fabs(v->f - gs->mean); - lz[i].grand_total += levene_z * weight; - lz[i].total_n += weight; + l->lz[i].grand_total += levene_z * weight; + l->lz[i].total_n += weight; gs->lz_total += levene_z * weight; } - } return 0; } -static void -levene_postcalc (void *_l) +static void +levene_postcalc (struct levene_info *l) { size_t v; - struct levene_info *l = (struct levene_info *) _l; - - for (v = 0; v < l->n_dep; ++v) + for (v = 0; v < l->n_dep; ++v) { /* This is Z_LL */ - lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ; + l->lz[v].grand_mean = l->lz[v].grand_total / l->lz[v].total_n ; } - + } -/* The denominator for the expression for the Levene */ -static double *lz_denominator; -static void -levene2_precalc (void *_l) +static void +levene2_precalc (struct levene_info *l) { size_t v; - struct levene_info *l = (struct levene_info *) _l; - - lz_denominator = xnmalloc (l->n_dep, sizeof *lz_denominator); /* This stuff could go in the first post calc . . . */ - for (v = 0; v < l->n_dep; ++v) + for (v = 0; + v < l->n_dep; + ++v) { struct hsh_iterator hi; struct group_statistics *g; - struct variable *var = l->v_dep[v] ; + const struct variable *var = l->v_dep[v] ; struct hsh_table *hash = group_proc_get (var)->group_hash; @@ -283,56 +252,40 @@ levene2_precalc (void *_l) { g->lz_mean = g->lz_total / g->n ; } - lz_denominator[v] = 0; + l->lz_denominator[v] = 0; } } -static int -levene2_calc (const struct ccase *c, void *_l) +static int +levene2_calc (const struct dictionary *dict, const struct ccase *c, + struct levene_info *l) { size_t i; - int warn = 0; - - struct levene_info *l = (struct levene_info *) _l; + bool warn = false; - double weight = dict_get_case_weight(default_dict,c,&warn); + double weight = dict_get_case_weight (dict, c, &warn); - const union value *gv = case_data (c, l->v_indep->fv); + const union value *gv = case_data (c, l->v_indep); struct group_statistics key; - /* Skip the entire case if /MISSING=LISTWISE is set */ - if ( l->missing == LEV_LISTWISE ) - { - for (i = 0; i < l->n_dep; ++i) - { - struct variable *v = l->v_dep[i]; - const union value *val = case_data (c, v->fv); - - if (l->is_missing(&v->miss, val) ) - { - return 0; - } - } - } - key.id = *gv; - for (i = 0; i < l->n_dep; ++i) + for (i = 0; i < l->n_dep; ++i) { double levene_z; - struct variable *var = l->v_dep[i] ; - const union value *v = case_data (c, var->fv); + const struct variable *var = l->v_dep[i] ; + const union value *v = case_data (c, var); struct group_statistics *gs; gs = hsh_find(group_proc_get (var)->group_hash,(void *) &key ); - if ( 0 == gs ) + if ( 0 == gs ) continue; - if ( ! l->is_missing (&var->miss, v) ) + if ( !var_is_value_missing (var, v, l->exclude)) { - levene_z = fabs(v->f - gs->mean); - lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean); + levene_z = fabs(v->f - gs->mean); + l->lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean); } } @@ -340,20 +293,18 @@ levene2_calc (const struct ccase *c, void *_l) } -static void -levene2_postcalc (void *_l) +static void +levene2_postcalc (struct levene_info *l) { size_t v; - struct levene_info *l = (struct levene_info *) _l; - - for (v = 0; v < l->n_dep; ++v) + for (v = 0; v < l->n_dep; ++v) { double lz_numerator = 0; struct hsh_iterator hi; struct group_statistics *g; - struct variable *var = l->v_dep[v] ; + const struct variable *var = l->v_dep[v] ; struct group_proc *gp = group_proc_get (var); struct hsh_table *hash = gp->group_hash; @@ -361,18 +312,14 @@ levene2_postcalc (void *_l) g != 0 ; g = (struct group_statistics *) hsh_next(hash,&hi) ) { - lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean ); + lz_numerator += g->n * pow2(g->lz_mean - l->lz[v].grand_mean ); } lz_numerator *= ( gp->ugs.n - gp->n_groups ); - lz_denominator[v] *= (gp->n_groups - 1); + l->lz_denominator[v] *= (gp->n_groups - 1); - gp->levene = lz_numerator / lz_denominator[v] ; + gp->levene = lz_numerator / l->lz_denominator[v] ; } - - /* Now clear up after ourselves */ - free(lz_denominator); - free(lz); }