X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flevene.c;h=7877c7ec6aa5fc039efb7fbedff900096977c8a8;hb=28d7aaf2db476de5d62eb90787fef50fec444287;hp=635c68689e93af7d872b7d184756e8f3122669e2;hpb=205ac3afa4c2b19c85819d8695abf3975bb11807;p=pspp-builds.git diff --git a/src/levene.c b/src/levene.c index 635c6868..7877c7ec 100644 --- a/src/levene.c +++ b/src/levene.c @@ -16,18 +16,23 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include -#include "error.h" #include "levene.h" +#include "error.h" +#include "case.h" +#include "casefile.h" +#include "dictionary.h" +#include "group_proc.h" #include "hash.h" #include "str.h" #include "var.h" #include "vfm.h" #include "alloc.h" -#include "stats.h" +#include "misc.h" +#include "group.h" #include #include @@ -52,26 +57,10 @@ */ -static struct group_statistics *get_group(int v, struct group_statistics *key); - -/* First pass */ -static void levene_precalc (void *); -static int levene_calc (struct ccase *, void *); -static void levene_postcalc (void *); - - -/* Second pass */ -static void levene2_precalc (void *); -static int levene2_calc (struct ccase *, void *); -static void levene2_postcalc (void *); - struct levene_info { - /* The number of groups */ - int n_groups; - /* Per group statistics */ struct t_test_proc **group_stats; @@ -88,62 +77,58 @@ struct levene_info enum lev_missing missing; /* Function to test for missing values */ - is_missing_func is_missing; - + is_missing_func *is_missing; }; +/* First pass */ +static void levene_precalc (const struct levene_info *l); +static int levene_calc (const struct ccase *, void *); +static void levene_postcalc (void *); + + +/* Second pass */ +static void levene2_precalc (void *); +static int levene2_calc (const struct ccase *, void *); +static void levene2_postcalc (void *); void -levene(struct variable *v_indep, int n_dep, struct variable **v_dep, +levene(const struct casefile *cf, + struct variable *v_indep, int n_dep, struct variable **v_dep, enum lev_missing missing, is_missing_func value_is_missing) { + struct casereader *r; + struct ccase c; struct levene_info l; - l.n_dep = n_dep; - l.v_indep = v_indep; - l.v_dep = v_dep; - l.missing = missing; + l.n_dep = n_dep; + l.v_indep = v_indep; + l.v_dep = v_dep; + l.missing = missing; l.is_missing = value_is_missing; - procedure_with_splits (levene_precalc, levene_calc, levene_postcalc, &l); - procedure_with_splits (levene2_precalc, levene2_calc, levene2_postcalc, &l); - -} -static struct hsh_table **hash; - -/* Return -1 if the id of a is less than b; +1 if greater than and - 0 if equal */ -static int -compare_group(const struct group_statistics *a, - const struct group_statistics *b, - int width) -{ - int id_cmp = compare_values(&a->id, &b->id, width); - if (id_cmp == 0 ) + levene_precalc(&l); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) { - int c; - c= memcmp(&a->criterion,&b->criterion,sizeof(enum comparison)); - return c; + levene_calc(&c,&l); } - else - return id_cmp; -} - - -static unsigned -hash_group(const struct group_statistics *g, int width) -{ - unsigned id_hash; + casereader_destroy (r); + levene_postcalc(&l); - if ( 0 == width ) - id_hash = hsh_hash_double (g->id.f); - else - id_hash = hsh_hash_bytes (g->id.s, width); + levene2_precalc(&l); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + levene2_calc(&c,&l); + } + casereader_destroy (r); + levene2_postcalc(&l); - return id_hash; } /* Internal variables used in calculating the Levene statistic */ @@ -167,58 +152,46 @@ struct lz_stats /* An array of lz_stats for each variable */ static struct lz_stats *lz; -/* Set to 1 if the groups require inequality comparisions */ -static int inequality_compare; - static void -levene_precalc (void *_l) +levene_precalc (const struct levene_info *l) { int i; - struct levene_info *l = (struct levene_info *) _l; lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ; - hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep ); - - for(i=0; i < l->n_dep ; ++i ) + for(i = 0; i < l->n_dep ; ++i ) { - struct variable *v = l->v_dep[i]; - int g; - int number_of_groups = v->p.t_t.n_groups ; - - hash[i] = hsh_create (l->n_dep * number_of_groups, - (hsh_compare_func *) compare_group, - (hsh_hash_func *) hash_group, - 0,(void *) l->v_indep->width); + struct variable *var = l->v_dep[i]; + struct group_proc *gp = group_proc_get (var); + struct group_statistics *gs; + struct hsh_iterator hi; lz[i].grand_total = 0; lz[i].total_n = 0; - lz[i].n_groups = number_of_groups; + lz[i].n_groups = gp->n_groups ; - for (g = 0 ; g < v->p.t_t.n_groups ; ++g ) + + for ( gs = hsh_first(gp->group_hash, &hi); + gs != 0; + gs = hsh_next(gp->group_hash, &hi)) { - struct group_statistics *gs = &v->p.t_t.gs[g]; gs->lz_total = 0; - hsh_insert(hash[i], gs); - if ( gs->criterion != CMP_EQ ) - { - inequality_compare = 1; - } } + } } static int -levene_calc (struct ccase *c, void *_l) +levene_calc (const struct ccase *c, void *_l) { int i; + int warn = 0; struct levene_info *l = (struct levene_info *) _l; - union value *gv = &c->data[l->v_indep->fv]; + const union value *gv = case_data (c, l->v_indep->fv); struct group_statistics key; - double weight = dict_get_case_weight(default_dict,c); - + double weight = dict_get_case_weight(default_dict,c,&warn); /* Skip the entire case if /MISSING=LISTWISE is set */ if ( l->missing == LEV_LISTWISE ) @@ -226,9 +199,9 @@ levene_calc (struct ccase *c, void *_l) for (i = 0; i < l->n_dep; ++i) { struct variable *v = l->v_dep[i]; - union value *val = &c->data[v->fv]; + const union value *val = case_data (c, v->fv); - if (l->is_missing(val,v) ) + if (l->is_missing (&v->miss, val) ) { return 0; } @@ -237,19 +210,21 @@ levene_calc (struct ccase *c, void *_l) key.id = *gv; - key.criterion = CMP_EQ; for (i = 0; i < l->n_dep; ++i) { struct variable *var = l->v_dep[i]; + struct group_proc *gp = group_proc_get (var); double levene_z; - union value *v = &c->data[var->fv]; + const union value *v = case_data (c, var->fv); struct group_statistics *gs; - gs = get_group(i,&key); + + gs = hsh_find(gp->group_hash,(void *) &key ); + if ( 0 == gs ) continue ; - if ( ! l->is_missing(v,var)) + if ( ! l->is_missing(&var->miss, v)) { levene_z= fabs(v->f - gs->mean); lz[i].grand_total += levene_z * weight; @@ -257,6 +232,7 @@ levene_calc (struct ccase *c, void *_l) gs->lz_total += levene_z * weight; } + } return 0; } @@ -271,10 +247,11 @@ levene_postcalc (void *_l) for (v = 0; v < l->n_dep; ++v) { + /* This is Z_LL */ lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ; - } + } @@ -295,26 +272,32 @@ levene2_precalc (void *_l) { struct hsh_iterator hi; struct group_statistics *g; - for(g = (struct group_statistics *) hsh_first(hash[v],&hi); + + struct variable *var = l->v_dep[v] ; + struct hsh_table *hash = group_proc_get (var)->group_hash; + + + for(g = (struct group_statistics *) hsh_first(hash,&hi); g != 0 ; - g = (struct group_statistics *) hsh_next(hash[v],&hi) ) + g = (struct group_statistics *) hsh_next(hash,&hi) ) { - g->lz_mean = g->lz_total/g->n ; + g->lz_mean = g->lz_total / g->n ; } lz_denominator[v] = 0; } } static int -levene2_calc (struct ccase *c, void *_l) +levene2_calc (const struct ccase *c, void *_l) { int i; + int warn = 0; struct levene_info *l = (struct levene_info *) _l; - double weight = dict_get_case_weight(default_dict,c); + double weight = dict_get_case_weight(default_dict,c,&warn); - union value *gv = &c->data[l->v_indep->fv]; + const union value *gv = case_data (c, l->v_indep->fv); struct group_statistics key; /* Skip the entire case if /MISSING=LISTWISE is set */ @@ -323,9 +306,9 @@ levene2_calc (struct ccase *c, void *_l) for (i = 0; i < l->n_dep; ++i) { struct variable *v = l->v_dep[i]; - union value *val = &c->data[v->fv]; + const union value *val = case_data (c, v->fv); - if (l->is_missing(val,v) ) + if (l->is_missing(&v->miss, val) ) { return 0; } @@ -333,22 +316,23 @@ levene2_calc (struct ccase *c, void *_l) } key.id = *gv; - key.criterion = CMP_EQ; for (i = 0; i < l->n_dep; ++i) { double levene_z; struct variable *var = l->v_dep[i] ; - union value *v = &c->data[var->fv]; + const union value *v = case_data (c, var->fv); struct group_statistics *gs; - gs = get_group(i,&key); + + gs = hsh_find(group_proc_get (var)->group_hash,(void *) &key ); + if ( 0 == gs ) continue; - if ( ! l->is_missing(v,var) ) + if ( ! l->is_missing (&var->miss, v) ) { levene_z = fabs(v->f - gs->mean); - lz_denominator[i] += weight * sqr(levene_z - gs->lz_mean); + lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean); } } @@ -368,74 +352,27 @@ levene2_postcalc (void *_l) double lz_numerator = 0; struct hsh_iterator hi; struct group_statistics *g; - for(g = (struct group_statistics *) hsh_first(hash[v],&hi); + + struct variable *var = l->v_dep[v] ; + struct group_proc *gp = group_proc_get (var); + struct hsh_table *hash = gp->group_hash; + + for(g = (struct group_statistics *) hsh_first(hash,&hi); g != 0 ; - g = (struct group_statistics *) hsh_next(hash[v],&hi) ) + g = (struct group_statistics *) hsh_next(hash,&hi) ) { + lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean ); + } + lz_numerator *= ( gp->ugs.n - gp->n_groups ); - lz_numerator += g->n * sqr(g->lz_mean - lz[v].grand_mean ); - + lz_denominator[v] *= (gp->n_groups - 1); - } - lz_numerator *= ( l->v_dep[v]->p.t_t.ugs.n - - l->v_dep[v]->p.t_t.n_groups ); + gp->levene = lz_numerator / lz_denominator[v] ; - lz_denominator[v] /= (l->v_dep[v]->p.t_t.n_groups - 1); - - l->v_dep[v]->p.t_t.levene = lz_numerator/lz_denominator[v] ; } /* Now clear up after ourselves */ free(lz_denominator); - for (v = 0; v < l->n_dep; ++v) - { - hsh_destroy(hash[v]); - } - - free(hash); free(lz); } - -/* Return the group belonging to the v_th dependent variable - which matches the key */ -static struct group_statistics * -get_group(int v, struct group_statistics *key) -{ - struct group_statistics *gs; - gs = hsh_find(hash[v],key); - - - if ( ( !gs ) && inequality_compare) - { - /* Here we degrade to a linear search. - This would seem inefficient. However, it should only ever happen - with the T-TEST, for which there are exactly two groups */ - - struct hsh_iterator hi; - - assert( hsh_count(hash[v]) == 2 ) ; - for(gs = (struct group_statistics *) hsh_first(hash[v],&hi); - gs != 0 ; - gs = (struct group_statistics *) hsh_next(hash[v],&hi) ) - { - int cmp; - - cmp = compare_values(&gs->id, &key->id, 0); - - assert( cmp != 0 ); /* or else the hash would have found something */ - - if ( cmp == -1 && - ( gs->criterion == CMP_GT || gs->criterion == CMP_GE ) - ) - break; - - if ( cmp == 1 && - ( gs->criterion == CMP_LT || gs->criterion == CMP_LE ) - ) - break; - } - } - - return gs; -}