X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Flevene.c;h=635c68689e93af7d872b7d184756e8f3122669e2;hb=97d6c6f6b1922621ca013668eba9a9a9f71d60fe;hp=67e52d38300f732f37d066c0f637855bfcf80d7a;hpb=f9d47b5bba8416419cf3bcd3aa23c2d40a05fcac;p=pspp diff --git a/src/levene.c b/src/levene.c index 67e52d3830..635c68689e 100644 --- a/src/levene.c +++ b/src/levene.c @@ -20,9 +20,10 @@ 02111-1307, USA. */ #include -#include +#include "error.h" #include "levene.h" #include "hash.h" +#include "str.h" #include "var.h" #include "vfm.h" #include "alloc.h" @@ -51,6 +52,9 @@ */ +static struct group_statistics *get_group(int v, struct group_statistics *key); + +/* First pass */ static void levene_precalc (void *); static int levene_calc (struct ccase *, void *); static void levene_postcalc (void *); @@ -80,50 +84,66 @@ struct levene_info /* The dependent variables */ struct variable **v_dep; + /* How to treat missing values */ + enum lev_missing missing; + + /* Function to test for missing values */ + is_missing_func is_missing; + }; void -levene(struct variable *v_indep, int n_dep, struct variable **v_dep) +levene(struct variable *v_indep, int n_dep, struct variable **v_dep, + enum lev_missing missing, is_missing_func value_is_missing) { struct levene_info l; - l.n_dep=n_dep; - l.v_indep=v_indep; - l.v_dep=v_dep; + l.n_dep = n_dep; + l.v_indep = v_indep; + l.v_dep = v_dep; + l.missing = missing; + l.is_missing = value_is_missing; - procedure(levene_precalc, levene_calc, levene_postcalc, &l); - procedure(levene2_precalc,levene2_calc,levene2_postcalc,&l); + procedure_with_splits (levene_precalc, levene_calc, levene_postcalc, &l); + procedure_with_splits (levene2_precalc, levene2_calc, levene2_postcalc, &l); } static struct hsh_table **hash; +/* Return -1 if the id of a is less than b; +1 if greater than and + 0 if equal */ static int -compare_group_id(const void *a_, const void *b_, void *aux) +compare_group(const struct group_statistics *a, + const struct group_statistics *b, + int width) { - const struct group_statistics *a = (struct group_statistics *) a_; - const struct group_statistics *b = (struct group_statistics *) b_; + int id_cmp = compare_values(&a->id, &b->id, width); - int width = (int) aux; - - return compare_values(&a->id, &b->id, width); + if (id_cmp == 0 ) + { + int c; + c= memcmp(&a->criterion,&b->criterion,sizeof(enum comparison)); + return c; + } + else + return id_cmp; } static unsigned -hash_group_id(const void *g_, void *aux) +hash_group(const struct group_statistics *g, int width) { - const struct group_statistics *g = (struct group_statistics *) g_; - - int width = (int) aux; + unsigned id_hash; if ( 0 == width ) - return hsh_hash_double (g->id.f); + id_hash = hsh_hash_double (g->id.f); else - return hsh_hash_bytes (g->id.s, width); + id_hash = hsh_hash_bytes (g->id.s, width); + return id_hash; } /* Internal variables used in calculating the Levene statistic */ @@ -147,6 +167,8 @@ struct lz_stats /* An array of lz_stats for each variable */ static struct lz_stats *lz; +/* Set to 1 if the groups require inequality comparisions */ +static int inequality_compare; static void @@ -166,7 +188,8 @@ levene_precalc (void *_l) int number_of_groups = v->p.t_t.n_groups ; hash[i] = hsh_create (l->n_dep * number_of_groups, - compare_group_id, hash_group_id, + (hsh_compare_func *) compare_group, + (hsh_hash_func *) hash_group, 0,(void *) l->v_indep->width); lz[i].grand_total = 0; @@ -176,8 +199,12 @@ levene_precalc (void *_l) for (g = 0 ; g < v->p.t_t.n_groups ; ++g ) { struct group_statistics *gs = &v->p.t_t.gs[g]; - gs->lz_total=0; - hsh_insert(hash[i],gs); + gs->lz_total = 0; + hsh_insert(hash[i], gs); + if ( gs->criterion != CMP_EQ ) + { + inequality_compare = 1; + } } } @@ -186,30 +213,50 @@ levene_precalc (void *_l) static int levene_calc (struct ccase *c, void *_l) { - int var; + int i; struct levene_info *l = (struct levene_info *) _l; union value *gv = &c->data[l->v_indep->fv]; struct group_statistics key; double weight = dict_get_case_weight(default_dict,c); + + + /* Skip the entire case if /MISSING=LISTWISE is set */ + if ( l->missing == LEV_LISTWISE ) + { + for (i = 0; i < l->n_dep; ++i) + { + struct variable *v = l->v_dep[i]; + union value *val = &c->data[v->fv]; + + if (l->is_missing(val,v) ) + { + return 0; + } + } + } + key.id = *gv; + key.criterion = CMP_EQ; - for (var = 0; var < l->n_dep; ++var) + for (i = 0; i < l->n_dep; ++i) { + struct variable *var = l->v_dep[i]; double levene_z; - union value *v = &c->data[l->v_dep[var]->fv]; + union value *v = &c->data[var->fv]; struct group_statistics *gs; - gs = hsh_find(hash[var],&key); - assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width)); - - /* FIXME: handle SYSMIS properly */ + gs = get_group(i,&key); + if ( 0 == gs ) + continue ; - levene_z= fabs(v->f - gs->mean); - lz[var].grand_total += levene_z * weight; - lz[var].total_n += weight; - - gs->lz_total += levene_z * weight; + if ( ! l->is_missing(v,var)) + { + levene_z= fabs(v->f - gs->mean); + lz[i].grand_total += levene_z * weight; + lz[i].total_n += weight; + gs->lz_total += levene_z * weight; + } } return 0; } @@ -261,7 +308,7 @@ levene2_precalc (void *_l) static int levene2_calc (struct ccase *c, void *_l) { - int var; + int i; struct levene_info *l = (struct levene_info *) _l; @@ -270,22 +317,39 @@ levene2_calc (struct ccase *c, void *_l) union value *gv = &c->data[l->v_indep->fv]; struct group_statistics key; + /* Skip the entire case if /MISSING=LISTWISE is set */ + if ( l->missing == LEV_LISTWISE ) + { + for (i = 0; i < l->n_dep; ++i) + { + struct variable *v = l->v_dep[i]; + union value *val = &c->data[v->fv]; + + if (l->is_missing(val,v) ) + { + return 0; + } + } + } + key.id = *gv; + key.criterion = CMP_EQ; - for (var = 0; var < l->n_dep; ++var) + for (i = 0; i < l->n_dep; ++i) { double levene_z; - union value *v = &c->data[l->v_dep[var]->fv]; + struct variable *var = l->v_dep[i] ; + union value *v = &c->data[var->fv]; struct group_statistics *gs; - gs = hsh_find(hash[var],&key); - assert(gs); - assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width)); - - /* FIXME: handle SYSMIS properly */ - - levene_z = fabs(v->f - gs->mean); + gs = get_group(i,&key); + if ( 0 == gs ) + continue; - lz_denominator[var] += weight * sqr(levene_z - gs->lz_mean); + if ( ! l->is_missing(v,var) ) + { + levene_z = fabs(v->f - gs->mean); + lz_denominator[i] += weight * sqr(levene_z - gs->lz_mean); + } } return 0; @@ -333,3 +397,45 @@ levene2_postcalc (void *_l) } +/* Return the group belonging to the v_th dependent variable + which matches the key */ +static struct group_statistics * +get_group(int v, struct group_statistics *key) +{ + struct group_statistics *gs; + gs = hsh_find(hash[v],key); + + + if ( ( !gs ) && inequality_compare) + { + /* Here we degrade to a linear search. + This would seem inefficient. However, it should only ever happen + with the T-TEST, for which there are exactly two groups */ + + struct hsh_iterator hi; + + assert( hsh_count(hash[v]) == 2 ) ; + for(gs = (struct group_statistics *) hsh_first(hash[v],&hi); + gs != 0 ; + gs = (struct group_statistics *) hsh_next(hash[v],&hi) ) + { + int cmp; + + cmp = compare_values(&gs->id, &key->id, 0); + + assert( cmp != 0 ); /* or else the hash would have found something */ + + if ( cmp == -1 && + ( gs->criterion == CMP_GT || gs->criterion == CMP_GE ) + ) + break; + + if ( cmp == 1 && + ( gs->criterion == CMP_LT || gs->criterion == CMP_LE ) + ) + break; + } + } + + return gs; +}