X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flevene.c;h=5de5220588564cb6674fd20aa04e21279dd3f0bf;hb=92fb12eb06716d14c05b781f5d9dcde956d77c30;hp=67e52d38300f732f37d066c0f637855bfcf80d7a;hpb=f9d47b5bba8416419cf3bcd3aa23c2d40a05fcac;p=pspp diff --git a/src/levene.c b/src/levene.c index 67e52d3830..5de5220588 100644 --- a/src/levene.c +++ b/src/levene.c @@ -16,17 +16,23 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include -#include #include "levene.h" +#include "error.h" +#include "case.h" +#include "casefile.h" +#include "dictionary.h" +#include "group_proc.h" #include "hash.h" +#include "str.h" #include "var.h" #include "vfm.h" #include "alloc.h" -#include "stats.h" +#include "misc.h" +#include "group.h" #include #include @@ -51,23 +57,10 @@ */ -static void levene_precalc (void *); -static int levene_calc (struct ccase *, void *); -static void levene_postcalc (void *); - - -/* Second pass */ -static void levene2_precalc (void *); -static int levene2_calc (struct ccase *, void *); -static void levene2_postcalc (void *); - struct levene_info { - /* The number of groups */ - int n_groups; - /* Per group statistics */ struct t_test_proc **group_stats; @@ -75,54 +68,66 @@ struct levene_info struct variable *v_indep; /* Number of dependent variables */ - int n_dep; + size_t n_dep; /* The dependent variables */ struct variable **v_dep; -}; - + /* How to treat missing values */ + enum lev_missing missing; + /* Function to test for missing values */ + is_missing_func *is_missing; +}; -void -levene(struct variable *v_indep, int n_dep, struct variable **v_dep) -{ - struct levene_info l; +/* First pass */ +static void levene_precalc (const struct levene_info *l); +static int levene_calc (const struct ccase *, void *); +static void levene_postcalc (void *); - l.n_dep=n_dep; - l.v_indep=v_indep; - l.v_dep=v_dep; - procedure(levene_precalc, levene_calc, levene_postcalc, &l); - procedure(levene2_precalc,levene2_calc,levene2_postcalc,&l); - -} +/* Second pass */ +static void levene2_precalc (void *); +static int levene2_calc (const struct ccase *, void *); +static void levene2_postcalc (void *); -static struct hsh_table **hash; -static int -compare_group_id(const void *a_, const void *b_, void *aux) +void +levene(const struct casefile *cf, + struct variable *v_indep, size_t n_dep, struct variable **v_dep, + enum lev_missing missing, is_missing_func value_is_missing) { - const struct group_statistics *a = (struct group_statistics *) a_; - const struct group_statistics *b = (struct group_statistics *) b_; + struct casereader *r; + struct ccase c; + struct levene_info l; - int width = (int) aux; - - return compare_values(&a->id, &b->id, width); -} + l.n_dep = n_dep; + l.v_indep = v_indep; + l.v_dep = v_dep; + l.missing = missing; + l.is_missing = value_is_missing; -static unsigned -hash_group_id(const void *g_, void *aux) -{ - const struct group_statistics *g = (struct group_statistics *) g_; - int width = (int) aux; + levene_precalc(&l); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + levene_calc(&c,&l); + } + casereader_destroy (r); + levene_postcalc(&l); - if ( 0 == width ) - return hsh_hash_double (g->id.f); - else - return hsh_hash_bytes (g->id.s, width); + levene2_precalc(&l); + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c)) + { + levene2_calc(&c,&l); + } + casereader_destroy (r); + levene2_postcalc(&l); } @@ -148,67 +153,85 @@ struct lz_stats static struct lz_stats *lz; - static void -levene_precalc (void *_l) +levene_precalc (const struct levene_info *l) { - int i; - struct levene_info *l = (struct levene_info *) _l; - - lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ; + size_t i; - hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep ); + lz = xnmalloc (l->n_dep, sizeof *lz); - for(i=0; i < l->n_dep ; ++i ) + for(i = 0; i < l->n_dep ; ++i ) { - struct variable *v = l->v_dep[i]; - int g; - int number_of_groups = v->p.t_t.n_groups ; - - hash[i] = hsh_create (l->n_dep * number_of_groups, - compare_group_id, hash_group_id, - 0,(void *) l->v_indep->width); + struct variable *var = l->v_dep[i]; + struct group_proc *gp = group_proc_get (var); + struct group_statistics *gs; + struct hsh_iterator hi; lz[i].grand_total = 0; lz[i].total_n = 0; - lz[i].n_groups = number_of_groups; + lz[i].n_groups = gp->n_groups ; - for (g = 0 ; g < v->p.t_t.n_groups ; ++g ) + + for ( gs = hsh_first(gp->group_hash, &hi); + gs != 0; + gs = hsh_next(gp->group_hash, &hi)) { - struct group_statistics *gs = &v->p.t_t.gs[g]; - gs->lz_total=0; - hsh_insert(hash[i],gs); + gs->lz_total = 0; } + } } static int -levene_calc (struct ccase *c, void *_l) +levene_calc (const struct ccase *c, void *_l) { - int var; + size_t i; + int warn = 0; struct levene_info *l = (struct levene_info *) _l; - union value *gv = &c->data[l->v_indep->fv]; + const union value *gv = case_data (c, l->v_indep->fv); struct group_statistics key; - double weight = dict_get_case_weight(default_dict,c); + double weight = dict_get_case_weight(default_dict,c,&warn); + + /* Skip the entire case if /MISSING=LISTWISE is set */ + if ( l->missing == LEV_LISTWISE ) + { + for (i = 0; i < l->n_dep; ++i) + { + struct variable *v = l->v_dep[i]; + const union value *val = case_data (c, v->fv); + + if (l->is_missing (&v->miss, val) ) + { + return 0; + } + } + } + key.id = *gv; - for (var = 0; var < l->n_dep; ++var) + for (i = 0; i < l->n_dep; ++i) { + struct variable *var = l->v_dep[i]; + struct group_proc *gp = group_proc_get (var); double levene_z; - union value *v = &c->data[l->v_dep[var]->fv]; + const union value *v = case_data (c, var->fv); struct group_statistics *gs; - gs = hsh_find(hash[var],&key); - assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width)); - /* FIXME: handle SYSMIS properly */ + gs = hsh_find(gp->group_hash,(void *) &key ); - levene_z= fabs(v->f - gs->mean); - lz[var].grand_total += levene_z * weight; - lz[var].total_n += weight; + if ( 0 == gs ) + continue ; - gs->lz_total += levene_z * weight; + if ( ! l->is_missing(&var->miss, v)) + { + levene_z= fabs(v->f - gs->mean); + lz[i].grand_total += levene_z * weight; + lz[i].total_n += weight; + + gs->lz_total += levene_z * weight; + } } return 0; @@ -218,16 +241,17 @@ levene_calc (struct ccase *c, void *_l) static void levene_postcalc (void *_l) { - int v; + size_t v; struct levene_info *l = (struct levene_info *) _l; for (v = 0; v < l->n_dep; ++v) { + /* This is Z_LL */ lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ; - } + } @@ -237,55 +261,79 @@ static double *lz_denominator; static void levene2_precalc (void *_l) { - int v; + size_t v; struct levene_info *l = (struct levene_info *) _l; - lz_denominator = (double *) xmalloc(sizeof(double) * l->n_dep); + lz_denominator = xnmalloc (l->n_dep, sizeof *lz_denominator); /* This stuff could go in the first post calc . . . */ for (v = 0; v < l->n_dep; ++v) { struct hsh_iterator hi; struct group_statistics *g; - for(g = (struct group_statistics *) hsh_first(hash[v],&hi); + + struct variable *var = l->v_dep[v] ; + struct hsh_table *hash = group_proc_get (var)->group_hash; + + + for(g = (struct group_statistics *) hsh_first(hash,&hi); g != 0 ; - g = (struct group_statistics *) hsh_next(hash[v],&hi) ) + g = (struct group_statistics *) hsh_next(hash,&hi) ) { - g->lz_mean = g->lz_total/g->n ; + g->lz_mean = g->lz_total / g->n ; } lz_denominator[v] = 0; } } static int -levene2_calc (struct ccase *c, void *_l) +levene2_calc (const struct ccase *c, void *_l) { - int var; + size_t i; + int warn = 0; struct levene_info *l = (struct levene_info *) _l; - double weight = dict_get_case_weight(default_dict,c); + double weight = dict_get_case_weight(default_dict,c,&warn); - union value *gv = &c->data[l->v_indep->fv]; + const union value *gv = case_data (c, l->v_indep->fv); struct group_statistics key; + /* Skip the entire case if /MISSING=LISTWISE is set */ + if ( l->missing == LEV_LISTWISE ) + { + for (i = 0; i < l->n_dep; ++i) + { + struct variable *v = l->v_dep[i]; + const union value *val = case_data (c, v->fv); + + if (l->is_missing(&v->miss, val) ) + { + return 0; + } + } + } + key.id = *gv; - for (var = 0; var < l->n_dep; ++var) + for (i = 0; i < l->n_dep; ++i) { double levene_z; - union value *v = &c->data[l->v_dep[var]->fv]; + struct variable *var = l->v_dep[i] ; + const union value *v = case_data (c, var->fv); struct group_statistics *gs; - gs = hsh_find(hash[var],&key); - assert(gs); - assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width)); - /* FIXME: handle SYSMIS properly */ + gs = hsh_find(group_proc_get (var)->group_hash,(void *) &key ); - levene_z = fabs(v->f - gs->mean); + if ( 0 == gs ) + continue; - lz_denominator[var] += weight * sqr(levene_z - gs->lz_mean); + if ( ! l->is_missing (&var->miss, v) ) + { + levene_z = fabs(v->f - gs->mean); + lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean); + } } return 0; @@ -295,7 +343,7 @@ levene2_calc (struct ccase *c, void *_l) static void levene2_postcalc (void *_l) { - int v; + size_t v; struct levene_info *l = (struct levene_info *) _l; @@ -304,32 +352,27 @@ levene2_postcalc (void *_l) double lz_numerator = 0; struct hsh_iterator hi; struct group_statistics *g; - for(g = (struct group_statistics *) hsh_first(hash[v],&hi); + + struct variable *var = l->v_dep[v] ; + struct group_proc *gp = group_proc_get (var); + struct hsh_table *hash = gp->group_hash; + + for(g = (struct group_statistics *) hsh_first(hash,&hi); g != 0 ; - g = (struct group_statistics *) hsh_next(hash[v],&hi) ) + g = (struct group_statistics *) hsh_next(hash,&hi) ) { + lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean ); + } + lz_numerator *= ( gp->ugs.n - gp->n_groups ); - lz_numerator += g->n * sqr(g->lz_mean - lz[v].grand_mean ); - + lz_denominator[v] *= (gp->n_groups - 1); - } - lz_numerator *= ( l->v_dep[v]->p.t_t.ugs.n - - l->v_dep[v]->p.t_t.n_groups ); + gp->levene = lz_numerator / lz_denominator[v] ; - lz_denominator[v] /= (l->v_dep[v]->p.t_t.n_groups - 1); - - l->v_dep[v]->p.t_t.levene = lz_numerator/lz_denominator[v] ; } /* Now clear up after ourselves */ free(lz_denominator); - for (v = 0; v < l->n_dep; ++v) - { - hsh_destroy(hash[v]); - } - - free(hash); free(lz); } -