02111-1307, USA. */
#include <config.h>
-#include <assert.h>
#include "levene.h"
+#include "error.h"
+#include "case.h"
+#include "casefile.h"
#include "hash.h"
+#include "str.h"
#include "var.h"
#include "vfm.h"
#include "alloc.h"
-#include "stats.h"
+#include "misc.h"
+#include "group.h"
#include <math.h>
+#include <stdlib.h>
/* This module calculates the Levene statistic for variables.
*/
-static void levene_precalc (void *);
-static int levene_calc (struct ccase *, void *);
-static void levene_postcalc (void *);
-
-
-/* Second pass */
-static void levene2_precalc (void *);
-static int levene2_calc (struct ccase *, void *);
-static void levene2_postcalc (void *);
-
struct levene_info
{
- /* The number of groups */
- int n_groups;
-
/* Per group statistics */
struct t_test_proc **group_stats;
/* The dependent variables */
struct variable **v_dep;
-};
+ /* How to treat missing values */
+ enum lev_missing missing;
+ /* Function to test for missing values */
+ is_missing_func is_missing;
+};
-void
-levene(struct variable *v_indep, int n_dep, struct variable **v_dep)
-{
- struct levene_info l;
+/* First pass */
+static void levene_precalc (const struct levene_info *l);
+static int levene_calc (const struct ccase *, void *);
+static void levene_postcalc (void *);
- l.n_dep=n_dep;
- l.v_indep=v_indep;
- l.v_dep=v_dep;
- procedure(levene_precalc, levene_calc, levene_postcalc, &l);
- procedure(levene2_precalc,levene2_calc,levene2_postcalc,&l);
-
-}
+/* Second pass */
+static void levene2_precalc (void *);
+static int levene2_calc (const struct ccase *, void *);
+static void levene2_postcalc (void *);
-static struct hsh_table **hash;
-static int
-compare_group_id(const void *a_, const void *b_, void *aux)
+void
+levene(const struct casefile *cf,
+ struct variable *v_indep, int n_dep, struct variable **v_dep,
+ enum lev_missing missing, is_missing_func value_is_missing)
{
- const struct group_statistics *a = (struct group_statistics *) a_;
- const struct group_statistics *b = (struct group_statistics *) b_;
+ struct casereader *r;
+ struct ccase c;
+ struct levene_info l;
- int width = (int) aux;
-
- return compare_values(&a->id, &b->id, width);
-}
+ l.n_dep = n_dep;
+ l.v_indep = v_indep;
+ l.v_dep = v_dep;
+ l.missing = missing;
+ l.is_missing = value_is_missing;
-static unsigned
-hash_group_id(const void *g_, void *aux)
-{
- const struct group_statistics *g = (struct group_statistics *) g_;
- int width = (int) aux;
+ levene_precalc(&l);
+ for(r = casefile_get_reader (cf);
+ casereader_read (r, &c) ;
+ case_destroy (&c))
+ {
+ levene_calc(&c,&l);
+ }
+ casereader_destroy (r);
+ levene_postcalc(&l);
- if ( 0 == width )
- return hsh_hash_double (g->id.f);
- else
- return hsh_hash_bytes (g->id.s, width);
+ levene2_precalc(&l);
+ for(r = casefile_get_reader (cf);
+ casereader_read (r, &c) ;
+ case_destroy (&c))
+ {
+ levene2_calc(&c,&l);
+ }
+ casereader_destroy (r);
+ levene2_postcalc(&l);
}
static struct lz_stats *lz;
-
static void
-levene_precalc (void *_l)
+levene_precalc (const struct levene_info *l)
{
int i;
- struct levene_info *l = (struct levene_info *) _l;
lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
- hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep );
-
for(i=0; i < l->n_dep ; ++i )
{
struct variable *v = l->v_dep[i];
- int g;
- int number_of_groups = v->p.t_t.n_groups ;
-
- hash[i] = hsh_create (l->n_dep * number_of_groups,
- compare_group_id, hash_group_id,
- 0,(void *) l->v_indep->width);
lz[i].grand_total = 0;
lz[i].total_n = 0;
- lz[i].n_groups = number_of_groups;
-
- for (g = 0 ; g < v->p.t_t.n_groups ; ++g )
- {
- struct group_statistics *gs = &v->p.t_t.gs[g];
- gs->lz_total=0;
- hsh_insert(hash[i],gs);
- }
+ lz[i].n_groups = v->p.grp_data.n_groups ;
}
}
static int
-levene_calc (struct ccase *c, void *_l)
+levene_calc (const struct ccase *c, void *_l)
{
- int var;
+ int i;
+ int warn = 0;
struct levene_info *l = (struct levene_info *) _l;
- union value *gv = &c->data[l->v_indep->fv];
+ const union value *gv = case_data (c, l->v_indep->fv);
struct group_statistics key;
- double weight = dict_get_case_weight(default_dict,c);
+ double weight = dict_get_case_weight(default_dict,c,&warn);
+
+ /* Skip the entire case if /MISSING=LISTWISE is set */
+ if ( l->missing == LEV_LISTWISE )
+ {
+ for (i = 0; i < l->n_dep; ++i)
+ {
+ struct variable *v = l->v_dep[i];
+ const union value *val = case_data (c, v->fv);
+
+ if (l->is_missing(val,v) )
+ {
+ return 0;
+ }
+ }
+ }
+
key.id = *gv;
- for (var = 0; var < l->n_dep; ++var)
+ for (i = 0; i < l->n_dep; ++i)
{
+ struct variable *var = l->v_dep[i];
double levene_z;
- union value *v = &c->data[l->v_dep[var]->fv];
+ const union value *v = case_data (c, var->fv);
struct group_statistics *gs;
- gs = hsh_find(hash[var],&key);
- assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width));
- /* FIXME: handle SYSMIS properly */
+ gs = hsh_find(var->p.grp_data.group_hash,(void *) &key );
- levene_z= fabs(v->f - gs->mean);
- lz[var].grand_total += levene_z * weight;
- lz[var].total_n += weight;
+ if ( 0 == gs )
+ continue ;
- gs->lz_total += levene_z * weight;
+ if ( ! l->is_missing(v,var))
+ {
+ levene_z= fabs(v->f - gs->mean);
+ lz[i].grand_total += levene_z * weight;
+ lz[i].total_n += weight;
+ gs->lz_total += levene_z * weight;
+ }
}
return 0;
}
for (v = 0; v < l->n_dep; ++v)
{
+ /* This is Z_LL */
lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
-
}
}
{
struct hsh_iterator hi;
struct group_statistics *g;
- for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
+
+ struct variable *var = l->v_dep[v] ;
+ struct hsh_table *hash = var->p.grp_data.group_hash;
+
+
+ for(g = (struct group_statistics *) hsh_first(hash,&hi);
g != 0 ;
- g = (struct group_statistics *) hsh_next(hash[v],&hi) )
+ g = (struct group_statistics *) hsh_next(hash,&hi) )
{
g->lz_mean = g->lz_total/g->n ;
}
}
static int
-levene2_calc (struct ccase *c, void *_l)
+levene2_calc (const struct ccase *c, void *_l)
{
- int var;
+ int i;
+ int warn = 0;
struct levene_info *l = (struct levene_info *) _l;
- double weight = dict_get_case_weight(default_dict,c);
+ double weight = dict_get_case_weight(default_dict,c,&warn);
- union value *gv = &c->data[l->v_indep->fv];
+ const union value *gv = case_data (c, l->v_indep->fv);
struct group_statistics key;
+ /* Skip the entire case if /MISSING=LISTWISE is set */
+ if ( l->missing == LEV_LISTWISE )
+ {
+ for (i = 0; i < l->n_dep; ++i)
+ {
+ struct variable *v = l->v_dep[i];
+ const union value *val = case_data (c, v->fv);
+
+ if (l->is_missing(val,v) )
+ {
+ return 0;
+ }
+ }
+ }
+
key.id = *gv;
- for (var = 0; var < l->n_dep; ++var)
+ for (i = 0; i < l->n_dep; ++i)
{
double levene_z;
- union value *v = &c->data[l->v_dep[var]->fv];
+ struct variable *var = l->v_dep[i] ;
+ const union value *v = case_data (c, var->fv);
struct group_statistics *gs;
- gs = hsh_find(hash[var],&key);
- assert(gs);
- assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width));
- /* FIXME: handle SYSMIS properly */
+ gs = hsh_find(var->p.grp_data.group_hash,(void *) &key );
- levene_z = fabs(v->f - gs->mean);
+ if ( 0 == gs )
+ continue;
- lz_denominator[var] += weight * sqr(levene_z - gs->lz_mean);
+ if ( ! l->is_missing(v,var) )
+ {
+ levene_z = fabs(v->f - gs->mean);
+ lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean);
+ }
}
return 0;
double lz_numerator = 0;
struct hsh_iterator hi;
struct group_statistics *g;
- for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
- g != 0 ;
- g = (struct group_statistics *) hsh_next(hash[v],&hi) )
- {
- lz_numerator += g->n * sqr(g->lz_mean - lz[v].grand_mean );
-
+ struct variable *var = l->v_dep[v] ;
+ struct hsh_table *hash = var->p.grp_data.group_hash;
+ for(g = (struct group_statistics *) hsh_first(hash,&hi);
+ g != 0 ;
+ g = (struct group_statistics *) hsh_next(hash,&hi) )
+ {
+ lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
}
- lz_numerator *= ( l->v_dep[v]->p.t_t.ugs.n -
- l->v_dep[v]->p.t_t.n_groups );
+ lz_numerator *= ( l->v_dep[v]->p.grp_data.ugs.n -
+ l->v_dep[v]->p.grp_data.n_groups );
- lz_denominator[v] /= (l->v_dep[v]->p.t_t.n_groups - 1);
+ lz_denominator[v] *= (l->v_dep[v]->p.grp_data.n_groups - 1);
- l->v_dep[v]->p.t_t.levene = lz_numerator/lz_denominator[v] ;
+ l->v_dep[v]->p.grp_data.levene = lz_numerator/lz_denominator[v] ;
+
}
/* Now clear up after ourselves */
free(lz_denominator);
- for (v = 0; v < l->n_dep; ++v)
- {
- hsh_destroy(hash[v]);
- }
-
- free(hash);
free(lz);
}
-