02111-1307, USA. */
#include <config.h>
-#include <assert.h>
+#include "error.h"
#include "levene.h"
#include "hash.h"
+#include "str.h"
#include "var.h"
#include "vfm.h"
#include "alloc.h"
*/
+static struct group_statistics *get_group(int v, struct group_statistics *key);
+
+/* First pass */
static void levene_precalc (void *);
static int levene_calc (struct ccase *, void *);
static void levene_postcalc (void *);
/* The dependent variables */
struct variable **v_dep;
+ /* How to treat missing values */
+ enum lev_missing missing;
+
+ /* Function to test for missing values */
+ is_missing_func is_missing;
+
};
void
-levene(struct variable *v_indep, int n_dep, struct variable **v_dep)
+levene(struct variable *v_indep, int n_dep, struct variable **v_dep,
+ enum lev_missing missing, is_missing_func value_is_missing)
{
struct levene_info l;
- l.n_dep=n_dep;
- l.v_indep=v_indep;
- l.v_dep=v_dep;
+ l.n_dep = n_dep;
+ l.v_indep = v_indep;
+ l.v_dep = v_dep;
+ l.missing = missing;
+ l.is_missing = value_is_missing;
- procedure(levene_precalc, levene_calc, levene_postcalc, &l);
- procedure(levene2_precalc,levene2_calc,levene2_postcalc,&l);
+ procedure_with_splits (levene_precalc, levene_calc, levene_postcalc, &l);
+ procedure_with_splits (levene2_precalc, levene2_calc, levene2_postcalc, &l);
}
static struct hsh_table **hash;
+/* Return -1 if the id of a is less than b; +1 if greater than and
+ 0 if equal */
static int
-compare_group_id(const void *a_, const void *b_, void *aux)
+compare_group(const struct group_statistics *a,
+ const struct group_statistics *b,
+ int width)
{
- const struct group_statistics *a = (struct group_statistics *) a_;
- const struct group_statistics *b = (struct group_statistics *) b_;
+ int id_cmp = compare_values(&a->id, &b->id, width);
- int width = (int) aux;
-
- return compare_values(&a->id, &b->id, width);
+ if (id_cmp == 0 )
+ {
+ int c;
+ c= memcmp(&a->criterion,&b->criterion,sizeof(enum comparison));
+ return c;
+ }
+ else
+ return id_cmp;
}
static unsigned
-hash_group_id(const void *g_, void *aux)
+hash_group(const struct group_statistics *g, int width)
{
- const struct group_statistics *g = (struct group_statistics *) g_;
-
- int width = (int) aux;
+ unsigned id_hash;
if ( 0 == width )
- return hsh_hash_double (g->id.f);
+ id_hash = hsh_hash_double (g->id.f);
else
- return hsh_hash_bytes (g->id.s, width);
+ id_hash = hsh_hash_bytes (g->id.s, width);
+ return id_hash;
}
/* Internal variables used in calculating the Levene statistic */
/* An array of lz_stats for each variable */
static struct lz_stats *lz;
+/* Set to 1 if the groups require inequality comparisions */
+static int inequality_compare;
static void
int number_of_groups = v->p.t_t.n_groups ;
hash[i] = hsh_create (l->n_dep * number_of_groups,
- compare_group_id, hash_group_id,
+ (hsh_compare_func *) compare_group,
+ (hsh_hash_func *) hash_group,
0,(void *) l->v_indep->width);
lz[i].grand_total = 0;
for (g = 0 ; g < v->p.t_t.n_groups ; ++g )
{
struct group_statistics *gs = &v->p.t_t.gs[g];
- gs->lz_total=0;
- hsh_insert(hash[i],gs);
+ gs->lz_total = 0;
+ hsh_insert(hash[i], gs);
+ if ( gs->criterion != CMP_EQ )
+ {
+ inequality_compare = 1;
+ }
}
}
static int
levene_calc (struct ccase *c, void *_l)
{
- int var;
+ int i;
struct levene_info *l = (struct levene_info *) _l;
union value *gv = &c->data[l->v_indep->fv];
struct group_statistics key;
double weight = dict_get_case_weight(default_dict,c);
+
+
+ /* Skip the entire case if /MISSING=LISTWISE is set */
+ if ( l->missing == LEV_LISTWISE )
+ {
+ for (i = 0; i < l->n_dep; ++i)
+ {
+ struct variable *v = l->v_dep[i];
+ union value *val = &c->data[v->fv];
+
+ if (l->is_missing(val,v) )
+ {
+ return 0;
+ }
+ }
+ }
+
key.id = *gv;
+ key.criterion = CMP_EQ;
- for (var = 0; var < l->n_dep; ++var)
+ for (i = 0; i < l->n_dep; ++i)
{
+ struct variable *var = l->v_dep[i];
double levene_z;
- union value *v = &c->data[l->v_dep[var]->fv];
+ union value *v = &c->data[var->fv];
struct group_statistics *gs;
- gs = hsh_find(hash[var],&key);
- assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width));
-
- /* FIXME: handle SYSMIS properly */
+ gs = get_group(i,&key);
+ if ( 0 == gs )
+ continue ;
- levene_z= fabs(v->f - gs->mean);
- lz[var].grand_total += levene_z * weight;
- lz[var].total_n += weight;
-
- gs->lz_total += levene_z * weight;
+ if ( ! l->is_missing(v,var))
+ {
+ levene_z= fabs(v->f - gs->mean);
+ lz[i].grand_total += levene_z * weight;
+ lz[i].total_n += weight;
+ gs->lz_total += levene_z * weight;
+ }
}
return 0;
}
static int
levene2_calc (struct ccase *c, void *_l)
{
- int var;
+ int i;
struct levene_info *l = (struct levene_info *) _l;
union value *gv = &c->data[l->v_indep->fv];
struct group_statistics key;
+ /* Skip the entire case if /MISSING=LISTWISE is set */
+ if ( l->missing == LEV_LISTWISE )
+ {
+ for (i = 0; i < l->n_dep; ++i)
+ {
+ struct variable *v = l->v_dep[i];
+ union value *val = &c->data[v->fv];
+
+ if (l->is_missing(val,v) )
+ {
+ return 0;
+ }
+ }
+ }
+
key.id = *gv;
+ key.criterion = CMP_EQ;
- for (var = 0; var < l->n_dep; ++var)
+ for (i = 0; i < l->n_dep; ++i)
{
double levene_z;
- union value *v = &c->data[l->v_dep[var]->fv];
+ struct variable *var = l->v_dep[i] ;
+ union value *v = &c->data[var->fv];
struct group_statistics *gs;
- gs = hsh_find(hash[var],&key);
- assert(gs);
- assert(0 == compare_values(&gs->id, &key.id, l->v_indep->width));
-
- /* FIXME: handle SYSMIS properly */
-
- levene_z = fabs(v->f - gs->mean);
+ gs = get_group(i,&key);
+ if ( 0 == gs )
+ continue;
- lz_denominator[var] += weight * sqr(levene_z - gs->lz_mean);
+ if ( ! l->is_missing(v,var) )
+ {
+ levene_z = fabs(v->f - gs->mean);
+ lz_denominator[i] += weight * sqr(levene_z - gs->lz_mean);
+ }
}
return 0;
}
+/* Return the group belonging to the v_th dependent variable
+ which matches the key */
+static struct group_statistics *
+get_group(int v, struct group_statistics *key)
+{
+ struct group_statistics *gs;
+ gs = hsh_find(hash[v],key);
+
+
+ if ( ( !gs ) && inequality_compare)
+ {
+ /* Here we degrade to a linear search.
+ This would seem inefficient. However, it should only ever happen
+ with the T-TEST, for which there are exactly two groups */
+
+ struct hsh_iterator hi;
+
+ assert( hsh_count(hash[v]) == 2 ) ;
+ for(gs = (struct group_statistics *) hsh_first(hash[v],&hi);
+ gs != 0 ;
+ gs = (struct group_statistics *) hsh_next(hash[v],&hi) )
+ {
+ int cmp;
+
+ cmp = compare_values(&gs->id, &key->id, 0);
+
+ assert( cmp != 0 ); /* or else the hash would have found something */
+
+ if ( cmp == -1 &&
+ ( gs->criterion == CMP_GT || gs->criterion == CMP_GE )
+ )
+ break;
+
+ if ( cmp == 1 &&
+ ( gs->criterion == CMP_LT || gs->criterion == CMP_LE )
+ )
+ break;
+ }
+ }
+
+ return gs;
+}