02111-1307, USA. */
#include <config.h>
+#include "levene.h"
#include "error.h"
+#include "case.h"
#include "casefile.h"
-#include "levene.h"
#include "hash.h"
#include "str.h"
#include "var.h"
#include "vfm.h"
#include "alloc.h"
#include "misc.h"
-
+#include "group.h"
#include <math.h>
#include <stdlib.h>
*/
-static struct group_statistics *get_group(int v, struct group_statistics *key);
-
struct levene_info
{
enum lev_missing missing, is_missing_func value_is_missing)
{
struct casereader *r;
- const struct ccase *c;
+ struct ccase c;
struct levene_info l;
l.n_dep = n_dep;
levene_precalc(&l);
for(r = casefile_get_reader (cf);
- casereader_read (r, &c) ; )
+ casereader_read (r, &c) ;
+ case_destroy (&c))
{
- levene_calc(c,&l);
+ levene_calc(&c,&l);
}
casereader_destroy (r);
levene_postcalc(&l);
levene2_precalc(&l);
for(r = casefile_get_reader (cf);
- casereader_read (r, &c) ; )
+ casereader_read (r, &c) ;
+ case_destroy (&c))
{
- levene2_calc(c,&l);
+ levene2_calc(&c,&l);
}
casereader_destroy (r);
levene2_postcalc(&l);
}
-static struct hsh_table **hash;
-
-
-/* Return -1 if the id of a is less than b; +1 if greater than and
- 0 if equal */
-static int
-compare_group(const struct group_statistics *a,
- const struct group_statistics *b,
- int width)
-{
- int id_cmp = compare_values(&a->id, &b->id, width);
-
- if (id_cmp == 0 )
- {
- int c;
- c= memcmp(&a->criterion,&b->criterion,sizeof(enum comparison));
- return c;
- }
- else
- return id_cmp;
-}
-
-
-static unsigned
-hash_group(const struct group_statistics *g, int width)
-{
- unsigned id_hash;
-
- if ( 0 == width )
- id_hash = hsh_hash_double (g->id.f);
- else
- id_hash = hsh_hash_bytes (g->id.s, width);
-
- return id_hash;
-}
-
/* Internal variables used in calculating the Levene statistic */
/* Per variable statistics */
/* An array of lz_stats for each variable */
static struct lz_stats *lz;
-/* Set to 1 if the groups require inequality comparisions */
-static int inequality_compare;
-
static void
levene_precalc (const struct levene_info *l)
lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
- hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep );
-
for(i=0; i < l->n_dep ; ++i )
{
struct variable *v = l->v_dep[i];
- int g;
- int number_of_groups = v->p.t_t.n_groups ;
-
- hash[i] = hsh_create (l->n_dep * number_of_groups,
- (hsh_compare_func *) compare_group,
- (hsh_hash_func *) hash_group,
- 0,(void *) l->v_indep->width);
lz[i].grand_total = 0;
lz[i].total_n = 0;
- lz[i].n_groups = number_of_groups;
-
- for (g = 0 ; g < v->p.t_t.n_groups ; ++g )
- {
- struct group_statistics *gs = &v->p.t_t.gs[g];
- gs->lz_total = 0;
- hsh_insert(hash[i], gs);
- if ( gs->criterion != CMP_EQ )
- {
- inequality_compare = 1;
- }
- }
+ lz[i].n_groups = v->p.grp_data.n_groups ;
}
}
levene_calc (const struct ccase *c, void *_l)
{
int i;
+ int warn = 0;
struct levene_info *l = (struct levene_info *) _l;
- const union value *gv = &c->data[l->v_indep->fv];
+ const union value *gv = case_data (c, l->v_indep->fv);
struct group_statistics key;
- double weight = dict_get_case_weight(default_dict,c);
-
+ double weight = dict_get_case_weight(default_dict,c,&warn);
/* Skip the entire case if /MISSING=LISTWISE is set */
if ( l->missing == LEV_LISTWISE )
for (i = 0; i < l->n_dep; ++i)
{
struct variable *v = l->v_dep[i];
- const union value *val = &c->data[v->fv];
+ const union value *val = case_data (c, v->fv);
if (l->is_missing(val,v) )
{
key.id = *gv;
- key.criterion = CMP_EQ;
for (i = 0; i < l->n_dep; ++i)
{
struct variable *var = l->v_dep[i];
double levene_z;
- const union value *v = &c->data[var->fv];
+ const union value *v = case_data (c, var->fv);
struct group_statistics *gs;
- gs = get_group(i,&key);
+
+ gs = hsh_find(var->p.grp_data.group_hash,(void *) &key );
+
if ( 0 == gs )
continue ;
for (v = 0; v < l->n_dep; ++v)
{
+ /* This is Z_LL: the grand mean for variable v (grand_total / total_n) */
lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
-
}
}
{
struct hsh_iterator hi;
struct group_statistics *g;
- for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
+
+ struct variable *var = l->v_dep[v] ;
+ struct hsh_table *hash = var->p.grp_data.group_hash;
+
+
+ for(g = (struct group_statistics *) hsh_first(hash,&hi);
g != 0 ;
- g = (struct group_statistics *) hsh_next(hash[v],&hi) )
+ g = (struct group_statistics *) hsh_next(hash,&hi) )
{
g->lz_mean = g->lz_total/g->n ;
}
levene2_calc (const struct ccase *c, void *_l)
{
int i;
+ int warn = 0;
struct levene_info *l = (struct levene_info *) _l;
- double weight = dict_get_case_weight(default_dict,c);
+ double weight = dict_get_case_weight(default_dict,c,&warn);
- const union value *gv = &c->data[l->v_indep->fv];
+ const union value *gv = case_data (c, l->v_indep->fv);
struct group_statistics key;
/* Skip the entire case if /MISSING=LISTWISE is set */
for (i = 0; i < l->n_dep; ++i)
{
struct variable *v = l->v_dep[i];
- const union value *val = &c->data[v->fv];
+ const union value *val = case_data (c, v->fv);
if (l->is_missing(val,v) )
{
}
key.id = *gv;
- key.criterion = CMP_EQ;
for (i = 0; i < l->n_dep; ++i)
{
double levene_z;
struct variable *var = l->v_dep[i] ;
- const union value *v = &c->data[var->fv];
+ const union value *v = case_data (c, var->fv);
struct group_statistics *gs;
- gs = get_group(i,&key);
+
+ gs = hsh_find(var->p.grp_data.group_hash,(void *) &key );
+
if ( 0 == gs )
continue;
double lz_numerator = 0;
struct hsh_iterator hi;
struct group_statistics *g;
- for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
+
+ struct variable *var = l->v_dep[v] ;
+ struct hsh_table *hash = var->p.grp_data.group_hash;
+
+ for(g = (struct group_statistics *) hsh_first(hash,&hi);
g != 0 ;
- g = (struct group_statistics *) hsh_next(hash[v],&hi) )
+ g = (struct group_statistics *) hsh_next(hash,&hi) )
{
-
lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
-
-
}
- lz_numerator *= ( l->v_dep[v]->p.t_t.ugs.n -
- l->v_dep[v]->p.t_t.n_groups );
+ lz_numerator *= ( l->v_dep[v]->p.grp_data.ugs.n -
+ l->v_dep[v]->p.grp_data.n_groups );
- lz_denominator[v] /= (l->v_dep[v]->p.t_t.n_groups - 1);
+ lz_denominator[v] *= (l->v_dep[v]->p.grp_data.n_groups - 1);
- l->v_dep[v]->p.t_t.levene = lz_numerator/lz_denominator[v] ;
+ l->v_dep[v]->p.grp_data.levene = lz_numerator/lz_denominator[v] ;
+
}
/* Now clear up after ourselves */
free(lz_denominator);
- for (v = 0; v < l->n_dep; ++v)
- {
- hsh_destroy(hash[v]);
- }
-
- free(hash);
free(lz);
}
-
-/* Return the group belonging to the v_th dependent variable
- which matches the key */
-static struct group_statistics *
-get_group(int v, struct group_statistics *key)
-{
- struct group_statistics *gs;
- gs = hsh_find(hash[v],key);
-
-
- if ( ( !gs ) && inequality_compare)
- {
- /* Here we degrade to a linear search.
- This would seem inefficient. However, it should only ever happen
- with the T-TEST, for which there are exactly two groups */
-
- struct hsh_iterator hi;
-
- assert( hsh_count(hash[v]) == 2 ) ;
- for(gs = (struct group_statistics *) hsh_first(hash[v],&hi);
- gs != 0 ;
- gs = (struct group_statistics *) hsh_next(hash[v],&hi) )
- {
- int cmp;
-
- cmp = compare_values(&gs->id, &key->id, 0);
-
- assert( cmp != 0 ); /* or else the hash would have found something */
-
- if ( cmp == -1 &&
- ( gs->criterion == CMP_GT || gs->criterion == CMP_GE )
- )
- break;
-
- if ( cmp == 1 &&
- ( gs->criterion == CMP_LT || gs->criterion == CMP_LE )
- )
- break;
- }
- }
-
- return gs;
-}