You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
-#include <assert.h>
#include "levene.h"
+#include "error.h"
+#include "case.h"
+#include "casefile.h"
+#include "dictionary.h"
+#include "group_proc.h"
#include "hash.h"
#include "str.h"
#include "var.h"
#include "vfm.h"
#include "alloc.h"
-#include "stats.h"
+#include "misc.h"
+#include "group.h"
#include <math.h>
#include <stdlib.h>
*/
-static struct group_statistics *get_group(int v, struct group_statistics *key);
-
-/* First pass */
-static void levene_precalc (void *);
-static int levene_calc (struct ccase *, void *);
-static void levene_postcalc (void *);
-
-
-/* Second pass */
-static void levene2_precalc (void *);
-static int levene2_calc (struct ccase *, void *);
-static void levene2_postcalc (void *);
-
struct levene_info
{
- /* The number of groups */
- int n_groups;
-
/* Per group statistics */
struct t_test_proc **group_stats;
struct variable *v_indep;
/* Number of dependent variables */
- int n_dep;
+ size_t n_dep; /* upper bound of the loops over v_dep[] */
/* The dependent variables */
struct variable **v_dep;
enum lev_missing missing;
/* Function to test for missing values */
- is_missing_func is_missing;
-
+ is_missing_func *is_missing; /* invoked as is_missing (&var->miss, value) */
};
+/* First pass: accumulates weighted |value - group mean| per group and per variable */
+static void levene_precalc (const struct levene_info *l);
+static int levene_calc (const struct ccase *, void *);
+static void levene_postcalc (void *);
+
+
+/* Second pass: accumulates weighted squared differences between each deviation and its group's mean deviation */
+static void levene2_precalc (void *);
+static int levene2_calc (const struct ccase *, void *);
+static void levene2_postcalc (void *);
void
-levene(struct variable *v_indep, int n_dep, struct variable **v_dep,
+levene(const struct casefile *cf, /* the cases; read through twice, once per pass */
+ struct variable *v_indep, size_t n_dep, struct variable **v_dep,
enum lev_missing missing, is_missing_func value_is_missing)
{
+ struct casereader *r;
+ struct ccase c;
struct levene_info l;
- l.n_dep = n_dep;
- l.v_indep = v_indep;
- l.v_dep = v_dep;
- l.missing = missing;
+ l.n_dep = n_dep;
+ l.v_indep = v_indep;
+ l.v_dep = v_dep;
+ l.missing = missing;
l.is_missing = value_is_missing;
- procedure_with_splits (levene_precalc, levene_calc, levene_postcalc, &l);
- procedure_with_splits (levene2_precalc, levene2_calc, levene2_postcalc, &l);
-
-}
-static struct hsh_table **hash;
-/* Return -1 if the id of a is less than b; +1 if greater than and
- 0 if equal */
-static int
-compare_group(const struct group_statistics *a,
- const struct group_statistics *b,
- int width)
-{
- int id_cmp = compare_values(&a->id, &b->id, width);
-
- if (id_cmp == 0 )
+ levene_precalc(&l); /* pass 1 setup: allocate lz and zero the accumulators */
+ for(r = casefile_get_reader (cf);
+ casereader_read (r, &c) ;
+ case_destroy (&c)) /* each case is destroyed once it has been processed */
{
- int c;
- c= memcmp(&a->criterion,&b->criterion,sizeof(enum comparison));
- return c;
+ levene_calc(&c,&l);
}
- else
- return id_cmp;
-}
+ casereader_destroy (r);
+ levene_postcalc(&l); /* per-variable grand mean of the deviations */
+ levene2_precalc(&l); /* pass 2 setup: per-group mean deviations, zeroed denominators */
+ for(r = casefile_get_reader (cf);
+ casereader_read (r, &c) ;
+ case_destroy (&c))
+ {
+ levene2_calc(&c,&l);
+ }
+ casereader_destroy (r);
+ levene2_postcalc(&l); /* stores each variable's statistic, then frees the work arrays */
-static unsigned
-hash_group(const struct group_statistics *g, int width)
-{
- unsigned id_hash;
-
- if ( 0 == width )
- id_hash = hsh_hash_double (g->id.f);
- else
- id_hash = hsh_hash_bytes (g->id.s, width);
-
- return id_hash;
}
/* Internal variables used in calculating the Levene statistic */
/* An array of lz_stats for each variable */
static struct lz_stats *lz;
-/* Set to 1 if the groups require inequality comparisions */
-static int inequality_compare;
-
static void
-levene_precalc (void *_l)
+levene_precalc (const struct levene_info *l) /* allocates lz and zeroes the pass-1 accumulators */
{
- int i;
- struct levene_info *l = (struct levene_info *) _l;
-
- lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
+ size_t i;
- hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep );
+ lz = xnmalloc (l->n_dep, sizeof *lz); /* one lz_stats per dependent variable; freed in levene2_postcalc */
- for(i=0; i < l->n_dep ; ++i )
+ for(i = 0; i < l->n_dep ; ++i )
{
- struct variable *v = l->v_dep[i];
- int g;
- int number_of_groups = v->p.t_t.n_groups ;
-
- hash[i] = hsh_create (l->n_dep * number_of_groups,
- (hsh_compare_func *) compare_group,
- (hsh_hash_func *) hash_group,
- 0,(void *) l->v_indep->width);
+ struct variable *var = l->v_dep[i];
+ struct group_proc *gp = group_proc_get (var);
+ struct group_statistics *gs;
+ struct hsh_iterator hi;
lz[i].grand_total = 0;
lz[i].total_n = 0;
- lz[i].n_groups = number_of_groups;
+ lz[i].n_groups = gp->n_groups ;
- for (g = 0 ; g < v->p.t_t.n_groups ; ++g )
+ /* Clear the running deviation total of every group in this variable's group_hash */
+ for ( gs = hsh_first(gp->group_hash, &hi);
+ gs != 0;
+ gs = hsh_next(gp->group_hash, &hi))
{
- struct group_statistics *gs = &v->p.t_t.gs[g];
gs->lz_total = 0;
- hsh_insert(hash[i], gs);
- if ( gs->criterion != CMP_EQ )
- {
- inequality_compare = 1;
- }
}
+
}
}
static int
-levene_calc (struct ccase *c, void *_l)
+levene_calc (const struct ccase *c, void *_l) /* pass-1 work for one case; _l is the struct levene_info */
{
- int i;
+ size_t i;
+ int warn = 0; /* passed by address to dict_get_case_weight below */
struct levene_info *l = (struct levene_info *) _l;
- union value *gv = &c->data[l->v_indep->fv];
+ const union value *gv = case_data (c, l->v_indep->fv);
struct group_statistics key;
- double weight = dict_get_case_weight(default_dict,c);
-
+ double weight = dict_get_case_weight(default_dict,c,&warn);
/* Skip the entire case if /MISSING=LISTWISE is set */
if ( l->missing == LEV_LISTWISE )
for (i = 0; i < l->n_dep; ++i)
{
struct variable *v = l->v_dep[i];
- union value *val = &c->data[v->fv];
+ const union value *val = case_data (c, v->fv);
- if (l->is_missing(val,v) )
+ if (l->is_missing (&v->miss, val) )
{
return 0;
}
key.id = *gv;
- key.criterion = CMP_EQ;
for (i = 0; i < l->n_dep; ++i)
{
struct variable *var = l->v_dep[i];
+ struct group_proc *gp = group_proc_get (var);
double levene_z;
- union value *v = &c->data[var->fv];
+ const union value *v = case_data (c, var->fv);
struct group_statistics *gs;
- gs = get_group(i,&key);
+ /* Find this variable's group entry for the case; key.id is the case's value of the independent variable */
+ gs = hsh_find(gp->group_hash,(void *) &key );
+ /* No entry found: the case adds nothing for this variable */
if ( 0 == gs )
continue ;
- if ( ! l->is_missing(v,var))
+ if ( ! l->is_missing(&var->miss, v))
{
levene_z= fabs(v->f - gs->mean);
lz[i].grand_total += levene_z * weight;
gs->lz_total += levene_z * weight;
}
+
}
return 0;
}
static void
levene_postcalc (void *_l)
{
- int v;
+ size_t v; /* index over the l->n_dep dependent variables */
struct levene_info *l = (struct levene_info *) _l;
for (v = 0; v < l->n_dep; ++v)
{
+ /* This is Z_LL: the weighted total of absolute deviations divided by total_n */
lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
-
}
+
}
static void
levene2_precalc (void *_l)
{
- int v;
+ size_t v;
struct levene_info *l = (struct levene_info *) _l;
- lz_denominator = (double *) xmalloc(sizeof(double) * l->n_dep);
+ lz_denominator = xnmalloc (l->n_dep, sizeof *lz_denominator); /* one accumulator per dependent variable; released in levene2_postcalc */
/* This stuff could go in the first post calc . . . */
for (v = 0; v < l->n_dep; ++v)
{
struct hsh_iterator hi;
struct group_statistics *g;
- for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
+
+ struct variable *var = l->v_dep[v] ;
+ struct hsh_table *hash = group_proc_get (var)->group_hash;
+
+ /* Convert each group's accumulated deviation total into a mean by dividing by the group's n */
+ for(g = (struct group_statistics *) hsh_first(hash,&hi);
g != 0 ;
- g = (struct group_statistics *) hsh_next(hash[v],&hi) )
+ g = (struct group_statistics *) hsh_next(hash,&hi) )
{
- g->lz_mean = g->lz_total/g->n ;
+ g->lz_mean = g->lz_total / g->n ;
}
lz_denominator[v] = 0;
}
}
static int
-levene2_calc (struct ccase *c, void *_l)
+levene2_calc (const struct ccase *c, void *_l) /* pass-2 work for one case; _l is the struct levene_info */
{
- int i;
+ size_t i;
+ int warn = 0;
struct levene_info *l = (struct levene_info *) _l;
- double weight = dict_get_case_weight(default_dict,c);
+ double weight = dict_get_case_weight(default_dict,c,&warn);
- union value *gv = &c->data[l->v_indep->fv];
+ const union value *gv = case_data (c, l->v_indep->fv);
struct group_statistics key;
/* Skip the entire case if /MISSING=LISTWISE is set */
for (i = 0; i < l->n_dep; ++i)
{
struct variable *v = l->v_dep[i];
- union value *val = &c->data[v->fv];
+ const union value *val = case_data (c, v->fv);
- if (l->is_missing(val,v) )
+ if (l->is_missing(&v->miss, val) ) /* NOTE(review): levene_calc guards the equivalent loop with l->missing == LEV_LISTWISE; confirm whether this pass should too */
{
return 0;
}
}
key.id = *gv;
- key.criterion = CMP_EQ;
for (i = 0; i < l->n_dep; ++i)
{
double levene_z;
struct variable *var = l->v_dep[i] ;
- union value *v = &c->data[var->fv];
+ const union value *v = case_data (c, var->fv);
struct group_statistics *gs;
- gs = get_group(i,&key);
+ /* Group lookup keyed on the case's value of the independent variable (key.id) */
+ gs = hsh_find(group_proc_get (var)->group_hash,(void *) &key );
+ /* No entry found: skip this variable for this case */
if ( 0 == gs )
continue;
- if ( ! l->is_missing(v,var) )
+ if ( ! l->is_missing (&var->miss, v) )
{
levene_z = fabs(v->f - gs->mean);
- lz_denominator[i] += weight * sqr(levene_z - gs->lz_mean);
+ lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean); /* weighted squared distance of this deviation from its group's mean deviation */
}
}
static void
levene2_postcalc (void *_l)
{
- int v;
+ size_t v;
struct levene_info *l = (struct levene_info *) _l;
double lz_numerator = 0;
struct hsh_iterator hi;
struct group_statistics *g;
- for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
+
+ struct variable *var = l->v_dep[v] ;
+ struct group_proc *gp = group_proc_get (var);
+ struct hsh_table *hash = gp->group_hash;
+ /* Add up, over this variable's groups, n times the squared gap between the group's mean deviation and the grand mean */
+ for(g = (struct group_statistics *) hsh_first(hash,&hi);
g != 0 ;
- g = (struct group_statistics *) hsh_next(hash[v],&hi) )
+ g = (struct group_statistics *) hsh_next(hash,&hi) )
{
+ lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
+ }
+ lz_numerator *= ( gp->ugs.n - gp->n_groups ); /* multiply by (ugs.n - number of groups) */
- lz_numerator += g->n * sqr(g->lz_mean - lz[v].grand_mean );
-
+ lz_denominator[v] *= (gp->n_groups - 1); /* multiply by (number of groups - 1) */
- }
- lz_numerator *= ( l->v_dep[v]->p.t_t.ugs.n -
- l->v_dep[v]->p.t_t.n_groups );
+ gp->levene = lz_numerator / lz_denominator[v] ; /* the quotient is stored as this variable's levene value */
- lz_denominator[v] /= (l->v_dep[v]->p.t_t.n_groups - 1);
-
- l->v_dep[v]->p.t_t.levene = lz_numerator/lz_denominator[v] ;
}
/* Now clear up after ourselves */
free(lz_denominator);
- for (v = 0; v < l->n_dep; ++v)
- {
- hsh_destroy(hash[v]);
- }
-
- free(hash);
free(lz);
}
-
-/* Return the group belonging to the v_th dependent variable
- which matches the key */
-static struct group_statistics *
-get_group(int v, struct group_statistics *key)
-{
- struct group_statistics *gs;
- gs = hsh_find(hash[v],key);
-
-
- if ( ( !gs ) && inequality_compare)
- {
- /* Here we degrade to a linear search.
- This would seem inefficient. However, it should only ever happen
- with the T-TEST, for which there are exactly two groups */
-
- struct hsh_iterator hi;
-
- assert( hsh_count(hash[v]) == 2 ) ;
- for(gs = (struct group_statistics *) hsh_first(hash[v],&hi);
- gs != 0 ;
- gs = (struct group_statistics *) hsh_next(hash[v],&hi) )
- {
- int cmp;
-
- cmp = compare_values(&gs->id, &key->id, 0);
-
- assert( cmp != 0 ); /* or else the hash would have found something */
-
- if ( cmp == -1 &&
- ( gs->criterion == CMP_GT || gs->criterion == CMP_GE )
- )
- break;
-
- if ( cmp == 1 &&
- ( gs->criterion == CMP_LT || gs->criterion == CMP_LE )
- )
- break;
- }
- }
-
- return gs;
-}