You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
#include "levene.h"
#include "error.h"
#include "case.h"
#include "casefile.h"
+#include "dictionary.h"
+#include "group_proc.h"
#include "hash.h"
#include "str.h"
#include "var.h"
#include "vfm.h"
#include "alloc.h"
#include "misc.h"
-
+#include "group.h"
#include <math.h>
#include <stdlib.h>
*/
-static struct group_statistics *get_group(int v, struct group_statistics *key);
-
/* Settings consulted by the Levene passes.  Only the members touched by
   this change are visible here; the surrounding code also reads l->n_dep,
   l->v_dep and l->v_indep, so further members are presumably declared in
   lines this hunk does not show. */
struct levene_info
{
enum lev_missing missing;
/* Function to test for missing values */
- is_missing_func is_missing;
-
+ is_missing_func *is_missing; /* invoked below as is_missing (&var->miss, value) */
};
/* First pass */
}
-static struct hsh_table **hash;
-
-
-/* Return -1 if the id of a is less than b; +1 if greater than and
- 0 if equal */
-static int
-compare_group(const struct group_statistics *a,
- const struct group_statistics *b,
- int width)
-{
- int id_cmp = compare_values(&a->id, &b->id, width);
-
- if (id_cmp == 0 )
- {
- int c;
- c= memcmp(&a->criterion,&b->criterion,sizeof(enum comparison));
- return c;
- }
- else
- return id_cmp;
-}
-
-
-static unsigned
-hash_group(const struct group_statistics *g, int width)
-{
- unsigned id_hash;
-
- if ( 0 == width )
- id_hash = hsh_hash_double (g->id.f);
- else
- id_hash = hsh_hash_bytes (g->id.s, width);
-
- return id_hash;
-}
-
/* Internal variables used in calculating the Levene statistic */
/* Per variable statistics */
/* An array of lz_stats for each variable */
static struct lz_stats *lz;
-/* Set to 1 if the groups require inequality comparisions */
-static int inequality_compare;
-
/* Set-up before the first data pass, as far as this hunk shows it:
   allocates the file-scope `lz' array with one lz_stats entry per
   dependent variable (l->n_dep), zeroes each entry's grand_total and
   total_n, records its group count, and zeroes lz_total in every
   group_statistics found in the variable's group hash.
   Lines marked `-' are the code being removed (a private hash table per
   variable, filled from v->p.t_t.gs); lines marked `+' replace it with the
   hash reached through group_proc_get().  The opening brace and the
   declaration of `i' fall outside the visible lines. */
static void
levene_precalc (const struct levene_info *l)
lz = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
- hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep );
-
- for(i=0; i < l->n_dep ; ++i )
+ for(i = 0; i < l->n_dep ; ++i )
{
- struct variable *v = l->v_dep[i];
- int g;
- int number_of_groups = v->p.t_t.n_groups ;
-
- hash[i] = hsh_create (l->n_dep * number_of_groups,
- (hsh_compare_func *) compare_group,
- (hsh_hash_func *) hash_group,
- 0,(void *) l->v_indep->width);
+ struct variable *var = l->v_dep[i];
+ struct group_proc *gp = group_proc_get (var); /* supplies n_groups and group_hash used below */
+ struct group_statistics *gs;
+ struct hsh_iterator hi;
lz[i].grand_total = 0;
lz[i].total_n = 0;
- lz[i].n_groups = number_of_groups;
+ lz[i].n_groups = gp->n_groups ; /* group count now read from gp rather than v->p.t_t */
- for (g = 0 ; g < v->p.t_t.n_groups ; ++g )
+
+ for ( gs = hsh_first(gp->group_hash, &hi); /* iterate over the entries of the existing hash; nothing is inserted here any more */
+ gs != 0; /* loop ends when the iterator hands back 0 */
+ gs = hsh_next(gp->group_hash, &hi))
{
- struct group_statistics *gs = &v->p.t_t.gs[g];
gs->lz_total = 0;
- hsh_insert(hash[i], gs);
- if ( gs->criterion != CMP_EQ )
- {
- inequality_compare = 1;
- }
}
+
}
}
struct group_statistics key;
double weight = dict_get_case_weight(default_dict,c,&warn);
-
/* Skip the entire case if /MISSING=LISTWISE is set */
if ( l->missing == LEV_LISTWISE )
{
struct variable *v = l->v_dep[i];
const union value *val = case_data (c, v->fv);
- if (l->is_missing(val,v) )
+ if (l->is_missing (&v->miss, val) )
{
return 0;
}
key.id = *gv;
- key.criterion = CMP_EQ;
for (i = 0; i < l->n_dep; ++i)
{
struct variable *var = l->v_dep[i];
+ struct group_proc *gp = group_proc_get (var);
double levene_z;
const union value *v = case_data (c, var->fv);
struct group_statistics *gs;
- gs = get_group(i,&key);
+
+ gs = hsh_find(gp->group_hash,(void *) &key );
+
if ( 0 == gs )
continue ;
- if ( ! l->is_missing(v,var))
+ if ( ! l->is_missing(&var->miss, v))
{
levene_z= fabs(v->f - gs->mean);
lz[i].grand_total += levene_z * weight;
gs->lz_total += levene_z * weight;
}
+
}
return 0;
}
for (v = 0; v < l->n_dep; ++v)
{
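+ /* This is Z_LL: the grand mean of the Levene Z values, i.e. the weighted sum of |x - group mean| accumulated in grand_total, divided by total_n */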
lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
-
}
+
}
{
struct hsh_iterator hi;
struct group_statistics *g;
- for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
+
+ struct variable *var = l->v_dep[v] ;
+ struct hsh_table *hash = group_proc_get (var)->group_hash;
+
+
+ for(g = (struct group_statistics *) hsh_first(hash,&hi);
g != 0 ;
- g = (struct group_statistics *) hsh_next(hash[v],&hi) )
+ g = (struct group_statistics *) hsh_next(hash,&hi) )
{
- g->lz_mean = g->lz_total/g->n ;
+ g->lz_mean = g->lz_total / g->n ;
}
lz_denominator[v] = 0;
}
struct variable *v = l->v_dep[i];
const union value *val = case_data (c, v->fv);
- if (l->is_missing(val,v) )
+ if (l->is_missing(&v->miss, val) )
{
return 0;
}
}
key.id = *gv;
- key.criterion = CMP_EQ;
for (i = 0; i < l->n_dep; ++i)
{
struct variable *var = l->v_dep[i] ;
const union value *v = case_data (c, var->fv);
struct group_statistics *gs;
- gs = get_group(i,&key);
+
+ gs = hsh_find(group_proc_get (var)->group_hash,(void *) &key );
+
if ( 0 == gs )
continue;
- if ( ! l->is_missing(v,var) )
+ if ( ! l->is_missing (&var->miss, v) )
{
levene_z = fabs(v->f - gs->mean);
lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean);
double lz_numerator = 0;
struct hsh_iterator hi;
struct group_statistics *g;
- for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
+
+ struct variable *var = l->v_dep[v] ;
+ struct group_proc *gp = group_proc_get (var);
+ struct hsh_table *hash = gp->group_hash;
+
+ for(g = (struct group_statistics *) hsh_first(hash,&hi);
g != 0 ;
- g = (struct group_statistics *) hsh_next(hash[v],&hi) )
+ g = (struct group_statistics *) hsh_next(hash,&hi) )
{
-
lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
-
-
}
- lz_numerator *= ( l->v_dep[v]->p.t_t.ugs.n -
- l->v_dep[v]->p.t_t.n_groups );
+ lz_numerator *= ( gp->ugs.n - gp->n_groups );
+
+ lz_denominator[v] *= (gp->n_groups - 1);
+
+ gp->levene = lz_numerator / lz_denominator[v] ;
- lz_denominator[v] /= (l->v_dep[v]->p.t_t.n_groups - 1);
-
- l->v_dep[v]->p.t_t.levene = lz_numerator/lz_denominator[v] ;
}
/* Now clear up after ourselves */
free(lz_denominator);
- for (v = 0; v < l->n_dep; ++v)
- {
- hsh_destroy(hash[v]);
- }
-
- free(hash);
free(lz);
}
-
-/* Return the group belonging to the v_th dependent variable
- which matches the key */
-static struct group_statistics *
-get_group(int v, struct group_statistics *key)
-{
- struct group_statistics *gs;
- gs = hsh_find(hash[v],key);
-
-
- if ( ( !gs ) && inequality_compare)
- {
- /* Here we degrade to a linear search.
- This would seem inefficient. However, it should only ever happen
- with the T-TEST, for which there are exactly two groups */
-
- struct hsh_iterator hi;
-
- assert( hsh_count(hash[v]) == 2 ) ;
- for(gs = (struct group_statistics *) hsh_first(hash[v],&hi);
- gs != 0 ;
- gs = (struct group_statistics *) hsh_next(hash[v],&hi) )
- {
- int cmp;
-
- cmp = compare_values(&gs->id, &key->id, 0);
-
- assert( cmp != 0 ); /* or else the hash would have found something */
-
- if ( cmp == -1 &&
- ( gs->criterion == CMP_GT || gs->criterion == CMP_GE )
- )
- break;
-
- if ( cmp == 1 &&
- ( gs->criterion == CMP_LT || gs->criterion == CMP_LE )
- )
- break;
- }
- }
-
- return gs;
-}