Added (some of the) calculations for the examine command.

[pspp] / src / levene.c
diff --git a/src/levene.c b/src/levene.c

index 5b633746e305845416f3215a94e54d8a22c4e51a..e29e77629c110000b250a07667475e523522cda5 100644 (file)
--- a/src/levene.c
+++ b/src/levene.c
@@ -20,16 +20,17 @@
     02111-1307, USA. */
  
  #include <config.h>
+#include "levene.h"
  #include "error.h"
+#include "case.h"
  #include "casefile.h"
-#include "levene.h"
  #include "hash.h"
  #include "str.h"
  #include "var.h"
  #include "vfm.h"
  #include "alloc.h"
  #include "misc.h"
-
+#include "group.h"
  
  #include <math.h>
  #include <stdlib.h>
@@ -54,8 +55,6 @@
  
   */
  
-static struct group_statistics *get_group(int v, struct group_statistics *key);
-
  
  struct levene_info
  {
@@ -98,7 +97,7 @@ levene(const struct casefile *cf,
              enum lev_missing missing,   is_missing_func value_is_missing)
  {
    struct casereader *r;
-  const struct ccase *c;
+  struct ccase c;
    struct levene_info l;
  
    l.n_dep      = n_dep;
@@ -111,60 +110,26 @@ levene(const struct casefile *cf,
  
    levene_precalc(&l);
    for(r = casefile_get_reader (cf);
-      casereader_read (r, &c) ; ) 
+      casereader_read (r, &c) ;
+      case_destroy (&c)) 
      {
-      levene_calc(c,&l);
+      levene_calc(&c,&l);
      }
    casereader_destroy (r);
    levene_postcalc(&l);
  
    levene2_precalc(&l);
    for(r = casefile_get_reader (cf);
-      casereader_read (r, &c) ; ) 
+      casereader_read (r, &c) ;
+      case_destroy (&c)) 
      {
-      levene2_calc(c,&l);
+      levene2_calc(&c,&l);
      }
    casereader_destroy (r);
    levene2_postcalc(&l);
  
  }
  
-static struct hsh_table **hash;
-
-
-/* Return -1 if the id of a is less than b; +1 if greater than and 
-   0 if equal */
-static int 
-compare_group(const struct group_statistics *a, 
-                const struct group_statistics *b, 
-                int width)
-{
-  int id_cmp = compare_values(&a->id, &b->id, width);
-
-  if (id_cmp == 0 ) 
-    {
-      int c;
-      c= memcmp(&a->criterion,&b->criterion,sizeof(enum comparison));
-      return c;
-    }
-  else
-    return id_cmp;
-}
-
-
-static unsigned 
-hash_group(const struct group_statistics *g, int width)
-{
-  unsigned id_hash;
-
-  if ( 0 == width ) 
-    id_hash = hsh_hash_double (g->id.f);
-  else
-    id_hash = hsh_hash_bytes (g->id.s, width);
-
-  return id_hash;
-}
-
  /* Internal variables used in calculating the Levene statistic */
  
  /* Per variable statistics */
@@ -186,9 +151,6 @@ struct lz_stats
  /* An array of lz_stats for each variable */
  static struct lz_stats *lz;
  
-/* Set to 1 if the groups require inequality comparisions */ 
-static int inequality_compare;
-
  
  static void 
  levene_precalc (const struct levene_info *l)
@@ -197,33 +159,13 @@ levene_precalc (const struct levene_info *l)
  
    lz  = xmalloc (sizeof (struct lz_stats ) * l->n_dep ) ;
  
-  hash = xmalloc (sizeof ( struct hsh_table *) * l->n_dep );
-
    for(i=0; i < l->n_dep ; ++i ) 
      {
        struct variable *v = l->v_dep[i];
-      int g;
-      int number_of_groups = v->p.t_t.n_groups ; 
-
-      hash[i] = hsh_create (l->n_dep * number_of_groups,
-                           (hsh_compare_func *) compare_group, 
-                           (hsh_hash_func *) hash_group,
-                           0,(void *) l->v_indep->width);
  
        lz[i].grand_total = 0;
        lz[i].total_n = 0;
-      lz[i].n_groups = number_of_groups;
-
-      for (g = 0 ; g < v->p.t_t.n_groups ; ++g ) 
-       {
-         struct group_statistics *gs = &v->p.t_t.gs[g];
-         gs->lz_total = 0;
-         hsh_insert(hash[i], gs);
-         if ( gs->criterion != CMP_EQ ) 
-           {
-             inequality_compare = 1;
-           }
-       }
+      lz[i].n_groups = v->p.grp_data.n_groups ; 
      }
  
  }
@@ -232,11 +174,11 @@ static int
  levene_calc (const struct ccase *c, void *_l)
  {
    int i;
+  int warn = 0;
    struct levene_info *l = (struct levene_info *) _l;
-  const union value *gv = &c->data[l->v_indep->fv];
+  const union value *gv = case_data (c, l->v_indep->fv);
    struct group_statistics key;
-  double weight = dict_get_case_weight(default_dict,c); 
-
+  double weight = dict_get_case_weight(default_dict,c,&warn); 
  
    /* Skip the entire case if /MISSING=LISTWISE is set */
    if ( l->missing == LEV_LISTWISE ) 
@@ -244,7 +186,7 @@ levene_calc (const struct ccase *c, void *_l)
        for (i = 0; i < l->n_dep; ++i) 
         {
           struct variable *v = l->v_dep[i];
-         const union value *val = &c->data[v->fv];
+         const union value *val = case_data (c, v->fv);
  
           if (l->is_missing(val,v) )
             {
@@ -255,15 +197,16 @@ levene_calc (const struct ccase *c, void *_l)
  
    
    key.id = *gv;
-  key.criterion = CMP_EQ;
  
    for (i = 0; i < l->n_dep; ++i) 
      {
        struct variable *var = l->v_dep[i];
        double levene_z;
-      const union value *v = &c->data[var->fv];
+      const union value *v = case_data (c, var->fv);
        struct group_statistics *gs;
-      gs = get_group(i,&key); 
+
+      gs = hsh_find(var->p.grp_data.group_hash,(void *) &key );
+
        if ( 0 == gs ) 
         continue ;
  
@@ -289,8 +232,8 @@ levene_postcalc (void *_l)
  
    for (v = 0; v < l->n_dep; ++v) 
      {
+      /* This is Z_LL, the grand mean: grand_total divided by total_n */
        lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
-
      }
  
  }
@@ -313,9 +256,14 @@ levene2_precalc (void *_l)
      {
        struct hsh_iterator hi;
        struct group_statistics *g;
-      for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
+
+      struct variable *var = l->v_dep[v] ;
+      struct hsh_table *hash = var->p.grp_data.group_hash;
+
+
+      for(g = (struct group_statistics *) hsh_first(hash,&hi);
           g != 0 ;
-         g = (struct group_statistics *) hsh_next(hash[v],&hi) )
+         g = (struct group_statistics *) hsh_next(hash,&hi) )
         {
           g->lz_mean = g->lz_total/g->n ;
         }
@@ -327,12 +275,13 @@ static int
  levene2_calc (const struct ccase *c, void *_l)
  {
    int i;
+  int warn = 0;
  
    struct levene_info *l = (struct levene_info *) _l;
  
-  double weight = dict_get_case_weight(default_dict,c); 
+  double weight = dict_get_case_weight(default_dict,c,&warn); 
  
-  const union value *gv = &c->data[l->v_indep->fv];
+  const union value *gv = case_data (c, l->v_indep->fv);
    struct group_statistics key;
  
    /* Skip the entire case if /MISSING=LISTWISE is set */
@@ -341,7 +290,7 @@ levene2_calc (const struct ccase *c, void *_l)
        for (i = 0; i < l->n_dep; ++i) 
         {
           struct variable *v = l->v_dep[i];
-         const union value *val = &c->data[v->fv];
+         const union value *val = case_data (c, v->fv);
  
           if (l->is_missing(val,v) )
             {
@@ -351,15 +300,16 @@ levene2_calc (const struct ccase *c, void *_l)
      }
  
    key.id = *gv;
-  key.criterion = CMP_EQ;
  
    for (i = 0; i < l->n_dep; ++i) 
      {
        double levene_z;
        struct variable *var = l->v_dep[i] ;
-      const union value *v = &c->data[var->fv];
+      const union value *v = case_data (c, var->fv);
        struct group_statistics *gs;
-      gs = get_group(i,&key); 
+
+      gs = hsh_find(var->p.grp_data.group_hash,(void *) &key );
+
        if ( 0 == gs ) 
         continue;
  
@@ -386,74 +336,27 @@ levene2_postcalc (void *_l)
        double lz_numerator = 0;
        struct hsh_iterator hi;
        struct group_statistics *g;
-      for(g = (struct group_statistics *) hsh_first(hash[v],&hi);
+
+      struct variable *var = l->v_dep[v] ;
+      struct hsh_table *hash = var->p.grp_data.group_hash;
+
+      for(g = (struct group_statistics *) hsh_first(hash,&hi);
           g != 0 ;
-         g = (struct group_statistics *) hsh_next(hash[v],&hi) )
+         g = (struct group_statistics *) hsh_next(hash,&hi) )
         {
-
           lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
-      
-
         }
-      lz_numerator *= ( l->v_dep[v]->p.t_t.ugs.n - 
-                       l->v_dep[v]->p.t_t.n_groups );
+      lz_numerator *= ( l->v_dep[v]->p.grp_data.ugs.n - 
+                       l->v_dep[v]->p.grp_data.n_groups );
  
-      lz_denominator[v] /= (l->v_dep[v]->p.t_t.n_groups - 1);
+      lz_denominator[v] *= (l->v_dep[v]->p.grp_data.n_groups - 1);
        
-      l->v_dep[v]->p.t_t.levene = lz_numerator/lz_denominator[v] ;
+      l->v_dep[v]->p.grp_data.levene = lz_numerator/lz_denominator[v] ;
+
      }
  
    /* Now clear up after ourselves */
    free(lz_denominator);
-  for (v = 0; v < l->n_dep; ++v) 
-    {
-      hsh_destroy(hash[v]);
-    }
-
-  free(hash);
    free(lz);
  }
  
-
-/* Return the group belonging to the v_th dependent variable
-   which matches the key */
-static struct group_statistics *
-get_group(int v, struct group_statistics *key)
-{
-  struct group_statistics *gs;
-  gs = hsh_find(hash[v],key);
-
-
-  if ( ( !gs )  && inequality_compare) 
-    {
-      /* Here we degrade to a linear search.
-        This would seem inefficient.  However, it should only ever happen 
-        with the T-TEST, for which there are exactly two groups */
-
-      struct hsh_iterator hi;
-
-      assert( hsh_count(hash[v]) == 2 ) ;
-      for(gs = (struct group_statistics *) hsh_first(hash[v],&hi);
-         gs != 0 ;
-         gs = (struct group_statistics *) hsh_next(hash[v],&hi) )
-       {
-         int cmp;
-
-         cmp = compare_values(&gs->id, &key->id, 0);
-
-         assert( cmp != 0 ); /* or else the hash would have found something */
-
-         if ( cmp == -1 && 
-              ( gs->criterion == CMP_GT || gs->criterion == CMP_GE ) 
-            ) 
-           break;
-
-         if ( cmp == 1 && 
-              ( gs->criterion == CMP_LT || gs->criterion == CMP_LE ) 
-            ) 
-           break;
-       }
-    }
-
-  return gs;
-}