From 49edfd56432f86f83dd3d4499a2744f19eccbea4 Mon Sep 17 00:00:00 2001
From: John Darrington <john@darrington.wattle.id.au>
Date: Wed, 23 Nov 2011 14:47:35 +0100
Subject: [PATCH] GLM: Implemented the Type I sums of squares and added a test
 for that.

---
 src/language/stats/glm.c    |  86 +++++++++++++++++++++++++--
 tests/language/stats/glm.at | 114 ++++++++++++++++++++++++++++++++++++
 2 files changed, 194 insertions(+), 6 deletions(-)

diff --git a/src/language/stats/glm.c b/src/language/stats/glm.c
index 28f61bc68b..c7b62a6b69 100644
--- a/src/language/stats/glm.c
+++ b/src/language/stats/glm.c
@@ -345,9 +345,6 @@ error:
   return CMD_FAILURE;
 }
 
-static void get_ssq (struct covariance *, gsl_vector *,
-		     const struct glm_spec *);
-
 static inline bool
 not_dropped (size_t j, const bool *ff)
 {
@@ -380,9 +377,84 @@ fill_submatrix (const gsl_matrix * cov, gsl_matrix * submatrix, bool *dropped_f)
 	}
     }
 }
-	      
+
+
+/* 
+   Type 1 sums of squares.
+   Populate SSQ with the Type 1 sums of squares according to COV
+ */
+static void
+ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd)
+{
+  gsl_matrix *cm = covariance_calculate_unnormalized (cov);
+  size_t i;
+  size_t k;
+  bool *model_dropped = xcalloc (covariance_dim (cov), sizeof (*model_dropped));
+  bool *submodel_dropped = xcalloc (covariance_dim (cov), sizeof (*submodel_dropped));
+  const struct categoricals *cats = covariance_get_categoricals (cov);
+
+  size_t n_dropped_model = 0;
+  size_t n_dropped_submodel = 0;
+
+  for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++)
+    {
+      n_dropped_model++;
+      n_dropped_submodel++;
+      model_dropped[i] = true;
+      submodel_dropped[i] = true;
+    }
+
+  for (k = 0; k < cmd->n_interactions; k++)
+    {
+      gsl_matrix *model_cov = NULL;
+      gsl_matrix *submodel_cov = NULL;
+      
+      n_dropped_submodel = n_dropped_model;
+      for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++)
+	{
+	  submodel_dropped[i] = model_dropped[i];
+	}
+
+      for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++)
+	{
+	  const struct interaction * x = 
+	    categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars);
+
+	  if ( x == cmd->interactions [k])
+	    {
+	      model_dropped[i] = false;
+	      n_dropped_model--;
+	    }
+	}
+
+      model_cov = gsl_matrix_alloc (cm->size1 - n_dropped_model, cm->size2 - n_dropped_model);
+      submodel_cov = gsl_matrix_alloc (cm->size1 - n_dropped_submodel, cm->size2 - n_dropped_submodel);
+
+      fill_submatrix (cm, model_cov,    model_dropped);
+      fill_submatrix (cm, submodel_cov, submodel_dropped);
+
+      reg_sweep (model_cov, 0);
+      reg_sweep (submodel_cov, 0);
+
+      gsl_vector_set (ssq, k + 1,
+		      gsl_matrix_get (submodel_cov, 0, 0) - gsl_matrix_get (model_cov, 0, 0)
+		      );
+
+      gsl_matrix_free (model_cov);
+      gsl_matrix_free (submodel_cov);
+    }
+
+  free (model_dropped);
+  free (submodel_dropped);
+  gsl_matrix_free (cm);
+}
+
+/* 
+   Type 2 sums of squares.
+   Populate SSQ with the Type 2 sums of squares according to COV
+ */
 static void
-get_ssq (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd)
+ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd)
 {
   gsl_matrix *cm = covariance_calculate_unnormalized (cov);
   size_t i;
@@ -541,10 +613,12 @@ run_glm (struct glm_spec *cmd, struct casereader *input,
     switch (cmd->ss_type)
       {
       case 1:
+	ssq_type1 (cov, ws.ssq, cmd);
 	break;
       case 2:
       case 3:
-	get_ssq (cov, ws.ssq, cmd);
+	/* Type 3 is not yet implemented :( but for balanced designs it is the same as type 2 */
+	ssq_type2 (cov, ws.ssq, cmd);
 	break;
       default:
 	NOT_REACHED ();
diff --git a/tests/language/stats/glm.at b/tests/language/stats/glm.at
index 394f4a2dfb..e6c961c7a3 100644
--- a/tests/language/stats/glm.at
+++ b/tests/language/stats/glm.at
@@ -125,3 +125,117 @@ Corrected Total,17833.918,19,,,
 ])
 
 AT_CLEANUP
+
+
+AT_SETUP([GLM Type 1 Sums of Squares])
+
+dnl  The following example comes from 
+dnl  http://www.uvm.edu/~dhowell/StatPages/More_Stuff/Type1-3.pdf
+AT_DATA([type1.sps], [dnl
+set decimal = dot.
+set format=F20.3.
+data list notable list /dv * Agrp * B0 * B1 * B2 * i0 * i1 * i2 * sss *.
+begin data.
+5 1 1 0 0 1 0 0 1.00
+7 1 1 0 0 1 0 0 1.00
+9 1 1 0 0 1 0 0 1.00
+8 1 1 0 0 1 0 0 1.00
+2 1 0 1 0 0 1 0 1.00
+5 1 0 1 0 0 1 0 1.00
+7 1 0 1 0 0 1 0 1.00
+3 1 0 1 0 0 1 0 1.00
+9 1 0 1 0 0 1 0 1.00
+8 1 0 0 1 0 0 1 1.00
+11 1 0 0 1 0 0 1 1.00
+12 1 0 0 1 0 0 1 1.00
+14 1 0 0 1 0 0 1 1.00
+11 1 -1 -1 -1 -1 -1 -1 1.00
+15 1 -1 -1 -1 -1 -1 -1 1.00
+16 1 -1 -1 -1 -1 -1 -1 1.00
+10 1 -1 -1 -1 -1 -1 -1 1.00
+9 1 -1 -1 -1 -1 -1 -1 1.00
+7 -1 1 0 0 -1 0 0 2.00
+9 -1 1 0 0 -1 0 0 2.00
+10 -1 1 0 0 -1 0 0 2.00
+9 -1 1 0 0 -1 0 0 2.00
+3 -1 0 1 0 0 -1 0 2.00
+8 -1 0 1 0 0 -1 0 2.00
+9 -1 0 1 0 0 -1 0 2.00
+11 -1 0 1 0 0 -1 0 2.00
+9 -1 0 0 1 0 0 -1 2.00
+12 -1 0 0 1 0 0 -1 2.00
+14 -1 0 0 1 0 0 -1 2.00
+8 -1 0 0 1 0 0 -1 2.00
+7 -1 0 0 1 0 0 -1 2.00
+11 -1 -1 -1 -1 1 1 1 2.00
+14 -1 -1 -1 -1 1 1 1 2.00
+10 -1 -1 -1 -1 1 1 1 2.00
+12 -1 -1 -1 -1 1 1 1 2.00
+13 -1 -1 -1 -1 1 1 1 2.00
+11 -1 -1 -1 -1 1 1 1 2.00
+12 -1 -1 -1 -1 1 1 1 2.00
+end data.
+
+do if B0 = -1 AND B1 = -1 AND B2 = -1.
+compute Bgrp = -1.
+end if.
+
+do if B0 = 0 AND B1 = 0 AND B2 = 1.
+compute Bgrp = 1.
+end if.
+
+do if B0 = 0 AND B1 = 1 AND B2 = 0.
+compute Bgrp = 2.
+end if.
+
+do if B0 = 1 AND B1 = 0 AND B2 = 0.
+compute Bgrp = 3.
+end if.
+
+
+do if B0 = 0 AND B1 = 1 AND B2 = 0.
+compute Bgrp = 4.
+end if.
+
+
+glm dv by Agrp Bgrp
+	/method = sstype (1)
+	.
+
+glm dv by Agrp Bgrp
+	/method = sstype (1)
+	/design Bgrp Agrp Bgrp * Agrp
+	.
+])
+
+
+AT_CHECK([pspp -O format=csv type1.sps], [0],
+  [dnl
+warning: GLM is experimental.  Do not rely on these results.
+
+Table: Tests of Between-Subjects Effects
+Source,Type I Sum of Squares,df,Mean Square,F,Sig.
+Corrected Model,216.017,7,30.860,5.046,.001
+Intercept,3410.526,1,3410.526,557.709,.000
+Agrp,9.579,1,9.579,1.566,.220
+Bgrp,186.225,3,62.075,10.151,.000
+Agrp * Bgrp,20.212,3,6.737,1.102,.364
+Error,183.457,30,6.115,,
+Total,3810.000,38,,,
+Corrected Total,399.474,37,,,
+
+warning: GLM is experimental.  Do not rely on these results.
+
+Table: Tests of Between-Subjects Effects
+Source,Type I Sum of Squares,df,Mean Square,F,Sig.
+Corrected Model,216.017,7,30.860,5.046,.001
+Intercept,3410.526,1,3410.526,557.709,.000
+Bgrp,193.251,3,64.417,10.534,.000
+Agrp,2.553,1,2.553,.418,.523
+Bgrp * Agrp,20.212,3,6.737,1.102,.364
+Error,183.457,30,6.115,,
+Total,3810.000,38,,,
+Corrected Total,399.474,37,,,
+])
+
+AT_CLEANUP
-- 
2.30.2