X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fts%2Finnovations.c;h=089665acb94bd9ee54b8914f7a472c72af10dd31;hb=f70f1b22e925d55c246372376de1c6ffaacf8a4b;hp=131284459096b2056b79c86e92b3b290c09bfea4;hpb=4dc2ebcfd1a113b25f6997ff3b66fa52ac41158b;p=pspp-builds.git

diff --git a/src/math/ts/innovations.c b/src/math/ts/innovations.c
index 13128445..089665ac 100644
--- a/src/math/ts/innovations.c
+++ b/src/math/ts/innovations.c
@@ -38,8 +38,8 @@
 #include <math/ts/innovations.h>
 
 static void
-get_mean_variance (const gsl_matrix *data,
-		   struct innovations_estimate **est)
+get_mean (const gsl_matrix *data,
+	  struct innovations_estimate **est)
 		   
 {
   size_t n;
@@ -49,9 +49,8 @@ get_mean_variance (const gsl_matrix *data,
 
   for (n = 0; n < data->size2; n++)
     {
-      est[n]->n_obs = 2.0;
+      est[n]->n_obs = 0.0;
       est[n]->mean = 0.0;
-      est[n]->variance = 0.0;
     }
   for (i = 0; i < data->size1; i++)
     {
@@ -60,20 +59,36 @@ get_mean_variance (const gsl_matrix *data,
 	  tmp = gsl_matrix_get (data, i, n);
 	  if (!gsl_isnan (tmp))
 	    {
+	      est[n]->n_obs += 1.0;
 	      d = (tmp - est[n]->mean) / est[n]->n_obs;
 	      est[n]->mean += d;
-	      est[n]->variance += est[n]->n_obs * est[n]->n_obs * d * d;
-	      est[n]->n_obs += 1.0;
 	    }
 	}
     }
-  for (n = 0; n < data->size2; n++)
+}
+static void 
+update_cov (struct innovations_estimate **est, gsl_vector_const_view x,
+	    gsl_vector_const_view y, size_t lag)
+{
+  size_t j;
+  double xj;
+  double yj;
+
+  for (j = 0; j < x.vector.size; j++)
     {
-      /* Maximum likelihood estimate of the variance. */
-      est[n]->variance /= est[n]->n_obs;
+      xj = gsl_vector_get (&x.vector, j);
+      yj = gsl_vector_get (&y.vector, j);
+      if (!gsl_isnan (xj))
+	{
+	  if (!gsl_isnan (yj))
+	    {
+	      xj -= est[j]->mean;
+	      yj -= est[j]->mean;
+	      *(est[j]->cov + lag) += xj * yj;
+	    }
+	}
     }
 }
-
 static int
 get_covariance (const gsl_matrix *data, 
 		struct innovations_estimate **est, size_t max_lag)
@@ -81,44 +96,41 @@ get_covariance (const gsl_matrix *data,
   size_t lag;
   size_t j;
   size_t i;
-  double x;
-  double y;
   int rc = 1;
 
   assert (data != NULL);
   assert (est != NULL);
-  
+
+  for (j = 0; j < data->size2; j++)
+    {
+      for (lag = 0; lag <= max_lag; lag++)
+	{
+	  *(est[j]->cov + lag) = 0.0;
+	}
+    }
+  /*
+    The rows are in the outer loop because a gsl_matrix is stored in
+    row-major order.
+   */
   for (i = 0; i < data->size1; i++)
     {
-      for (j = 0; j < data->size2; j++)
+      for (lag = 0; lag < max_lag && lag < data->size1 - i; lag++)
 	{
-	  x = gsl_matrix_get (data, i, j);
-
-	  if (!gsl_isnan (x))
-	    {
-	      x -= est[j]->mean;
-	      for (lag = 1; lag <= max_lag && lag < data->size1 - max_lag; lag++)
-		{
-		  y = gsl_matrix_get (data, i + lag, j);
-		  if (!gsl_isnan (y))
-		    {
-		      y -= est[j]->mean;
-		      *(est[j]->cov + lag) += y * x;
-		      est[i]->n_obs += 1.0;
-		    }
-		}
-	    }
+	  update_cov (est, gsl_matrix_const_row (data, i), 
+		      gsl_matrix_const_row (data, i + lag), lag);
 	}
     }
-  for (lag = 0; lag <= max_lag && lag < data->size1 - max_lag; lag++)
+  for (j = 0; j < data->size2; j++)
     {
-      for (j = 0; j < data->size2; j++)
+      for (lag = 0; lag <= max_lag; lag++)
 	{
-	  *(est[j]->cov + lag) /= (est[j]->n_obs - lag);
+	  *(est[j]->cov + lag) /= est[j]->n_obs;
 	}
     }
+
   return rc;
 }
+
 static double
 innovations_convolve (double **theta, struct innovations_estimate *est,
 		      int i, int j)
@@ -126,9 +138,9 @@ innovations_convolve (double **theta, struct innovations_estimate *est,
   int k;
   double result = 0.0;
 
-  for (k = 0; k < i; k++)
+  for (k = 0; k < j; k++)
     {
-      result += theta[i-1][i-k-1] * theta[j-1][j-k-1] * est->scale[k];
+      result += theta[i-1][i-k-1] * theta[j][j-k-1] * est->scale[k];
     }
   return result;
 }
@@ -140,54 +152,70 @@ innovations_update_scale (struct innovations_estimate *est, double *theta,
   size_t j;
   size_t k;
 
+  if (i < (size_t) est->max_lag)
+    {
+      result = est->cov[0];
+      for (j = 0; j < i; j++)
+	{
+	  k = i - j - 1;
+	  result -= theta[k] * theta[k] * est->scale[j];
+	}
+      est->scale[i] = result;
+    }
+}
+static void
+init_theta (double **theta, size_t max_lag)
+{
+  size_t i;
+  size_t j;
 
-  result = est->cov[0];
-  for (j = 0; j < i; j++)
+  for (i = 0; i < max_lag; i++)
     {
-      k = i - j;
-      result -= theta[k] * theta[k] * est->scale[j];
+      for (j = 0; j <= i; j++)
+	{
+	  theta[i][j] = 0.0;
+	}
     }
-  est->scale[i] = result;
 }
+static void
+innovations_update_coeff (double **theta, struct innovations_estimate *est,
+			  size_t max_lag)
+{
+  size_t i;
+  size_t j;
+  size_t k;
+
+  for (i = 0; i < max_lag; i++) /* Build THETA one row at a time. */
+    {
+      for (j = 0; j <= i; j++)
+	{
+	  k = i - j; /* Row I is filled from column I down to column 0. */
+	  theta[i][k] = (est->cov[k] - 
+	    innovations_convolve (theta, est, i, j))
+	    / est->scale[k];
+	}
+      innovations_update_scale (est, theta[i], i + 1); /* Hand over the finished row. */
+    }  
+}
 static void
 get_coef (const gsl_matrix *data,
 	  struct innovations_estimate **est, size_t max_lag)
 {
-  size_t j;
   size_t i;
-  size_t k;
   size_t n;
-  double v;
   double **theta;
 
   theta = xnmalloc (max_lag, sizeof (*theta));
   for (i = 0; i < max_lag; i++)
     {
-      theta[i] = xnmalloc (i+1, sizeof (theta[i]));
-
+      theta[i] = xnmalloc (max_lag, sizeof (**(theta + i)));
     }
+
   for (n = 0; n < data->size2; n++)
     {
-      for (i = 0; i < max_lag; i++)
-	{
-	  for (j = 0; j < i; j++)
-	    {
-	      theta[i][j] = 0.0;
-	    }
-	}
+      init_theta (theta, max_lag);
       innovations_update_scale (est[n], theta[0], 0);
-      for (i = 0; i < max_lag; i++)
-	{
-	  v = est[n]->cov[i];
-	  for (j = 0; j < i; j++)
-	    {
-	      k = i - j;
-	      theta[i-1][k-1] = est[n]->cov[k] - 
-		innovations_convolve (theta, est[n], i, j);
-	    }
-	  innovations_update_scale (est[n], theta[i], i);
-	}
+      innovations_update_coeff (theta, est[n], max_lag);
       /* Copy the final row of coefficients into EST->COEFF.*/
       for (i = 0; i < max_lag; i++)
 	{
@@ -213,6 +241,7 @@ get_coef (const gsl_matrix *data,
 	  pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
 	}
     }
+
   for (i = 0; i < max_lag; i++)
     {
       free (theta[i]);
@@ -220,32 +249,78 @@ get_coef (const gsl_matrix *data,
   free (theta);
 }
 
+static void
+innovations_struct_init (struct innovations_estimate *est, 
+			 const struct design_matrix *dm, 
+			 size_t lag)
+{
+  size_t j;
+
+  est->mean = 0.0;
+  /* Allocate EST's arrays for LAG lags.  COV[0] stores the lag-0
+     covariance (i.e., the variance), COV[1] holds the lag-1 covariance,
+     etc., which is why COV and SCALE get LAG + 1 entries. */
+  est->cov = xnmalloc (lag + 1, sizeof (*est->cov));
+  est->scale = xnmalloc (lag + 1, sizeof (*est->scale));
+  est->coeff = xnmalloc (lag, sizeof (*est->coeff)); /* No intercept. */
+
+  /*
+    The loop below is an unusual use of PSPP_COEFF_INIT(). In a
+    typical model, one column of a DESIGN_MATRIX has one
+    coefficient. But in a time-series model, one column has many
+    coefficients.
+   */
+  for (j = 0; j < lag; j++)
+    {
+      pspp_coeff_init (est->coeff + j, dm);
+    }
+  est->max_lag = (double) lag; /* Loops in this file cast it back to size_t. */
+}
+      
 struct innovations_estimate ** 
-pspp_innovations (const gsl_matrix *data, size_t lag)
+pspp_innovations (const struct design_matrix *dm, size_t lag)
 {
   struct innovations_estimate **est;
   size_t i;
-  size_t j;
 
-  est = xnmalloc (data->size2, sizeof *est);
-  for (i = 0; i < data->size2; i++)
+  est = xnmalloc (dm->m->size2, sizeof *est);
+  for (i = 0; i < dm->m->size2; i++)
     {
-      est[i] = xmalloc (sizeof **est);
+      est[i] = xmalloc (sizeof *est[i]);
 /*       est[i]->variable = vars[i]; */
-      est[i]->mean = 0.0;
-      est[i]->variance = 0.0;
-      est[i]->cov = xnmalloc (lag, sizeof (*est[i]->cov));
-      est[i]->scale = xnmalloc (lag, sizeof (*est[i]->scale));
-      est[i]->coeff = xnmalloc (lag, sizeof (*est[i]->coeff));
-      for (j = 0; j < lag; j++)
-	{
-	  est[i]->coeff[j] = xmalloc (sizeof (*(est[i]->coeff + j)));
-	}
+      innovations_struct_init (est[i], dm, lag);
     }
 
-  get_mean_variance (data, est);
-  get_covariance (data, est, lag);
-  get_coef (data, est, lag);
+  get_mean (dm->m, est);
+  get_covariance (dm->m, est, lag);
+  get_coef (dm->m, est, lag);
   
   return est;
 }
+
+/* Free EST and everything it owns: each coefficient, the COEFF array, SCALE and COV. */
+static void pspp_innovations_free_one (struct innovations_estimate *est)
+{
+  size_t i;
+  assert (est != NULL);
+  for (i = 0; i < (size_t) est->max_lag; i++)
+    {
+      pspp_coeff_free (est->coeff[i]);
+    }
+  free (est->coeff); /* The array of pointers itself, not just its elements. */
+  free (est->scale);
+  free (est->cov);
+  free (est);
+}
+
+void pspp_innovations_free (struct innovations_estimate **est, size_t n)
+{
+  size_t i;
+
+  assert (est != NULL);
+  for (i = 0; i < n; i++) /* Release each of the N estimates... */
+    {
+      pspp_innovations_free_one (est[i]);
+    }
+  free (est); /* ...then the array that held them. */
+}