corrected increment of lag

[pspp-builds.git] / src / math / ts / innovations.c
diff --git a/src/math/ts/innovations.c b/src/math/ts/innovations.c

index 6870c4e4d37c4fcd601ca5c556996ae3bdea7235..cec669339f47f5a538fc4d3acd5495f383096376 100644 (file)
--- a/src/math/ts/innovations.c
+++ b/src/math/ts/innovations.c
@@ -83,15 +83,18 @@ get_mean_variance (size_t n_vars, const struct casefile *cf,
    Read the first MAX_LAG cases.
   */
  static bool
-innovations_init_cases (struct casereader *r, struct ccase **c, size_t max_lag)
+innovations_init_cases (struct casereader *r, struct ccase **inn_cs, size_t max_lag)
  {
    bool value = true;
    size_t lag = 0;
  
+  assert (r != NULL);
+  assert (inn_cs != NULL);
    while (value && lag < max_lag)
      {
+      assert (inn_cs[lag] != NULL);
+      value = casereader_read (r, inn_cs[lag]);
        lag++;
-      value = casereader_read (r, c[lag]);
      }
    return value;
  }
@@ -117,7 +120,7 @@ get_covariance (size_t n_vars, const struct casefile *cf,
                 struct innovations_estimate **est, size_t max_lag)
  {
    struct casereader *r;
-  struct ccase **c;
+  struct ccase **inn_c;
    size_t lag;
    size_t n;
    bool read_case = false;
@@ -126,27 +129,27 @@ get_covariance (size_t n_vars, const struct casefile *cf,
    const union value *tmp;
    const union value *tmp2;
  
-  c = xnmalloc (max_lag, sizeof (*c));
+  inn_c = xnmalloc (max_lag, sizeof (*inn_c));
    
    for (lag = 0; lag < max_lag; lag++)
      {
-      c[lag] = xmalloc (sizeof *c[lag]);
+      inn_c[lag] = xmalloc (sizeof *inn_c[lag]);
      }
  
    r = casefile_get_reader (cf);
-  read_case = innovations_init_cases (r, c, max_lag);
+  read_case = innovations_init_cases (r, inn_c, max_lag);
  
    while (read_case)
      {
        for (n = 0; n < n_vars; n++)
         {
-         tmp2 = case_data (c[0], est[n]->variable->fv);
+         tmp2 = case_data (inn_c[0], est[n]->variable->fv);
           if (!mv_is_value_missing (&est[n]->variable->miss, tmp2))
             {
               x = tmp2->f - est[n]->mean;
               for (lag = 1; lag <= max_lag; lag++)
                 {
-                 tmp = case_data (c[lag], est[n]->variable->fv);
+                 tmp = case_data (inn_c[lag], est[n]->variable->fv);
                   if (!mv_is_value_missing (&est[n]->variable->miss, tmp))
                     {
                       d = (tmp->f - est[n]->mean);
@@ -155,7 +158,7 @@ get_covariance (size_t n_vars, const struct casefile *cf,
                 }
             }
         }
-      read_case = innovations_update_cases (r, c, max_lag);
+      read_case = innovations_update_cases (r, inn_c, max_lag);
      }
    for (lag = 0; lag <= max_lag; lag++)
      {
@@ -166,9 +169,9 @@ get_covariance (size_t n_vars, const struct casefile *cf,
      }
    for (lag = 0; lag < max_lag; lag++)
      {
-      free (c[lag]);
+      free (inn_c[lag]);
      }
-  free (c);
+  free (inn_c);
  }
  static double
  innovations_convolve (double **theta, struct innovations_estimate *est,
@@ -179,39 +182,101 @@ innovations_convolve (double **theta, struct innovations_estimate *est,
  
    for (k = 0; k < i; k++)
      {
-      result += theta[i][i-k] * theta[j][i-j] * est->cov[k];
+      result += theta[i-1][i-k-1] * theta[j-1][j-k-1] * est->scale[k];
      }
    return result;
  }
+static void
+innovations_update_scale (struct innovations_estimate *est, double *theta,
+                         size_t i)
+{
+  double result = 0.0;
+  size_t j;
+  size_t k;
+
+
+  result = est->cov[0];
+  for (j = 0; j < i; j++)
+    {
+      k = i - j;
+      result -= theta[k] * theta[k] * est->scale[j];
+    }
+  est->scale[i] = result;
+}
+
  static void
  get_coef (size_t n_vars, const struct casefile *cf, 
                 struct innovations_estimate **est, size_t max_lag)
  {
-  int j;
-  int i;
-  int k;
+  size_t j;
+  size_t i;
+  size_t k;
    size_t n;
    double v;
    double **theta;
  
+  theta = xnmalloc (max_lag, sizeof (*theta));
+  for (i = 0; i < max_lag; i++)
+    {
+      theta[i] = xnmalloc (i+1, sizeof (theta[i]));
+
+    }
    for (n = 0; n < n_vars; n++)
      {
+      for (i = 0; i < max_lag; i++)
+       {
+         for (j = 0; j < i; j++)
+           {
+             theta[i][j] = 0.0;
+           }
+       }
+      innovations_update_scale (est[n], theta[0], 0);
        for (i = 0; i < max_lag; i++)
         {
           v = est[n]->cov[i];
           for (j = 0; j < i; j++)
             {
               k = i - j;
-             theta[i][k] = est[n]->cov[k] - 
-               innovations_convolve (theta, est, i, j);
+             theta[i-1][k-1] = est[n]->cov[k] - 
+               innovations_convolve (theta, est[n], i, j);
             }
+         innovations_update_scale (est[n], theta[i], i);
         }
+      /* Copy the final row of coefficients into EST->COEFF.*/
+      for (i = 0; i < max_lag; i++)
+       {
+         /*
+           The order of storage here means that the best predicted value
+           for the time series is computed as follows:
+
+           Let X[m], X[m-1],... denote the original series.
+           Let X_hat[0] denote the best predicted value of X[0],
+           X_hat[1] denote the projection of X[1] onto the subspace
+           spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the 
+           projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1],
+           X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}.
+
+           Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1])
+                         + est->coeff[m-1] * (X[m-2] - X_hat[m-2])
+                         ...
+                         + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag])
+
+           (That is what X_hat[m] SHOULD be, anyway. These routines need
+           to be tested.)
+          */
+         pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
+       }
+    }
+  for (i = 0; i < max_lag; i++)
+    {
+      free (theta[i]);
      }
+  free (theta);
  }
  
  struct innovations_estimate ** 
  pspp_innovations (const struct variable **vars, 
-                 size_t *n_vars,
+                 size_t n_vars,
                   size_t lag, 
                   const struct casefile *cf)
  {
@@ -219,8 +284,8 @@ pspp_innovations (const struct variable **vars,
    size_t i;
    size_t j;
  
-  est = xnmalloc (*n_vars, sizeof *est);
-  for (i = 0; i < *n_vars; i++)
+  est = xnmalloc (n_vars, sizeof *est);
+  for (i = 0; i < n_vars; i++)
      {
        if (vars[i]->type == NUMERIC)
         {
@@ -228,7 +293,8 @@ pspp_innovations (const struct variable **vars,
           est[i]->variable = vars[i];
           est[i]->mean = 0.0;
           est[i]->variance = 0.0;
-         est[i]->cov = xnmalloc (lag, sizeof (est[i]->cov));
+         est[i]->cov = xnmalloc (lag, sizeof (*est[i]->cov));
+         est[i]->scale = xnmalloc (lag, sizeof (*est[i]->scale));
           est[i]->coeff = xnmalloc (lag, sizeof (*est[i]->coeff));
           for (j = 0; j < lag; j++)
             {
@@ -237,15 +303,15 @@ pspp_innovations (const struct variable **vars,
         }
        else
         {
-         *n_vars--;
+         n_vars--;
  /*       msg (MW, _("Cannot compute autocovariance for a non-numeric variable %s"), */
  /*                  var_to_string (vars[i])); */
         }
      }
  
-  get_mean_variance (*n_vars, cf, est);
-  get_covariance (*n_vars, cf, est, lag);
-  get_coef (*n_vars, cf, est, lag);
+  get_mean_variance (n_vars, cf, est);
+  get_covariance (n_vars, cf, est, lag);
+  get_coef (n_vars, cf, est, lag);
    
    return est;
  }