src/math/ts/innovations.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2006, 2011 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 /*
  18   Find preliminary ARMA coefficients via the innovations algorithm.
  19   Also compute the sample mean and covariance matrix for each series.
  20
  21   Reference:
  22
  23   P. J. Brockwell and R. A. Davis. Time Series: Theory and
  24   Methods. Second edition. Springer. New York. 1991. ISBN
  25   0-387-97429-6. Sections 5.2, 8.3 and 8.4.
  26  */
  27
  28 #include <config.h>
  29
  30 #include "math/ts/innovations.h"
  31
  32 #include <gsl/gsl_matrix.h>
  33 #include <gsl/gsl_vector.h>
  34 #include <math.h>
  35 #include <stdlib.h>
  36
  37 #include "libpspp/compiler.h"
  38 #include "libpspp/misc.h"
  39 #include "math/coefficient.h"
  40
  41 #include "gl/xalloc.h"
  42
  43 static void
  44 get_mean (const gsl_matrix *data,
  45           struct innovations_estimate **est)
  46
  47 {
  48   size_t n;
  49   size_t i;
  50   double d;
  51   double tmp;
  52
  53   for (n = 0; n < data->size2; n++)
  54     {
  55       est[n]->n_obs = 0.0;
  56       est[n]->mean = 0.0;
  57     }
  58   for (i = 0; i < data->size1; i++)
  59     {
  60       for (n = 0; n < data->size2; n++)
  61         {
  62           tmp = gsl_matrix_get (data, i, n);
  63           if (!isnan (tmp))
  64             {
  65               est[n]->n_obs += 1.0;
  66               d = (tmp - est[n]->mean) / est[n]->n_obs;
  67               est[n]->mean += d;
  68             }
  69         }
  70     }
  71 }
  72 static void
  73 update_cov (struct innovations_estimate **est, gsl_vector_const_view x,
  74             gsl_vector_const_view y, size_t lag)
  75 {
  76   size_t j;
  77   double xj;
  78   double yj;
  79
  80   for (j = 0; j < x.vector.size; j++)
  81     {
  82       xj = gsl_vector_get (&x.vector, j);
  83       yj = gsl_vector_get (&y.vector, j);
  84       if (!isnan (xj))
  85         {
  86           if (!isnan (yj))
  87             {
  88               xj -= est[j]->mean;
  89               yj -= est[j]->mean;
  90               *(est[j]->cov + lag) += xj * yj;
  91             }
  92         }
  93     }
  94 }
  95 static int
  96 get_covariance (const gsl_matrix *data,
  97                 struct innovations_estimate **est, size_t max_lag)
  98 {
  99   size_t lag;
 100   size_t j;
 101   size_t i;
 102   int rc = 1;
 103
 104   assert (data != NULL);
 105   assert (est != NULL);
 106
 107   for (j = 0; j < data->size2; j++)
 108     {
 109       for (lag = 0; lag <= max_lag; lag++)
 110         {
 111           *(est[j]->cov + lag) = 0.0;
 112         }
 113     }
 114   /*
 115     The rows are in the outer loop because a gsl_matrix is stored in
 116     row-major order.
 117    */
 118   for (i = 0; i < data->size1; i++)
 119     {
 120       for (lag = 0; lag <= max_lag && lag < data->size1 - i; lag++)
 121         {
 122           update_cov (est, gsl_matrix_const_row (data, i),
 123                       gsl_matrix_const_row (data, i + lag), lag);
 124         }
 125     }
 126   for (j = 0; j < data->size2; j++)
 127     {
 128       for (lag = 0; lag <= max_lag; lag++)
 129         {
 130           *(est[j]->cov + lag) /= est[j]->n_obs;
 131         }
 132     }
 133
 134   return rc;
 135 }
 136
 137 static double
 138 innovations_convolve (double *x, double *y, struct innovations_estimate *est,
 139                       int i)
 140 {
 141   int k;
 142   double result = 0.0;
 143
 144   assert (x != NULL && y != NULL);
 145   assert (est != NULL);
 146   assert (est->scale != NULL);
 147   assert (i > 0);
 148   for (k = 0; k < i; k++)
 149     {
 150       result += x[k] * y[k] * est->scale[i-k-1];
 151     }
 152   return result;
 153 }
 154 static void
 155 innovations_update_scale (struct innovations_estimate *est, double *theta,
 156                           size_t i)
 157 {
 158   double result = 0.0;
 159   size_t j;
 160   size_t k;
 161
 162   if (i < (size_t) est->max_lag)
 163     {
 164       result = est->cov[0];
 165       for (j = 0; j < i; j++)
 166         {
 167           k = i - j - 1;
 168           result -= pow2 (theta[k]) * est->scale[j];
 169         }
 170       est->scale[i] = result;
 171     }
 172 }
 173 static void
 174 init_theta (double **theta, size_t max_lag)
 175 {
 176   size_t i;
 177   size_t j;
 178
 179   for (i = 0; i < max_lag; i++)
 180     {
 181       for (j = 0; j <= i; j++)
 182         {
 183           theta[i][j] = 0.0;
 184         }
 185     }
 186 }
 187 static void
 188 innovations_update_coeff (double **theta, struct innovations_estimate *est,
 189                           size_t max_lag)
 190 {
 191   size_t i;
 192   size_t j;
 193   size_t k;
 194
 195   for (i = 0; i < max_lag; i++)
 196     {
 197       theta[i][i] = est->cov[i+1] / est->scale[0];
 198       for (j = 1; j <= i; j++)
 199         {
 200           k = i - j;
 201           theta[i][k] = (est->cov[k+1] -
 202                          innovations_convolve (theta[i] + k + 1, theta[j - 1], est, j))
 203             / est->scale[j];
 204         }
 205       innovations_update_scale (est, theta[i], i + 1);
 206     }
 207 }
 208 static void
 209 get_coef (const gsl_matrix *data,
 210           struct innovations_estimate **est, size_t max_lag)
 211 {
 212   size_t i;
 213   size_t n;
 214   double **theta;
 215
 216   theta = xnmalloc (max_lag, sizeof (*theta));
 217   for (i = 0; i < max_lag; i++)
 218     {
 219       theta[i] = xnmalloc (max_lag, sizeof (**(theta + i)));
 220     }
 221
 222   for (n = 0; n < data->size2; n++)
 223     {
 224       init_theta (theta, max_lag);
 225       innovations_update_scale (est[n], theta[0], 0);
 226       innovations_update_coeff (theta, est[n], max_lag);
 227       /* Copy the final row of coefficients into EST->COEFF.*/
 228       for (i = 0; i < max_lag; i++)
 229         {
 230           /*
 231             The order of storage here means that the best predicted value
 232             for the time series is computed as follows:
 233
 234             Let X[m], X[m-1],... denote the original series.
 235             Let X_hat[0] denote the best predicted value of X[0],
 236             X_hat[1] denote the projection of X[1] onto the subspace
 237             spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the
 238             projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1],
 239             X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}.
 240
 241             Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1])
 242                           + est->coeff[m-1] * (X[m-2] - X_hat[m-2])
 243                           ...
 244                           + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag])
 245            */
 246           pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
 247         }
 248     }
 249
 250   for (i = 0; i < max_lag; i++)
 251     {
 252       free (theta[i]);
 253     }
 254   free (theta);
 255 }
 256
 257 static void
 258 innovations_struct_init (struct innovations_estimate *est,
 259                          const struct design_matrix *dm,
 260                          size_t lag)
 261 {
 262   size_t j;
 263
 264   est->mean = 0.0;
 265   /* COV[0] stores the lag 0 covariance (i.e., the variance), COV[1]
 266      holds the lag-1 covariance, etc.
 267    */
 268   est->cov = xnmalloc (lag + 1, sizeof (*est->cov));
 269   est->scale = xnmalloc (lag + 1, sizeof (*est->scale));
 270   est->coeff = xnmalloc (lag, sizeof (*est->coeff)); /* No intercept. */
 271
 272   /*
 273     The loop below is an unusual use of PSPP_COEFF_INIT(). In a
 274     typical model, one column of a DESIGN_MATRIX has one
 275     coefficient. But in a time-series model, one column has many
 276     coefficients.
 277    */
 278   for (j = 0; j < lag; j++)
 279     {
 280       pspp_coeff_init (est->coeff + j, dm);
 281     }
 282   est->max_lag = (double) lag;
 283 }
 284 /*
 285   The mean is subtracted from the original data before computing the
 286   coefficients. The mean is NOT added back, so if you want to predict
 287   a new value, you must add the mean to X_hat[m] to get the correct
 288   value.
 289  */
 290 static void
 291 subtract_mean (gsl_matrix *m, struct innovations_estimate **est)
 292 {
 293   size_t i;
 294   size_t j;
 295   double tmp;
 296
 297   for (i = 0; i < m->size1; i++)
 298     {
 299       for (j = 0; j < m->size2; j++)
 300         {
 301           tmp = gsl_matrix_get (m, i, j) - est[j]->mean;
 302           gsl_matrix_set (m, i, j, tmp);
 303         }
 304     }
 305 }
 306 struct innovations_estimate **
 307 pspp_innovations (const struct design_matrix *dm, size_t lag)
 308 {
 309   struct innovations_estimate **est;
 310   size_t i;
 311
 312   est = xnmalloc (dm->m->size2, sizeof *est);
 313   for (i = 0; i < dm->m->size2; i++)
 314     {
 315       est[i] = xmalloc (sizeof *est[i]);
 316 /*       est[i]->variable = vars[i]; */
 317       innovations_struct_init (est[i], dm, lag);
 318     }
 319
 320   get_mean (dm->m, est);
 321   subtract_mean (dm->m, est);
 322   get_covariance (dm->m, est, lag);
 323   get_coef (dm->m, est, lag);
 324
 325   return est;
 326 }
 327
 328 static void
 329 pspp_innovations_free_one (struct innovations_estimate *est)
 330 {
 331   size_t i;
 332
 333   assert (est != NULL);
 334   for (i = 0; i < (size_t) est->max_lag; i++)
 335     {
 336       pspp_coeff_free (est->coeff[i]);
 337     }
 338   free (est->scale);
 339   free (est->cov);
 340   free (est);
 341 }
 342
 343 void pspp_innovations_free (struct innovations_estimate **est, size_t n)
 344 {
 345   size_t i;
 346
 347   assert (est != NULL);
 348   for (i = 0; i < n; i++)
 349     {
 350       pspp_innovations_free_one (est[i]);
 351     }
 352   free (est);
 353 }