src/math/ts/innovations.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2006 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 /*
  18   Find preliminary ARMA coefficients via the innovations algorithm.
  19   Also compute the sample mean and covariance matrix for each series.
  20
  21   Reference:
  22
  23   P. J. Brockwell and R. A. Davis. Time Series: Theory and
  24   Methods. Second edition. Springer. New York. 1991. ISBN
  25   0-387-97429-6. Sections 5.2, 8.3 and 8.4.
  26  */
  27
  28 #include <config.h>
  29 #include <gsl/gsl_matrix.h>
  30 #include <gsl/gsl_vector.h>
  31 #include <stdlib.h>
  32 #include <libpspp/compiler.h>
  33 #include <math/coefficient.h>
  34 #include <math/ts/innovations.h>
  35
  36 #include "xalloc.h"
  37
  38 static void
  39 get_mean (const gsl_matrix *data,
  40           struct innovations_estimate **est)
  41
  42 {
  43   size_t n;
  44   size_t i;
  45   double d;
  46   double tmp;
  47
  48   for (n = 0; n < data->size2; n++)
  49     {
  50       est[n]->n_obs = 0.0;
  51       est[n]->mean = 0.0;
  52     }
  53   for (i = 0; i < data->size1; i++)
  54     {
  55       for (n = 0; n < data->size2; n++)
  56         {
  57           tmp = gsl_matrix_get (data, i, n);
  58           if (!isnan (tmp))
  59             {
  60               est[n]->n_obs += 1.0;
  61               d = (tmp - est[n]->mean) / est[n]->n_obs;
  62               est[n]->mean += d;
  63             }
  64         }
  65     }
  66 }
  67 static void
  68 update_cov (struct innovations_estimate **est, gsl_vector_const_view x,
  69             gsl_vector_const_view y, size_t lag)
  70 {
  71   size_t j;
  72   double xj;
  73   double yj;
  74
  75   for (j = 0; j < x.vector.size; j++)
  76     {
  77       xj = gsl_vector_get (&x.vector, j);
  78       yj = gsl_vector_get (&y.vector, j);
  79       if (!isnan (xj))
  80         {
  81           if (!isnan (yj))
  82             {
  83               xj -= est[j]->mean;
  84               yj -= est[j]->mean;
  85               *(est[j]->cov + lag) += xj * yj;
  86             }
  87         }
  88     }
  89 }
  90 static int
  91 get_covariance (const gsl_matrix *data,
  92                 struct innovations_estimate **est, size_t max_lag)
  93 {
  94   size_t lag;
  95   size_t j;
  96   size_t i;
  97   int rc = 1;
  98
  99   assert (data != NULL);
 100   assert (est != NULL);
 101
 102   for (j = 0; j < data->size2; j++)
 103     {
 104       for (lag = 0; lag <= max_lag; lag++)
 105         {
 106           *(est[j]->cov + lag) = 0.0;
 107         }
 108     }
 109   /*
 110     The rows are in the outer loop because a gsl_matrix is stored in
 111     row-major order.
 112    */
 113   for (i = 0; i < data->size1; i++)
 114     {
 115       for (lag = 0; lag <= max_lag && lag < data->size1 - i; lag++)
 116         {
 117           update_cov (est, gsl_matrix_const_row (data, i),
 118                       gsl_matrix_const_row (data, i + lag), lag);
 119         }
 120     }
 121   for (j = 0; j < data->size2; j++)
 122     {
 123       for (lag = 0; lag <= max_lag; lag++)
 124         {
 125           *(est[j]->cov + lag) /= est[j]->n_obs;
 126         }
 127     }
 128
 129   return rc;
 130 }
 131
 132 static double
 133 innovations_convolve (double *x, double *y, struct innovations_estimate *est,
 134                       int i)
 135 {
 136   int k;
 137   double result = 0.0;
 138
 139   assert (x != NULL && y != NULL);
 140   assert (est != NULL);
 141   assert (est->scale != NULL);
 142   assert (i > 0);
 143   for (k = 0; k < i; k++)
 144     {
 145       result += x[k] * y[k] * est->scale[i-k-1];
 146     }
 147   return result;
 148 }
 149 static void
 150 innovations_update_scale (struct innovations_estimate *est, double *theta,
 151                           size_t i)
 152 {
 153   double result = 0.0;
 154   size_t j;
 155   size_t k;
 156
 157   if (i < (size_t) est->max_lag)
 158     {
 159       result = est->cov[0];
 160       for (j = 0; j < i; j++)
 161         {
 162           k = i - j - 1;
 163           result -= pow2 (theta[k]) * est->scale[j];
 164         }
 165       est->scale[i] = result;
 166     }
 167 }
 168 static void
 169 init_theta (double **theta, size_t max_lag)
 170 {
 171   size_t i;
 172   size_t j;
 173
 174   for (i = 0; i < max_lag; i++)
 175     {
 176       for (j = 0; j <= i; j++)
 177         {
 178           theta[i][j] = 0.0;
 179         }
 180     }
 181 }
 182 static void
 183 innovations_update_coeff (double **theta, struct innovations_estimate *est,
 184                           size_t max_lag)
 185 {
 186   size_t i;
 187   size_t j;
 188   size_t k;
 189
 190   for (i = 0; i < max_lag; i++)
 191     {
 192       theta[i][i] = est->cov[i+1] / est->scale[0];
 193       for (j = 1; j <= i; j++)
 194         {
 195           k = i - j;
 196           theta[i][k] = (est->cov[k+1] -
 197                          innovations_convolve (theta[i] + k + 1, theta[j - 1], est, j))
 198             / est->scale[j];
 199         }
 200       innovations_update_scale (est, theta[i], i + 1);
 201     }
 202 }
 203 static void
 204 get_coef (const gsl_matrix *data,
 205           struct innovations_estimate **est, size_t max_lag)
 206 {
 207   size_t i;
 208   size_t n;
 209   double **theta;
 210
 211   theta = xnmalloc (max_lag, sizeof (*theta));
 212   for (i = 0; i < max_lag; i++)
 213     {
 214       theta[i] = xnmalloc (max_lag, sizeof (**(theta + i)));
 215     }
 216
 217   for (n = 0; n < data->size2; n++)
 218     {
 219       init_theta (theta, max_lag);
 220       innovations_update_scale (est[n], theta[0], 0);
 221       innovations_update_coeff (theta, est[n], max_lag);
 222       /* Copy the final row of coefficients into EST->COEFF.*/
 223       for (i = 0; i < max_lag; i++)
 224         {
 225           /*
 226             The order of storage here means that the best predicted value
 227             for the time series is computed as follows:
 228
 229             Let X[m], X[m-1],... denote the original series.
 230             Let X_hat[0] denote the best predicted value of X[0],
 231             X_hat[1] denote the projection of X[1] onto the subspace
 232             spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the
 233             projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1],
 234             X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}.
 235
 236             Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1])
 237                           + est->coeff[m-1] * (X[m-2] - X_hat[m-2])
 238                           ...
 239                           + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag])
 240            */
 241           pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
 242         }
 243     }
 244
 245   for (i = 0; i < max_lag; i++)
 246     {
 247       free (theta[i]);
 248     }
 249   free (theta);
 250 }
 251
 252 static void
 253 innovations_struct_init (struct innovations_estimate *est,
 254                          const struct design_matrix *dm,
 255                          size_t lag)
 256 {
 257   size_t j;
 258
 259   est->mean = 0.0;
 260   /* COV[0] stores the lag 0 covariance (i.e., the variance), COV[1]
 261      holds the lag-1 covariance, etc.
 262    */
 263   est->cov = xnmalloc (lag + 1, sizeof (*est->cov));
 264   est->scale = xnmalloc (lag + 1, sizeof (*est->scale));
 265   est->coeff = xnmalloc (lag, sizeof (*est->coeff)); /* No intercept. */
 266
 267   /*
 268     The loop below is an unusual use of PSPP_COEFF_INIT(). In a
 269     typical model, one column of a DESIGN_MATRIX has one
 270     coefficient. But in a time-series model, one column has many
 271     coefficients.
 272    */
 273   for (j = 0; j < lag; j++)
 274     {
 275       pspp_coeff_init (est->coeff + j, dm);
 276     }
 277   est->max_lag = (double) lag;
 278 }
 279 /*
 280   The mean is subtracted from the original data before computing the
 281   coefficients. The mean is NOT added back, so if you want to predict
 282   a new value, you must add the mean to X_hat[m] to get the correct
 283   value.
 284  */
 285 static void
 286 subtract_mean (gsl_matrix *m, struct innovations_estimate **est)
 287 {
 288   size_t i;
 289   size_t j;
 290   double tmp;
 291
 292   for (i = 0; i < m->size1; i++)
 293     {
 294       for (j = 0; j < m->size2; j++)
 295         {
 296           tmp = gsl_matrix_get (m, i, j) - est[j]->mean;
 297           gsl_matrix_set (m, i, j, tmp);
 298         }
 299     }
 300 }
 301 struct innovations_estimate **
 302 pspp_innovations (const struct design_matrix *dm, size_t lag)
 303 {
 304   struct innovations_estimate **est;
 305   size_t i;
 306
 307   est = xnmalloc (dm->m->size2, sizeof *est);
 308   for (i = 0; i < dm->m->size2; i++)
 309     {
 310       est[i] = xmalloc (sizeof *est[i]);
 311 /*       est[i]->variable = vars[i]; */
 312       innovations_struct_init (est[i], dm, lag);
 313     }
 314
 315   get_mean (dm->m, est);
 316   subtract_mean (dm->m, est);
 317   get_covariance (dm->m, est, lag);
 318   get_coef (dm->m, est, lag);
 319
 320   return est;
 321 }
 322
 323 static void
 324 pspp_innovations_free_one (struct innovations_estimate *est)
 325 {
 326   size_t i;
 327
 328   assert (est != NULL);
 329   for (i = 0; i < (size_t) est->max_lag; i++)
 330     {
 331       pspp_coeff_free (est->coeff[i]);
 332     }
 333   free (est->scale);
 334   free (est->cov);
 335   free (est);
 336 }
 337
 338 void pspp_innovations_free (struct innovations_estimate **est, size_t n)
 339 {
 340   size_t i;
 341
 342   assert (est != NULL);
 343   for (i = 0; i < n; i++)
 344     {
 345       pspp_innovations_free_one (est[i]);
 346     }
 347   free (est);
 348 }