src/math/ts/innovations.c

   1 /*
   2   src/math/ts/innovations.c
   3
   4   Copyright (C) 2006 Free Software Foundation, Inc. Written by Jason H. Stover.
   5
   6   This program is free software; you can redistribute it and/or modify it under
   7   the terms of the GNU General Public License as published by the Free
   8   Software Foundation; either version 2 of the License, or (at your option)
   9   any later version.
  10
  11   This program is distributed in the hope that it will be useful, but WITHOUT
  12   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14   more details.
  15
  16   You should have received a copy of the GNU General Public License along with
  17   this program; if not, write to the Free Software Foundation, Inc., 51
  18   Franklin Street, Fifth Floor, Boston, MA 02111-1307, USA.
  19  */
  20 /*
  21   Find preliminary ARMA coefficients via the innovations algorithm.
  22   Also compute the sample mean and covariance matrix for each series.
  23
  24   Reference:
  25
  26   P. J. Brockwell and R. A. Davis. Time Series: Theory and
  27   Methods. Second edition. Springer. New York. 1991. ISBN
  28   0-387-97429-6. Sections 5.2, 8.3 and 8.4.
  29  */
  30
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_math.h>
#include <stdlib.h>
#include <string.h>
#include <libpspp/alloc.h>
#include <libpspp/compiler.h>
#include <math/coefficient.h>
#include <math/ts/innovations.h>
  39
  40 static void
  41 get_mean_variance (const gsl_matrix *data,
  42                    struct innovations_estimate **est)
  43
  44 {
  45   size_t n;
  46   size_t i;
  47   double d;
  48   double tmp;
  49
  50   for (n = 0; n < data->size2; n++)
  51     {
  52       est[n]->n_obs = 2.0;
  53       est[n]->mean = 0.0;
  54       est[n]->variance = 0.0;
  55     }
  56   for (i = 0; i < data->size1; i++)
  57     {
  58       for (n = 0; n < data->size2; n++)
  59         {
  60           tmp = gsl_matrix_get (data, i, n);
  61           if (!gsl_isnan (tmp))
  62             {
  63               d = (tmp - est[n]->mean) / est[n]->n_obs;
  64               est[n]->mean += d;
  65               est[n]->variance += est[n]->n_obs * est[n]->n_obs * d * d;
  66               est[n]->n_obs += 1.0;
  67             }
  68         }
  69     }
  70   for (n = 0; n < data->size2; n++)
  71     {
  72       /* Maximum likelihood estimate of the variance. */
  73       est[n]->variance /= est[n]->n_obs;
  74     }
  75 }
  76
  77 static int
  78 get_covariance (const gsl_matrix *data,
  79                 struct innovations_estimate **est, size_t max_lag)
  80 {
  81   size_t lag;
  82   size_t j;
  83   size_t i;
  84   double x;
  85   double y;
  86   int rc = 1;
  87
  88   assert (data != NULL);
  89   assert (est != NULL);
  90
  91   for (i = 0; i < data->size1; i++)
  92     {
  93       for (j = 0; j < data->size2; j++)
  94         {
  95           x = gsl_matrix_get (data, i, j);
  96
  97           if (!gsl_isnan (x))
  98             {
  99               x -= est[j]->mean;
 100               for (lag = 1; lag <= max_lag && lag < (data->size1 - i); lag++)
 101                 {
 102                   y = gsl_matrix_get (data, i + lag, j);
 103                   if (!gsl_isnan (y))
 104                     {
 105                       y -= est[j]->mean;
 106                       *(est[j]->cov + lag - 1) += y * x;
 107                       est[j]->n_obs += 1.0;
 108                     }
 109                 }
 110             }
 111         }
 112     }
 113   for (lag = 1; lag <= max_lag; lag++)
 114     {
 115       for (j = 0; j < data->size2; j++)
 116         {
 117           *(est[j]->cov + lag) /= (est[j]->n_obs - lag);
 118         }
 119     }
 120   return rc;
 121 }
 122 static double
 123 innovations_convolve (double **theta, struct innovations_estimate *est,
 124                       int i, int j)
 125 {
 126   int k;
 127   double result = 0.0;
 128
 129   for (k = 0; k < j; k++)
 130     {
 131       result += theta[i-1][i-k-1] * theta[j][j-k-1] * est->scale[k];
 132     }
 133   return result;
 134 }
 135 static void
 136 innovations_update_scale (struct innovations_estimate *est, double *theta,
 137                           size_t i)
 138 {
 139   double result = 0.0;
 140   size_t j;
 141   size_t k;
 142
 143
 144   result = est->variance;
 145   for (j = 0; j < i; j++)
 146     {
 147       k = i - j - 1;
 148       result -= theta[k] * theta[k] * est->scale[j];
 149     }
 150   est->scale[i] = result;
 151 }
 152 static void
 153 init_theta (double **theta, size_t max_lag)
 154 {
 155   size_t i;
 156   size_t j;
 157
 158   for (i = 0; i < max_lag; i++)
 159     {
 160       for (j = 0; j <= i; j++)
 161         {
 162           theta[i][j] = 0.0;
 163         }
 164     }
 165 }
 166 static void
 167 innovations_update_coeff (double **theta, struct innovations_estimate *est,
 168                           size_t max_lag)
 169 {
 170   size_t i;
 171   size_t j;
 172   size_t k;
 173   double v;
 174
 175   for (i = 0; i < max_lag; i++)
 176     {
 177       for (j = 0; j <= i; j++)
 178         {
 179           k = i - j;
 180           theta[i][k] = (est->cov[k] -
 181             innovations_convolve (theta, est, i, j))
 182             / est->scale[k];
 183         }
 184       innovations_update_scale (est, theta[i], i + 1);
 185     }
 186 }
 187 static void
 188 get_coef (const gsl_matrix *data,
 189           struct innovations_estimate **est, size_t max_lag)
 190 {
 191   size_t i;
 192   size_t n;
 193   double **theta;
 194
 195   theta = xnmalloc (max_lag, sizeof (*theta));
 196   for (i = 0; i < max_lag; i++)
 197     {
 198       theta[i] = xnmalloc (i + 1, sizeof (**(theta + i)));
 199     }
 200
 201   for (n = 0; n < data->size2; n++)
 202     {
 203       init_theta (theta, max_lag);
 204       innovations_update_scale (est[n], theta[0], 0);
 205       innovations_update_coeff (theta, est[n], max_lag);
 206       /* Copy the final row of coefficients into EST->COEFF.*/
 207       for (i = 0; i < max_lag; i++)
 208         {
 209           /*
 210             The order of storage here means that the best predicted value
 211             for the time series is computed as follows:
 212
 213             Let X[m], X[m-1],... denote the original series.
 214             Let X_hat[0] denote the best predicted value of X[0],
 215             X_hat[1] denote the projection of X[1] onto the subspace
 216             spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the
 217             projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1],
 218             X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}.
 219
 220             Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1])
 221                           + est->coeff[m-1] * (X[m-2] - X_hat[m-2])
 222                           ...
 223                           + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag])
 224
 225             (That is what X_hat[m] SHOULD be, anyway. These routines need
 226             to be tested.)
 227            */
 228           pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
 229         }
 230     }
 231
 232   for (i = 0; i < max_lag; i++)
 233     {
 234       free (theta[i]);
 235     }
 236   free (theta);
 237 }
 238
 239 struct innovations_estimate **
 240 pspp_innovations (const gsl_matrix *data, size_t lag)
 241 {
 242   struct innovations_estimate **est;
 243   size_t i;
 244   size_t j;
 245
 246   est = xnmalloc (data->size2, sizeof *est);
 247   for (i = 0; i < data->size2; i++)
 248     {
 249       est[i] = xmalloc (sizeof *est[i]);
 250 /*       est[i]->variable = vars[i]; */
 251       est[i]->mean = 0.0;
 252       est[i]->variance = 0.0;
 253       /* COV does not the variance (i.e., the lag 0 covariance). So COV[0]
 254          holds the lag 1 covariance, COV[i] holds the lag i+1 covariance. */
 255       est[i]->cov = xnmalloc (lag, sizeof (*est[i]->cov));
 256       est[i]->scale = xnmalloc (lag, sizeof (*est[i]->scale));
 257       est[i]->coeff = xnmalloc (lag, sizeof (*est[i]->coeff));
 258       est[i]->max_lag = (double) lag;
 259       for (j = 0; j < lag; j++)
 260         {
 261           est[i]->coeff[j] = xmalloc (sizeof (*(est[i]->coeff + j)));
 262         }
 263     }
 264
 265   get_mean_variance (data, est);
 266   get_covariance (data, est, lag);
 267   get_coef (data, est, lag);
 268
 269   return est;
 270 }
 271
 272 static void
 273 pspp_innovations_free_one (struct innovations_estimate *est)
 274 {
 275   size_t i;
 276
 277   assert (est != NULL);
 278   free (est->cov);
 279   free (est->scale);
 280   for (i = 0; i < (size_t) est->max_lag; i++)
 281     {
 282       pspp_coeff_free (est->coeff[i]);
 283     }
 284 }
 285
 286 void pspp_innovations_free (struct innovations_estimate **est, size_t n)
 287 {
 288   size_t i;
 289
 290   assert (est != NULL);
 291   for (i = 0; i < n; i++)
 292     {
 293       pspp_innovations_free_one (est[i]);
 294     }
 295   free (est);
 296 }