pintos-os.org Git - pspp/blob - src/math/ts/innovations.c

   1 /*
   2   src/math/ts/innovations.c
   3
   4   Copyright (C) 2006 Free Software Foundation, Inc. Written by Jason H. Stover.
   5
   6   This program is free software; you can redistribute it and/or modify it under
   7   the terms of the GNU General Public License as published by the Free
   8   Software Foundation; either version 2 of the License, or (at your option)
   9   any later version.
  10
  11   This program is distributed in the hope that it will be useful, but WITHOUT
  12   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14   more details.
  15
  16   You should have received a copy of the GNU General Public License along with
  17   this program; if not, write to the Free Software Foundation, Inc., 51
  18   Franklin Street, Fifth Floor, Boston, MA 02111-1307, USA.
  19  */
  20 /*
  21   Find preliminary ARMA coefficients via the innovations algorithm.
  22   Also compute the sample mean and covariance matrix for each series.
  23
  24   Reference:
  25
  26   P. J. Brockwell and R. A. Davis. Time Series: Theory and
  27   Methods. Second edition. Springer. New York. 1991. ISBN
  28   0-387-97429-6. Sections 5.2, 8.3 and 8.4.
  29  */
  30
  31 #include <gsl/gsl_matrix.h>
  32 #include <gsl/gsl_vector.h>
  33 #include <gsl/gsl_math.h>
  34 #include <stdlib.h>
  35 #include <libpspp/alloc.h>
  36 #include <libpspp/compiler.h>
  37 #include <math/coefficient.h>
  38 #include <math/ts/innovations.h>
  39
  40 static void
  41 get_mean_variance (const gsl_matrix *data,
  42                    struct innovations_estimate **est)
  43
  44 {
  45   size_t n;
  46   size_t i;
  47   double d;
  48   double tmp;
  49
  50   for (n = 0; n < data->size2; n++)
  51     {
  52       est[n]->n_obs = 2.0;
  53       est[n]->mean = 0.0;
  54       est[n]->variance = 0.0;
  55     }
  56   for (i = 0; i < data->size1; i++)
  57     {
  58       for (n = 0; n < data->size2; n++)
  59         {
  60           tmp = gsl_matrix_get (data, i, n);
  61           if (!gsl_isnan (tmp))
  62             {
  63               d = (tmp - est[n]->mean) / est[n]->n_obs;
  64               est[n]->mean += d;
  65               est[n]->variance += est[n]->n_obs * est[n]->n_obs * d * d;
  66               est[n]->n_obs += 1.0;
  67             }
  68         }
  69     }
  70   for (n = 0; n < data->size2; n++)
  71     {
  72       /* Maximum likelihood estimate of the variance. */
  73       est[n]->variance /= est[n]->n_obs;
  74     }
  75 }
  76
  77 static int
  78 get_covariance (const gsl_matrix *data,
  79                 struct innovations_estimate **est, size_t max_lag)
  80 {
  81   size_t lag;
  82   size_t j;
  83   size_t i;
  84   double x;
  85   double y;
  86   int rc = 1;
  87
  88   assert (data != NULL);
  89   assert (est != NULL);
  90
  91   for (i = 0; i < data->size1; i++)
  92     {
  93       for (j = 0; j < data->size2; j++)
  94         {
  95           x = gsl_matrix_get (data, i, j);
  96
  97           if (!gsl_isnan (x))
  98             {
  99               x -= est[j]->mean;
 100               for (lag = 1; lag <= max_lag && lag < (data->size1 - i); lag++)
 101                 {
 102                   y = gsl_matrix_get (data, i + lag, j);
 103                   if (!gsl_isnan (y))
 104                     {
 105                       y -= est[j]->mean;
 106                       *(est[j]->cov + lag - 1) += y * x;
 107                       est[j]->n_obs += 1.0;
 108                     }
 109                 }
 110             }
 111         }
 112     }
 113   for (lag = 1; lag <= max_lag; lag++)
 114     {
 115       for (j = 0; j < data->size2; j++)
 116         {
 117           *(est[j]->cov + lag) /= (est[j]->n_obs - lag);
 118         }
 119     }
 120   return rc;
 121 }
 122 static double
 123 innovations_convolve (double **theta, struct innovations_estimate *est,
 124                       int i, int j)
 125 {
 126   int k;
 127   double result = 0.0;
 128
 129   for (k = 0; k < j; k++)
 130     {
 131       result += theta[i-1][i-k-1] * theta[j][j-k-1] * est->scale[k];
 132     }
 133   return result;
 134 }
 135 static void
 136 innovations_update_scale (struct innovations_estimate *est, double *theta,
 137                           size_t i)
 138 {
 139   double result = 0.0;
 140   size_t j;
 141   size_t k;
 142
 143
 144   result = est->variance;
 145   for (j = 0; j < i; j++)
 146     {
 147       k = i - j - 1;
 148       result -= theta[k] * theta[k] * est->scale[j];
 149     }
 150   est->scale[i] = result;
 151 }
 152 static void
 153 init_theta (double **theta, size_t max_lag)
 154 {
 155   size_t i;
 156   size_t j;
 157
 158   for (i = 0; i < max_lag; i++)
 159     {
 160       for (j = 0; j <= i; j++)
 161         {
 162           theta[i][j] = 0.0;
 163         }
 164     }
 165 }
 166 static void
 167 innovations_update_coeff (double **theta, struct innovations_estimate *est,
 168                           size_t max_lag)
 169 {
 170   size_t i;
 171   size_t j;
 172   size_t k;
 173
 174   for (i = 0; i < max_lag; i++)
 175     {
 176       for (j = 0; j <= i; j++)
 177         {
 178           k = i - j;
 179           theta[i][k] = (est->cov[k] -
 180             innovations_convolve (theta, est, i, j))
 181             / est->scale[k];
 182         }
 183       innovations_update_scale (est, theta[i], i + 1);
 184     }
 185 }
 186 static void
 187 get_coef (const gsl_matrix *data,
 188           struct innovations_estimate **est, size_t max_lag)
 189 {
 190   size_t i;
 191   size_t n;
 192   double **theta;
 193
 194   theta = xnmalloc (max_lag, sizeof (*theta));
 195   for (i = 0; i < max_lag; i++)
 196     {
 197       theta[i] = xnmalloc (max_lag, sizeof (**(theta + i)));
 198     }
 199
 200   for (n = 0; n < data->size2; n++)
 201     {
 202       init_theta (theta, max_lag);
 203       innovations_update_scale (est[n], theta[0], 0);
 204       innovations_update_coeff (theta, est[n], max_lag);
 205       /* Copy the final row of coefficients into EST->COEFF.*/
 206       for (i = 0; i < max_lag; i++)
 207         {
 208           /*
 209             The order of storage here means that the best predicted value
 210             for the time series is computed as follows:
 211
 212             Let X[m], X[m-1],... denote the original series.
 213             Let X_hat[0] denote the best predicted value of X[0],
 214             X_hat[1] denote the projection of X[1] onto the subspace
 215             spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the
 216             projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1],
 217             X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}.
 218
 219             Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1])
 220                           + est->coeff[m-1] * (X[m-2] - X_hat[m-2])
 221                           ...
 222                           + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag])
 223
 224             (That is what X_hat[m] SHOULD be, anyway. These routines need
 225             to be tested.)
 226            */
 227           pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
 228         }
 229     }
 230
 231   for (i = 0; i < max_lag; i++)
 232     {
 233       free (theta[i]);
 234     }
 235   free (theta);
 236 }
 237
 238 static void
 239 innovations_struct_init (struct innovations_estimate *est, size_t lag)
 240 {
 241   size_t j;
 242
 243   est->mean = 0.0;
 244   est->variance = 0.0;
 245   est->cov = xnmalloc (lag, sizeof (*est->cov));
 246   est->scale = xnmalloc (lag, sizeof (*est->scale));
 247   est->coeff = xnmalloc (lag, sizeof (*est->coeff));
 248   est->max_lag = (double) lag;
 249   /* COV does not the variance (i.e., the lag 0 covariance). So COV[0]
 250      holds the lag 1 covariance, COV[i] holds the lag i+1 covariance. */
 251   for (j = 0; j < lag; j++)
 252     {
 253       est->coeff[j] = xmalloc (sizeof (*(est->coeff[j])));
 254     }
 255 }
 256
 257 struct innovations_estimate **
 258 pspp_innovations (const gsl_matrix *data, size_t lag)
 259 {
 260   struct innovations_estimate **est;
 261   size_t i;
 262
 263   est = xnmalloc (data->size2, sizeof *est);
 264   for (i = 0; i < data->size2; i++)
 265     {
 266       est[i] = xmalloc (sizeof *est[i]);
 267 /*       est[i]->variable = vars[i]; */
 268       innovations_struct_init (est[i], lag);
 269     }
 270
 271   get_mean_variance (data, est);
 272   get_covariance (data, est, lag);
 273   get_coef (data, est, lag);
 274
 275   return est;
 276 }
 277
 278 static void
 279 pspp_innovations_free_one (struct innovations_estimate *est)
 280 {
 281   size_t i;
 282
 283   assert (est != NULL);
 284   free (est->cov);
 285   free (est->scale);
 286   for (i = 0; i < (size_t) est->max_lag; i++)
 287     {
 288       pspp_coeff_free (est->coeff[i]);
 289     }
 290 }
 291
 292 void pspp_innovations_free (struct innovations_estimate **est, size_t n)
 293 {
 294   size_t i;
 295
 296   assert (est != NULL);
 297   for (i = 0; i < n; i++)
 298     {
 299       pspp_innovations_free_one (est[i]);
 300     }
 301   free (est);
 302 }