src/math/ts/innovations.c

   1 /*
   2   src/math/ts/innovations.c
   3
   4   Copyright (C) 2006 Free Software Foundation, Inc. Written by Jason H. Stover.
   5
   6   This program is free software; you can redistribute it and/or modify it under
   7   the terms of the GNU General Public License as published by the Free
   8   Software Foundation; either version 2 of the License, or (at your option)
   9   any later version.
  10
  11   This program is distributed in the hope that it will be useful, but WITHOUT
  12   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14   more details.
  15
  16   You should have received a copy of the GNU General Public License along with
  17   this program; if not, write to the Free Software Foundation, Inc., 51
  18   Franklin Street, Fifth Floor, Boston, MA 02111-1307, USA.
  19  */
  20 /*
  21   Find preliminary ARMA coefficients via the innovations algorithm.
  22   Also compute the sample mean and covariance matrix for each series.
  23
  24   Reference:
  25
  26   P. J. Brockwell and R. A. Davis. Time Series: Theory and
  27   Methods. Second edition. Springer. New York. 1991. ISBN
  28   0-387-97429-6. Sections 5.2, 8.3 and 8.4.
  29  */
  30
  31 #include <gsl/gsl_matrix.h>
  32 #include <gsl/gsl_vector.h>
  33 #include <math.h>
  34 #include <stdlib.h>
  35 #include <data/case.h>
  36 #include <data/casefile.h>
  37 #include <libpspp/alloc.h>
  38 #include <libpspp/compiler.h>
  39 #include <libpspp/message.h>
  40 #include <math/coefficient.h>
  41 #include <math/ts/innovations.h>
  42
  43 static void
  44 get_mean_variance (size_t n_vars, const struct casefile *cf,
  45                    struct innovations_estimate **est)
  46
  47 {
  48   struct casereader *r;
  49   struct ccase c;
  50   size_t n;
  51   double d;
  52   const union value *tmp;
  53
  54   for (n = 0; n < n_vars; n++)
  55     {
  56       est[n]->n_obs = 2.0;
  57       est[n]->mean = 0.0;
  58       est[n]->variance = 0.0;
  59     }
  60   for (r = casefile_get_reader (cf); casereader_read (r, &c);
  61        case_destroy (&c))
  62     {
  63       for (n = 0; n < n_vars; n++)
  64         {
  65           tmp = case_data (&c, est[n]->variable->fv);
  66           if (!mv_is_value_missing (&(est[n]->variable->miss), tmp))
  67             {
  68               d = (tmp->f - est[n]->mean) / est[n]->n_obs;
  69               est[n]->mean += d;
  70               est[n]->variance += est[n]->n_obs * est[n]->n_obs * d * d;
  71               est[n]->n_obs += 1.0;
  72             }
  73         }
  74     }
  75   for (n = 0; n < n_vars; n++)
  76     {
  77       /* Maximum likelihood estimate of the variance. */
  78       est[n]->variance /= est[n]->n_obs;
  79     }
  80 }
  81
  82 /*
  83   Read the first MAX_LAG cases.
  84  */
  85 static bool
  86 innovations_init_cases (struct casereader *r, struct ccase **inn_cs, size_t max_lag)
  87 {
  88   bool value = true;
  89   size_t lag = 0;
  90
  91   assert (r != NULL);
  92   assert (inn_cs != NULL);
  93   while (value && lag < max_lag)
  94     {
  95       assert (inn_cs[lag] != NULL);
  96       value = casereader_read (r, inn_cs[lag]);
  97       lag++;
  98     }
  99   return value;
 100 }
 101
 102 /*
 103   Read one case and update C, which contains the last MAX_LAG cases.
 104  */
 105 static bool
 106 innovations_update_cases (struct casereader *r, struct ccase **c, size_t max_lag)
 107 {
 108   size_t lag;
 109   bool value = false;
 110
 111   for (lag = 0; lag < max_lag - 1; lag++)
 112     {
 113       c[lag] = c[lag+1];
 114     }
 115   value = casereader_read (r, c[lag]);
 116   return value;
 117 }
 118 static void
 119 get_covariance (size_t n_vars, const struct casefile *cf,
 120                 struct innovations_estimate **est, size_t max_lag)
 121 {
 122   struct casereader *r;
 123   struct ccase **inn_c;
 124   size_t lag;
 125   size_t n;
 126   bool read_case = false;
 127   double d;
 128   double x;
 129   const union value *tmp;
 130   const union value *tmp2;
 131
 132   inn_c = xnmalloc (max_lag, sizeof (*inn_c));
 133
 134   for (lag = 0; lag < max_lag; lag++)
 135     {
 136       inn_c[lag] = xmalloc (sizeof *inn_c[lag]);
 137     }
 138
 139   r = casefile_get_reader (cf);
 140   read_case = innovations_init_cases (r, inn_c, max_lag);
 141
 142   while (read_case)
 143     {
 144       for (n = 0; n < n_vars; n++)
 145         {
 146           tmp2 = case_data (inn_c[0], est[n]->variable->fv);
 147           if (!mv_is_value_missing (&est[n]->variable->miss, tmp2))
 148             {
 149               x = tmp2->f - est[n]->mean;
 150               for (lag = 1; lag <= max_lag; lag++)
 151                 {
 152                   tmp = case_data (inn_c[lag], est[n]->variable->fv);
 153                   if (!mv_is_value_missing (&est[n]->variable->miss, tmp))
 154                     {
 155                       d = (tmp->f - est[n]->mean);
 156                       *(est[n]->cov + lag) += d * x;
 157                     }
 158                 }
 159             }
 160         }
 161       read_case = innovations_update_cases (r, inn_c, max_lag);
 162     }
 163   for (lag = 0; lag <= max_lag; lag++)
 164     {
 165       for (n = 0; n < n_vars; n++)
 166         {
 167           *(est[n]->cov + lag) /= (est[n]->n_obs - lag);
 168         }
 169     }
 170   for (lag = 0; lag < max_lag; lag++)
 171     {
 172       free (inn_c[lag]);
 173     }
 174   free (inn_c);
 175 }
 176 static double
 177 innovations_convolve (double **theta, struct innovations_estimate *est,
 178                       int i, int j)
 179 {
 180   int k;
 181   double result = 0.0;
 182
 183   for (k = 0; k < i; k++)
 184     {
 185       result += theta[i-1][i-k-1] * theta[j-1][j-k-1] * est->scale[k];
 186     }
 187   return result;
 188 }
 189 static void
 190 innovations_update_scale (struct innovations_estimate *est, double *theta,
 191                           size_t i)
 192 {
 193   double result = 0.0;
 194   size_t j;
 195   size_t k;
 196
 197
 198   result = est->cov[0];
 199   for (j = 0; j < i; j++)
 200     {
 201       k = i - j;
 202       result -= theta[k] * theta[k] * est->scale[j];
 203     }
 204   est->scale[i] = result;
 205 }
 206
 207 static void
 208 get_coef (size_t n_vars, const struct casefile *cf,
 209                 struct innovations_estimate **est, size_t max_lag)
 210 {
 211   size_t j;
 212   size_t i;
 213   size_t k;
 214   size_t n;
 215   double v;
 216   double **theta;
 217
 218   theta = xnmalloc (max_lag, sizeof (*theta));
 219   for (i = 0; i < max_lag; i++)
 220     {
 221       theta[i] = xnmalloc (i+1, sizeof (theta[i]));
 222
 223     }
 224   for (n = 0; n < n_vars; n++)
 225     {
 226       for (i = 0; i < max_lag; i++)
 227         {
 228           for (j = 0; j < i; j++)
 229             {
 230               theta[i][j] = 0.0;
 231             }
 232         }
 233       innovations_update_scale (est[n], theta[0], 0);
 234       for (i = 0; i < max_lag; i++)
 235         {
 236           v = est[n]->cov[i];
 237           for (j = 0; j < i; j++)
 238             {
 239               k = i - j;
 240               theta[i-1][k-1] = est[n]->cov[k] -
 241                 innovations_convolve (theta, est[n], i, j);
 242             }
 243           innovations_update_scale (est[n], theta[i], i);
 244         }
 245       /* Copy the final row of coefficients into EST->COEFF.*/
 246       for (i = 0; i < max_lag; i++)
 247         {
 248           /*
 249             The order of storage here means that the best predicted value
 250             for the time series is computed as follows:
 251
 252             Let X[m], X[m-1],... denote the original series.
 253             Let X_hat[0] denote the best predicted value of X[0],
 254             X_hat[1] denote the projection of X[1] onto the subspace
 255             spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the
 256             projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1],
 257             X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}.
 258
 259             Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1])
 260                           + est->coeff[m-1] * (X[m-2] - X_hat[m-2])
 261                           ...
 262                           + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag])
 263
 264             (That is what X_hat[m] SHOULD be, anyway. These routines need
 265             to be tested.)
 266            */
 267           pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
 268         }
 269     }
 270   for (i = 0; i < max_lag; i++)
 271     {
 272       free (theta[i]);
 273     }
 274   free (theta);
 275 }
 276
 277 struct innovations_estimate **
 278 pspp_innovations (const struct variable **vars,
 279                   size_t n_vars,
 280                   size_t lag,
 281                   const struct casefile *cf)
 282 {
 283   struct innovations_estimate **est;
 284   size_t i;
 285   size_t j;
 286
 287   est = xnmalloc (n_vars, sizeof *est);
 288   for (i = 0; i < n_vars; i++)
 289     {
 290       if (vars[i]->type == NUMERIC)
 291         {
 292           est[i] = xmalloc (sizeof **est);
 293           est[i]->variable = vars[i];
 294           est[i]->mean = 0.0;
 295           est[i]->variance = 0.0;
 296           est[i]->cov = xnmalloc (lag, sizeof (*est[i]->cov));
 297           est[i]->scale = xnmalloc (lag, sizeof (*est[i]->scale));
 298           est[i]->coeff = xnmalloc (lag, sizeof (*est[i]->coeff));
 299           for (j = 0; j < lag; j++)
 300             {
 301               est[i]->coeff[j] = xmalloc (sizeof (*(est[i]->coeff + j)));
 302             }
 303         }
 304       else
 305         {
 306           n_vars--;
 307 /*        msg (MW, _("Cannot compute autocovariance for a non-numeric variable %s"), */
 308 /*                   var_to_string (vars[i])); */
 309         }
 310     }
 311
 312   get_mean_variance (n_vars, cf, est);
 313   get_covariance (n_vars, cf, est, lag);
 314   get_coef (n_vars, cf, est, lag);
 315
 316   return est;
 317 }