2 src/math/ts/innovations.c
4 Copyright (C) 2006 Free Software Foundation, Inc. Written by Jason H. Stover.
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2 of the License, or (at your option)
11 This program is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 You should have received a copy of the GNU General Public License along with
17 this program; if not, write to the Free Software Foundation, Inc., 51
18 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 Find preliminary ARMA coefficients via the innovations algorithm.
22 Also compute the sample mean and covariance matrix for each series.
26 P. J. Brockwell and R. A. Davis. Time Series: Theory and
27 Methods. Second edition. Springer. New York. 1991. ISBN
28 0-387-97429-6. Sections 5.2, 8.3 and 8.4.
31 #include <gsl/gsl_matrix.h>
32 #include <gsl/gsl_vector.h>
33 #include <gsl/gsl_math.h>
35 #include <libpspp/alloc.h>
36 #include <libpspp/compiler.h>
37 #include <math/coefficient.h>
38 #include <math/ts/innovations.h>
/* Accumulate per-series statistics for DATA, one series per column,
   into EST[n].

   The visible code resets EST[n]->variance to zero, walks DATA row by
   row, and for every observation TMP adds n_obs * n_obs * d * d to the
   variance accumulator, where d = (TMP - mean) / n_obs.  Algebraically
   that term is (TMP - mean)^2.  The accumulator is finally divided by
   n_obs.

   NOTE(review): the statements that update EST[n]->n_obs and
   EST[n]->mean are not visible in this excerpt -- confirm against the
   full file before relying on the description above.

   NOTE(review): if mean is the running mean before TMP is folded in
   and n_obs already counts TMP, the usual one-pass update adds
   n_obs * (n_obs - 1) * d * d rather than n_obs * n_obs * d * d --
   confirm the accumulated quantity is the intended one.  */
41 get_mean_variance (const gsl_matrix *data,
42 struct innovations_estimate **est)
50 for (n = 0; n < data->size2; n++)
54 est[n]->variance = 0.0;
56 for (i = 0; i < data->size1; i++)
58 for (n = 0; n < data->size2; n++)
60 tmp = gsl_matrix_get (data, i, n);
/* D is the deviation of TMP from the stored mean, scaled by the count.  */
63 d = (tmp - est[n]->mean) / est[n]->n_obs;
65 est[n]->variance += est[n]->n_obs * est[n]->n_obs * d * d;
70 for (n = 0; n < data->size2; n++)
72 /* Maximum likelihood estimate of the variance: divide by n_obs. */
73 est[n]->variance /= est[n]->n_obs;
/* Accumulate lagged products for each series (column j of DATA) into
   EST[j]->cov for lags 1..MAX_LAG, then normalize them.

   For row i and each lag, the product of the values at rows i and
   i + lag is added to cov[lag - 1], so cov[0] receives the lag 1
   products.  The bound lag < data->size1 - i keeps row i + lag inside
   the matrix.

   NOTE(review): any centering of X and Y, and any missing-value
   checks, are not visible in this excerpt -- confirm against the full
   file.  */
78 get_covariance (const gsl_matrix *data,
79 struct innovations_estimate **est, size_t max_lag)
88 assert (data != NULL);
91 for (i = 0; i < data->size1; i++)
93 for (j = 0; j < data->size2; j++)
95 x = gsl_matrix_get (data, i, j);
100 for (lag = 1; lag <= max_lag && lag < (data->size1 - i); lag++)
102 y = gsl_matrix_get (data, i + lag, j);
106 *(est[j]->cov + lag - 1) += y * x;
/* NOTE(review): n_obs is bumped once per accumulated product, for every
   lag.  It is the same field get_mean_variance divides by, so after
   this loop it no longer counts observations -- confirm intended.  */
107 est[j]->n_obs += 1.0;
113 for (lag = 1; lag <= max_lag; lag++)
115 for (j = 0; j < data->size2; j++)
/* NOTE(review): the accumulation above writes cov[lag - 1], but this
   normalizes cov[lag].  As written, cov[0] is never normalized and
   cov[max_lag] is touched; pspp_innovations allocates cov with exactly
   max_lag elements, so that looks like a one-past-the-end access --
   confirm and align the two indices.  */
117 *(est[j]->cov + lag) /= (est[j]->n_obs - lag);
/* Accumulate, over k = 0 .. j - 1, the weighted product
   theta[i-1][i-k-1] * theta[j][j-k-1] * est->scale[k].

   When j is 0 the loop body never runs, so theta[i-1] is not
   dereferenced in that case.

   NOTE(review): the rest of the parameter list, the initial value of
   RESULT and the return statement are not visible in this excerpt;
   the caller in innovations_update_coeff passes (theta, est, i, j).  */
123 innovations_convolve (double **theta, struct innovations_estimate *est,
129 for (k = 0; k < j; k++)
131 result += theta[i-1][i-k-1] * theta[j][j-k-1] * est->scale[k];
/* Compute EST->scale[i]: start from EST->variance and subtract
   theta[k]^2 * EST->scale[j] for each j < i.  With i == 0 the loop
   does not run and scale[0] is set to the variance.

   NOTE(review): the assignment that derives K from I and J is not
   visible in this excerpt -- confirm against the full file.

   NOTE(review): innovations_update_coeff calls this with i + 1 for
   i up to max_lag - 1, so the store below can target scale[max_lag],
   while pspp_innovations allocates scale with max_lag elements.  That
   looks like a write one past the end -- confirm the allocation
   size.  */
136 innovations_update_scale (struct innovations_estimate *est, double *theta,
144 result = est->variance;
145 for (j = 0; j < i; j++)
148 result -= theta[k] * theta[k] * est->scale[j];
150 est->scale[i] = result;
/* Visit every entry theta[i][j] with 0 <= j <= i < MAX_LAG, i.e. the
   triangular layout in which row i holds i + 1 values.

   NOTE(review): the statement executed for each entry is not visible
   in this excerpt; presumably it resets the entry before the
   coefficients for the next series are computed -- confirm.  */
153 init_theta (double **theta, size_t max_lag)
158 for (i = 0; i < max_lag; i++)
160 for (j = 0; j <= i; j++)
/* Fill the triangular array THETA row by row.  For row i, each entry
   theta[i][k] is derived from EST->cov[k] minus the value returned by
   innovations_convolve (theta, est, i, j); once the row is complete,
   the next scale value is computed from it via
   innovations_update_scale (est, theta[i], i + 1).

   NOTE(review): the assignment that derives K from I and J, and the
   divisor that finishes the theta[i][k] expression, are not visible
   in this excerpt -- confirm against the full file.  */
167 innovations_update_coeff (double **theta, struct innovations_estimate *est,
175 for (i = 0; i < max_lag; i++)
177 for (j = 0; j <= i; j++)
180 theta[i][k] = (est->cov[k] -
181 innovations_convolve (theta, est, i, j))
/* The scale index passed here is i + 1, one ahead of the row index.  */
184 innovations_update_scale (est, theta[i], i + 1);
/* Compute the coefficient estimates for every series (column of DATA)
   and store them in EST[n]->coeff.

   A triangular work array THETA is allocated once, with row i holding
   i + 1 elements, and reused for each series.  Per series: THETA is
   re-initialized, scale[0] is set by calling innovations_update_scale
   with index 0, the rows of THETA are filled by
   innovations_update_coeff, and the last row, theta[max_lag - 1], is
   copied into EST[n]->coeff with pspp_coeff_set_estimate.

   NOTE(review): theta[0] is read for every series even when MAX_LAG
   is 0, in which case THETA has no rows -- confirm callers never pass
   a zero lag.

   NOTE(review): in the explanatory text further down, the first two
   terms of the X_hat[m] expansion both use est->coeff[m-1]; given the
   progression to est->coeff[m-max_lag], the second looks like it
   should be est->coeff[m-2] -- confirm.

   NOTE(review): the body of the final loop over i is not visible in
   this excerpt; presumably it releases the rows of THETA --
   confirm.  */
188 get_coef (const gsl_matrix *data,
189 struct innovations_estimate **est, size_t max_lag)
195 theta = xnmalloc (max_lag, sizeof (*theta));
196 for (i = 0; i < max_lag; i++)
198 theta[i] = xnmalloc (i + 1, sizeof (**(theta + i)));
201 for (n = 0; n < data->size2; n++)
203 init_theta (theta, max_lag);
204 innovations_update_scale (est[n], theta[0], 0);
205 innovations_update_coeff (theta, est[n], max_lag);
206 /* Copy the final row of coefficients into EST->COEFF.*/
207 for (i = 0; i < max_lag; i++)
210 The order of storage here means that the best predicted value
211 for the time series is computed as follows:
213 Let X[m], X[m-1],... denote the original series.
214 Let X_hat[0] denote the best predicted value of X[0],
215 X_hat[1] denote the projection of X[1] onto the subspace
216 spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the
217 projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1],
218 X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}.
220 Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1])
221 + est->coeff[m-1] * (X[m-2] - X_hat[m-2])
223 + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag])
225 (That is what X_hat[m] SHOULD be, anyway. These routines need
228 pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
232 for (i = 0; i < max_lag; i++)
/* Allocate one innovations_estimate per column of DATA and fill it in.

   For each series this allocates the estimate itself plus its cov,
   scale and coeff arrays (LAG elements each) and one object per
   coefficient, records LAG in max_lag, and then runs
   get_mean_variance, get_covariance and get_coef in that order over
   the whole array.

   The allocations are released by pspp_innovations_free.

   NOTE(review): the return statement is not visible in this excerpt;
   presumably EST is returned -- confirm.  */
239 struct innovations_estimate **
240 pspp_innovations (const gsl_matrix *data, size_t lag)
242 struct innovations_estimate **est;
246 est = xnmalloc (data->size2, sizeof *est);
247 for (i = 0; i < data->size2; i++)
249 est[i] = xmalloc (sizeof *est[i]);
250 /* est[i]->variable = vars[i]; */
252 est[i]->variance = 0.0;
253 /* COV does not include the variance (i.e., the lag 0 covariance). So COV[0]
254 holds the lag 1 covariance, COV[i] holds the lag i+1 covariance. */
/* NOTE(review): get_covariance accumulates into cov with +=, and no
   zeroing of cov is visible here -- confirm it is initialized.  */
255 est[i]->cov = xnmalloc (lag, sizeof (*est[i]->cov));
/* NOTE(review): innovations_update_coeff ends up writing scale[lag]
   (it passes i + 1 with i up to lag - 1), which needs lag + 1
   elements -- confirm this size.  */
256 est[i]->scale = xnmalloc (lag, sizeof (*est[i]->scale));
257 est[i]->coeff = xnmalloc (lag, sizeof (*est[i]->coeff));
/* max_lag is stored as a double; pspp_innovations_free_one casts it
   back to size_t.  */
258 est[i]->max_lag = (double) lag;
259 for (j = 0; j < lag; j++)
/* NOTE(review): *(est[i]->coeff + j) is the pointer element coeff[j],
   so this sizeof is the size of a pointer, not of the object that
   coeff[j] points to.  If the pointee is larger than a pointer this
   under-allocates; sizeof (*est[i]->coeff[j]) looks like what was
   meant -- confirm.  */
261 est[i]->coeff[j] = xmalloc (sizeof (*(est[i]->coeff + j)));
265 get_mean_variance (data, est);
266 get_covariance (data, est, lag);
267 get_coef (data, est, lag);
/* Release the storage owned by a single estimate EST, which must not
   be null.

   The visible code frees each of the max_lag coefficient objects with
   pspp_coeff_free; max_lag is stored as a double, hence the cast to
   size_t in the loop bound.

   NOTE(review): freeing of the cov, scale and coeff arrays and of EST
   itself is not visible in this excerpt -- confirm against the full
   file.  */
273 pspp_innovations_free_one (struct innovations_estimate *est)
277 assert (est != NULL);
280 for (i = 0; i < (size_t) est->max_lag; i++)
282 pspp_coeff_free (est->coeff[i]);
/* Release the N estimates in the array EST, which must not be null,
   by calling pspp_innovations_free_one on each element.

   NOTE(review): freeing of the EST array itself is not visible in
   this excerpt -- confirm against the full file.  */
286 void pspp_innovations_free (struct innovations_estimate **est, size_t n)
290 assert (est != NULL);
291 for (i = 0; i < n; i++)
293 pspp_innovations_free_one (est[i]);