1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2006, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 Find preliminary ARMA coefficients via the innovations algorithm.
19 Also compute the sample mean and covariance matrix for each series.
23 P. J. Brockwell and R. A. Davis. Time Series: Theory and
24 Methods. Second edition. Springer. New York. 1991. ISBN
25 0-387-97429-6. Sections 5.2, 8.3 and 8.4.
30 #include "math/ts/innovations.h"
32 #include <gsl/gsl_matrix.h>
33 #include <gsl/gsl_vector.h>
37 #include "libpspp/compiler.h"
38 #include "libpspp/misc.h"
39 #include "math/coefficient.h"
41 #include "gl/xalloc.h"
44 get_mean (const gsl_matrix *data,
45 struct innovations_estimate **est)
53 for (n = 0; n < data->size2; n++)
58 for (i = 0; i < data->size1; i++)
60 for (n = 0; n < data->size2; n++)
62 tmp = gsl_matrix_get (data, i, n);
66 d = (tmp - est[n]->mean) / est[n]->n_obs;
73 update_cov (struct innovations_estimate **est, gsl_vector_const_view x,
74 gsl_vector_const_view y, size_t lag)
80 for (j = 0; j < x.vector.size; j++)
82 xj = gsl_vector_get (&x.vector, j);
83 yj = gsl_vector_get (&y.vector, j);
90 *(est[j]->cov + lag) += xj * yj;
96 get_covariance (const gsl_matrix *data,
97 struct innovations_estimate **est, size_t max_lag)
104 assert (data != NULL);
105 assert (est != NULL);
107 for (j = 0; j < data->size2; j++)
109 for (lag = 0; lag <= max_lag; lag++)
111 *(est[j]->cov + lag) = 0.0;
115 The rows are in the outer loop because a gsl_matrix is stored in
118 for (i = 0; i < data->size1; i++)
120 for (lag = 0; lag <= max_lag && lag < data->size1 - i; lag++)
122 update_cov (est, gsl_matrix_const_row (data, i),
123 gsl_matrix_const_row (data, i + lag), lag);
126 for (j = 0; j < data->size2; j++)
128 for (lag = 0; lag <= max_lag; lag++)
130 *(est[j]->cov + lag) /= est[j]->n_obs;
138 innovations_convolve (double *x, double *y, struct innovations_estimate *est,
144 assert (x != NULL && y != NULL);
145 assert (est != NULL);
146 assert (est->scale != NULL);
148 for (k = 0; k < i; k++)
150 result += x[k] * y[k] * est->scale[i-k-1];
155 innovations_update_scale (struct innovations_estimate *est, double *theta,
162 if (i < (size_t) est->max_lag)
164 result = est->cov[0];
165 for (j = 0; j < i; j++)
168 result -= pow2 (theta[k]) * est->scale[j];
170 est->scale[i] = result;
174 init_theta (double **theta, size_t max_lag)
179 for (i = 0; i < max_lag; i++)
181 for (j = 0; j <= i; j++)
188 innovations_update_coeff (double **theta, struct innovations_estimate *est,
195 for (i = 0; i < max_lag; i++)
197 theta[i][i] = est->cov[i+1] / est->scale[0];
198 for (j = 1; j <= i; j++)
201 theta[i][k] = (est->cov[k+1] -
202 innovations_convolve (theta[i] + k + 1, theta[j - 1], est, j))
205 innovations_update_scale (est, theta[i], i + 1);
209 get_coef (const gsl_matrix *data,
210 struct innovations_estimate **est, size_t max_lag)
216 theta = xnmalloc (max_lag, sizeof (*theta));
217 for (i = 0; i < max_lag; i++)
219 theta[i] = xnmalloc (max_lag, sizeof (**(theta + i)));
222 for (n = 0; n < data->size2; n++)
224 init_theta (theta, max_lag);
225 innovations_update_scale (est[n], theta[0], 0);
226 innovations_update_coeff (theta, est[n], max_lag);
227 /* Copy the final row of coefficients into EST->COEFF.*/
228 for (i = 0; i < max_lag; i++)
231 The order of storage here means that the best predicted value
232 for the time series is computed as follows:
234 Let X[m], X[m-1],... denote the original series.
235 Let X_hat[0] denote the best predicted value of X[0],
236 X_hat[1] denote the projection of X[1] onto the subspace
237 spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the
238 projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1],
239 X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}.
241 Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1])
242 + est->coeff[m-2] * (X[m-2] - X_hat[m-2])
244 + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag])
246 pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
250 for (i = 0; i < max_lag; i++)
258 innovations_struct_init (struct innovations_estimate *est,
259 const struct design_matrix *dm,
265 /* COV[0] stores the lag 0 covariance (i.e., the variance), COV[1]
266 holds the lag-1 covariance, etc.
268 est->cov = xnmalloc (lag + 1, sizeof (*est->cov));
269 est->scale = xnmalloc (lag + 1, sizeof (*est->scale));
270 est->coeff = xnmalloc (lag, sizeof (*est->coeff)); /* No intercept. */
273 The loop below is an unusual use of PSPP_COEFF_INIT(). In a
274 typical model, one column of a DESIGN_MATRIX has one
275 coefficient. But in a time-series model, one column has many
278 for (j = 0; j < lag; j++)
280 pspp_coeff_init (est->coeff + j, dm);
282 est->max_lag = (double) lag;
285 The mean is subtracted from the original data before computing the
286 coefficients. The mean is NOT added back, so if you want to predict
287 a new value, you must add the mean to X_hat[m] to get the correct
291 subtract_mean (gsl_matrix *m, struct innovations_estimate **est)
297 for (i = 0; i < m->size1; i++)
299 for (j = 0; j < m->size2; j++)
301 tmp = gsl_matrix_get (m, i, j) - est[j]->mean;
302 gsl_matrix_set (m, i, j, tmp);
306 struct innovations_estimate **
307 pspp_innovations (const struct design_matrix *dm, size_t lag)
309 struct innovations_estimate **est;
312 est = xnmalloc (dm->m->size2, sizeof *est);
313 for (i = 0; i < dm->m->size2; i++)
315 est[i] = xmalloc (sizeof *est[i]);
316 /* est[i]->variable = vars[i]; */
317 innovations_struct_init (est[i], dm, lag);
320 get_mean (dm->m, est);
321 subtract_mean (dm->m, est);
322 get_covariance (dm->m, est, lag);
323 get_coef (dm->m, est, lag);
329 pspp_innovations_free_one (struct innovations_estimate *est)
333 assert (est != NULL);
334 for (i = 0; i < (size_t) est->max_lag; i++)
336 pspp_coeff_free (est->coeff[i]);
343 void pspp_innovations_free (struct innovations_estimate **est, size_t n)
347 assert (est != NULL);
348 for (i = 0; i < n; i++)
350 pspp_innovations_free_one (est[i]);