2 src/math/ts/innovations.c
4 Copyright (C) 2006 Free Software Foundation, Inc. Written by Jason H. Stover.
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2 of the License, or (at your option)
11 This program is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 You should have received a copy of the GNU General Public License along with
17 this program; if not, write to the Free Software Foundation, Inc., 51
18 Franklin Street, Fifth Floor, Boston, MA 02111-1307, USA.
21 Find preliminary ARMA coefficients via the innovations algorithm.
22 Also compute the sample mean and covariance matrix for each series.
26 P. J. Brockwell and R. A. Davis. Time Series: Theory and
27 Methods. Second edition. Springer. New York. 1991. ISBN
28 0-387-97429-6. Sections 5.2, 8.3 and 8.4.
31 #include <gsl/gsl_matrix.h>
32 #include <gsl/gsl_vector.h>
35 #include <data/case.h>
36 #include <data/casefile.h>
37 #include <libpspp/alloc.h>
38 #include <libpspp/compiler.h>
39 #include <libpspp/message.h>
40 #include <math/coefficient.h>
41 #include <math/ts/innovations.h>
44 get_mean_variance (size_t n_vars, const struct casefile *cf,
45 struct innovations_estimate **est)
52 const union value *tmp;
54 for (n = 0; n < n_vars; n++)
58 est[n]->variance = 0.0;
60 for (r = casefile_get_reader (cf); casereader_read (r, &c);
63 for (n = 0; n < n_vars; n++)
65 tmp = case_data (&c, est[n]->variable->fv);
66 if (!mv_is_value_missing (&(est[n]->variable->miss), tmp))
68 d = (tmp->f - est[n]->mean) / est[n]->n_obs;
70 est[n]->variance += est[n]->n_obs * est[n]->n_obs * d * d;
75 for (n = 0; n < n_vars; n++)
77 /* Maximum likelihood estimate of the variance. */
78 est[n]->variance /= est[n]->n_obs;
83 Read the first MAX_LAG cases.
86 innovations_init_cases (struct casereader *r, struct ccase **inn_cs, size_t max_lag)
92 assert (inn_cs != NULL);
93 while (value && lag < max_lag)
95 assert (inn_cs[lag] != NULL);
96 value = casereader_read (r, inn_cs[lag]);
103 Read one case and update C, which contains the last MAX_LAG cases.
106 innovations_update_cases (struct casereader *r, struct ccase **c, size_t max_lag)
111 for (lag = 0; lag < max_lag - 1; lag++)
115 value = casereader_read (r, c[lag]);
119 get_covariance (size_t n_vars, const struct casefile *cf,
120 struct innovations_estimate **est, size_t max_lag)
122 struct casereader *r;
123 struct ccase **inn_c;
126 bool read_case = false;
129 const union value *tmp;
130 const union value *tmp2;
132 inn_c = xnmalloc (max_lag, sizeof (*inn_c));
134 for (lag = 0; lag < max_lag; lag++)
136 inn_c[lag] = xmalloc (sizeof *inn_c[lag]);
139 r = casefile_get_reader (cf);
140 read_case = innovations_init_cases (r, inn_c, max_lag);
144 for (n = 0; n < n_vars; n++)
146 tmp2 = case_data (inn_c[0], est[n]->variable->fv);
147 if (!mv_is_value_missing (&est[n]->variable->miss, tmp2))
149 x = tmp2->f - est[n]->mean;
150 for (lag = 1; lag <= max_lag; lag++)
152 tmp = case_data (inn_c[lag], est[n]->variable->fv);
153 if (!mv_is_value_missing (&est[n]->variable->miss, tmp))
155 d = (tmp->f - est[n]->mean);
156 *(est[n]->cov + lag) += d * x;
161 read_case = innovations_update_cases (r, inn_c, max_lag);
163 for (lag = 0; lag <= max_lag; lag++)
165 for (n = 0; n < n_vars; n++)
167 *(est[n]->cov + lag) /= (est[n]->n_obs - lag);
170 for (lag = 0; lag < max_lag; lag++)
177 innovations_convolve (double **theta, struct innovations_estimate *est,
183 for (k = 0; k < i; k++)
185 result += theta[i-1][i-k-1] * theta[j-1][j-k-1] * est->scale[k];
190 innovations_update_scale (struct innovations_estimate *est, double *theta,
198 result = est->cov[0];
199 for (j = 0; j < i; j++)
202 result -= theta[k] * theta[k] * est->scale[j];
204 est->scale[i] = result;
208 get_coef (size_t n_vars, const struct casefile *cf,
209 struct innovations_estimate **est, size_t max_lag)
218 theta = xnmalloc (max_lag, sizeof (*theta));
219 for (i = 0; i < max_lag; i++)
221 theta[i] = xnmalloc (i+1, sizeof (theta[i]));
224 for (n = 0; n < n_vars; n++)
226 for (i = 0; i < max_lag; i++)
228 for (j = 0; j < i; j++)
233 innovations_update_scale (est[n], theta[0], 0);
234 for (i = 0; i < max_lag; i++)
237 for (j = 0; j < i; j++)
240 theta[i-1][k-1] = est[n]->cov[k] -
241 innovations_convolve (theta, est[n], i, j);
243 innovations_update_scale (est[n], theta[i], i);
245 /* Copy the final row of coefficients into EST->COEFF.*/
246 for (i = 0; i < max_lag; i++)
249 The order of storage here means that the best predicted value
250 for the time series is computed as follows:
252 Let X[m], X[m-1],... denote the original series.
253 Let X_hat[0] denote the best predicted value of X[0],
254 X_hat[1] denote the projection of X[1] onto the subspace
255 spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the
256 projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1],
257 X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}.
259 Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1])
260 + est->coeff[m-1] * (X[m-2] - X_hat[m-2])
262 + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag])
264 (That is what X_hat[m] SHOULD be, anyway. These routines need
267 pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]);
270 for (i = 0; i < max_lag; i++)
277 struct innovations_estimate **
278 pspp_innovations (const struct variable **vars,
281 const struct casefile *cf)
283 struct innovations_estimate **est;
287 est = xnmalloc (n_vars, sizeof *est);
288 for (i = 0; i < n_vars; i++)
290 if (vars[i]->type == NUMERIC)
292 est[i] = xmalloc (sizeof **est);
293 est[i]->variable = vars[i];
295 est[i]->variance = 0.0;
296 est[i]->cov = xnmalloc (lag, sizeof (*est[i]->cov));
297 est[i]->scale = xnmalloc (lag, sizeof (*est[i]->scale));
298 est[i]->coeff = xnmalloc (lag, sizeof (*est[i]->coeff));
299 for (j = 0; j < lag; j++)
301 est[i]->coeff[j] = xmalloc (sizeof (*(est[i]->coeff + j)));
307 /* msg (MW, _("Cannot compute autocovariance for a non-numeric variable %s"), */
308 /* var_to_string (vars[i])); */
312 get_mean_variance (n_vars, cf, est);
313 get_covariance (n_vars, cf, est, lag);
314 get_coef (n_vars, cf, est, lag);