X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fts%2Finnovations.c;h=ba2120fb4e6bc735f41d300b5aef406be5c2828d;hb=81579d9e9f994fb2908f50af41c3eb033d216e58;hp=43d86ec1677c8c9bb381f59b23109da8f09940f5;hpb=29e712969d0d5bc1ee20458309dee7df2c38f1ae;p=pspp-builds.git diff --git a/src/math/ts/innovations.c b/src/math/ts/innovations.c index 43d86ec1..ba2120fb 100644 --- a/src/math/ts/innovations.c +++ b/src/math/ts/innovations.c @@ -1,22 +1,19 @@ -/* - src/math/time-series/arma/innovations.c - - Copyright (C) 2006 Free Software Foundation, Inc. Written by Jason H. Stover. - - This program is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 51 - Franklin Street, Fifth Floor, Boston, MA 02111-1307, USA. - */ +/* PSPP - a program for statistical analysis. + Copyright (C) 2006, 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + /* Find preliminary ARMA coefficients via the innovations algorithm. Also compute the sample mean and covariance matrix for each series. @@ -28,185 +25,329 @@ 0-387-97429-6. Sections 5.2, 8.3 and 8.4. */ +#include + +#include "math/ts/innovations.h" + #include #include #include #include -#include -#include -#include -#include -#include -#include -#include + +#include "libpspp/compiler.h" +#include "libpspp/misc.h" +#include "math/coefficient.h" + +#include "gl/xalloc.h" static void -get_mean_variance (size_t n_vars, const struct casefile *cf, - struct innovations_estimate **est) - +get_mean (const gsl_matrix *data, + struct innovations_estimate **est) + { - struct casereader *r; - struct ccase c; size_t n; + size_t i; double d; - const union value *tmp; + double tmp; - for (n = 0; n < n_vars; n++) + for (n = 0; n < data->size2; n++) { - est[n]->n_obs = 2.0; + est[n]->n_obs = 0.0; est[n]->mean = 0.0; - est[n]->variance = 0.0; } - for (r = casefile_get_reader (cf); casereader_read (r, &c); - case_destroy (&c)) + for (i = 0; i < data->size1; i++) { - for (n = 0; n < n_vars; n++) + for (n = 0; n < data->size2; n++) { - tmp = case_data (&c, est[n]->variable->fv); - if (!mv_is_value_missing (&(est[n]->variable->miss), tmp)) + tmp = gsl_matrix_get (data, i, n); + if (!isnan (tmp)) { - d = (tmp->f - est[n]->mean) / est[n]->n_obs; - est[n]->mean += d; - est[n]->variance += est[n]->n_obs * est[n]->n_obs * d * d; est[n]->n_obs += 1.0; + d = (tmp - est[n]->mean) / est[n]->n_obs; + est[n]->mean += d; } } } - for (n = 0; n < n_vars; n++) +} +static void +update_cov (struct innovations_estimate **est, gsl_vector_const_view x, + gsl_vector_const_view y, size_t lag) +{ + size_t j; + double xj; + double yj; + + for (j = 0; j < x.vector.size; j++) { - /* Maximum likelihood estimate of the variance. */ - est[n]->variance /= est[n]->n_obs; + xj = gsl_vector_get (&x.vector, j); + yj = gsl_vector_get (&y.vector, j); + if (!isnan (xj)) + { + if (!isnan (yj)) + { + xj -= est[j]->mean; + yj -= est[j]->mean; + *(est[j]->cov + lag) += xj * yj; + } + } } } - -/* - Read the first MAX_LAG cases. - */ -static bool -innovations_init_cases (struct casereader *r, struct ccase **c, size_t max_lag) +static int +get_covariance (const gsl_matrix *data, + struct innovations_estimate **est, size_t max_lag) { - bool value = true; - size_t lag = 0; + size_t lag; + size_t j; + size_t i; + int rc = 1; - while (value && lag < max_lag) + assert (data != NULL); + assert (est != NULL); + + for (j = 0; j < data->size2; j++) + { + for (lag = 0; lag <= max_lag; lag++) + { + *(est[j]->cov + lag) = 0.0; + } + } + /* + The rows are in the outer loop because a gsl_matrix is stored in + row-major order. + */ + for (i = 0; i < data->size1; i++) + { + for (lag = 0; lag <= max_lag && lag < data->size1 - i; lag++) + { + update_cov (est, gsl_matrix_const_row (data, i), + gsl_matrix_const_row (data, i + lag), lag); + } + } + for (j = 0; j < data->size2; j++) { - lag++; - value = casereader_read (r, c[lag]); + for (lag = 0; lag <= max_lag; lag++) + { + *(est[j]->cov + lag) /= est[j]->n_obs; + } } - return value; + + return rc; } -/* - Read one case and update C, which contains the last MAX_LAG cases. - */ -static bool -innovations_update_cases (struct casereader *r, struct ccase **c, size_t max_lag) +static double +innovations_convolve (double *x, double *y, struct innovations_estimate *est, + int i) { - size_t lag; - bool value = false; - - for (lag = 0; lag < max_lag - 1; lag++) + int k; + double result = 0.0; + + assert (x != NULL && y != NULL); + assert (est != NULL); + assert (est->scale != NULL); + assert (i > 0); + for (k = 0; k < i; k++) { - c[lag] = c[lag+1]; + result += x[k] * y[k] * est->scale[i-k-1]; } - value = casereader_read (r, c[lag]); - return value; + return result; } static void -get_covariance (size_t n_vars, const struct casefile *cf, - struct innovations_estimate **est, size_t max_lag) +innovations_update_scale (struct innovations_estimate *est, double *theta, + size_t i) { - struct casereader *r; - struct ccase **c; - size_t lag; - size_t n; - bool read_case = false; - double d; - double x; - const union value *tmp; - const union value *tmp2; + double result = 0.0; + size_t j; + size_t k; - c = xnmalloc (max_lag, sizeof (*c)); - - for (lag = 0; lag < max_lag; lag++) + if (i < (size_t) est->max_lag) { - c[lag] = xmalloc (sizeof *c[lag]); + result = est->cov[0]; + for (j = 0; j < i; j++) + { + k = i - j - 1; + result -= pow2 (theta[k]) * est->scale[j]; + } + est->scale[i] = result; } +} +static void +init_theta (double **theta, size_t max_lag) +{ + size_t i; + size_t j; - r = casefile_get_reader (cf); - read_case = innovations_init_cases (r, c, max_lag); + for (i = 0; i < max_lag; i++) + { + for (j = 0; j <= i; j++) + { + theta[i][j] = 0.0; + } + } +} +static void +innovations_update_coeff (double **theta, struct innovations_estimate *est, + size_t max_lag) +{ + size_t i; + size_t j; + size_t k; - while (read_case) + for (i = 0; i < max_lag; i++) { - for (n = 0; n < n_vars; n++) + theta[i][i] = est->cov[i+1] / est->scale[0]; + for (j = 1; j <= i; j++) { - tmp2 = case_data (c[0], est[n]->variable->fv); - if (!mv_is_value_missing (&est[n]->variable->miss, tmp2)) - { - x = tmp2->f - est[n]->mean; - for (lag = 1; lag <= max_lag; lag++) - { - tmp = case_data (c[lag], est[n]->variable->fv); - if (!mv_is_value_missing (&est[n]->variable->miss, tmp)) - { - d = (tmp->f - est[n]->mean); - *(est[n]->cov + lag) += d * x; - } - } - } + k = i - j; + theta[i][k] = (est->cov[k+1] - + innovations_convolve (theta[i] + k + 1, theta[j - 1], est, j)) + / est->scale[j]; } - read_case = innovations_update_cases (r, c, max_lag); + innovations_update_scale (est, theta[i], i + 1); + } +} +static void +get_coef (const gsl_matrix *data, + struct innovations_estimate **est, size_t max_lag) +{ + size_t i; + size_t n; + double **theta; + + theta = xnmalloc (max_lag, sizeof (*theta)); + for (i = 0; i < max_lag; i++) + { + theta[i] = xnmalloc (max_lag, sizeof (**(theta + i))); } - for (lag = 0; lag <= max_lag; lag++) + + for (n = 0; n < data->size2; n++) { - for (n = 0; n < n_vars; n++) + init_theta (theta, max_lag); + innovations_update_scale (est[n], theta[0], 0); + innovations_update_coeff (theta, est[n], max_lag); + /* Copy the final row of coefficients into EST->COEFF.*/ + for (i = 0; i < max_lag; i++) { - *(est[n]->cov + lag) /= (est[n]->n_obs - lag); + /* + The order of storage here means that the best predicted value + for the time series is computed as follows: + + Let X[m], X[m-1],... denote the original series. + Let X_hat[0] denote the best predicted value of X[0], + X_hat[1] denote the projection of X[1] onto the subspace + spanned by {X[0] - X_hat[0]}. Let X_hat[m] denote the + projection of X[m] onto the subspace spanned by {X[m-1] - X_hat[m-1], + X[m-2] - X_hat[m-2],...,X[0] - X_hat[0]}. + + Then X_hat[m] = est->coeff[m-1] * (X[m-1] - X_hat[m-1]) + + est->coeff[m-1] * (X[m-2] - X_hat[m-2]) + ... + + est->coeff[m-max_lag] * (X[m - max_lag] - X_hat[m - max_lag]) + */ + pspp_coeff_set_estimate (est[n]->coeff[i], theta[max_lag - 1][i]); } } - for (lag = 0; lag < max_lag; lag++) + + for (i = 0; i < max_lag; i++) { - free (c[lag]); + free (theta[i]); } - free (c); + free (theta); } -struct innovations_estimate ** pspp_innovations (const struct variable **vars, size_t *n_vars, - size_t lag, const struct casefile *cf) +static void +innovations_struct_init (struct innovations_estimate *est, + const struct design_matrix *dm, + size_t lag) +{ + size_t j; + + est->mean = 0.0; + /* COV[0] stores the lag 0 covariance (i.e., the variance), COV[1] + holds the lag-1 covariance, etc. + */ + est->cov = xnmalloc (lag + 1, sizeof (*est->cov)); + est->scale = xnmalloc (lag + 1, sizeof (*est->scale)); + est->coeff = xnmalloc (lag, sizeof (*est->coeff)); /* No intercept. */ + + /* + The loop below is an unusual use of PSPP_COEFF_INIT(). In a + typical model, one column of a DESIGN_MATRIX has one + coefficient. But in a time-series model, one column has many + coefficients. + */ + for (j = 0; j < lag; j++) + { + pspp_coeff_init (est->coeff + j, dm); + } + est->max_lag = (double) lag; +} +/* + The mean is subtracted from the original data before computing the + coefficients. The mean is NOT added back, so if you want to predict + a new value, you must add the mean to X_hat[m] to get the correct + value. + */ +static void +subtract_mean (gsl_matrix *m, struct innovations_estimate **est) { - struct innovations_estimate **est; size_t i; size_t j; + double tmp; - est = xnmalloc (*n_vars, sizeof *est); - for (i = 0; i < *n_vars; i++) + for (i = 0; i < m->size1; i++) { - if (vars[i]->type == NUMERIC) - { - est[i] = xmalloc (sizeof **est); - est[i]->variable = vars[i]; - est[i]->mean = 0.0; - est[i]->variance = 0.0; - est[i]->cov = xnmalloc (lag, sizeof (est[i]->cov)); - est[i]->coeff = xnmalloc (lag, sizeof (*est[i]->coeff)); - for (j = 0; j < lag; j++) - { - est[i]->coeff[j] = xmalloc (sizeof (*(est[i]->coeff + j))); - } - } - else + for (j = 0; j < m->size2; j++) { - *n_vars--; -/* msg (MW, _("Cannot compute autocovariance for a non-numeric variable %s"), */ -/* var_to_string (vars[i])); */ + tmp = gsl_matrix_get (m, i, j) - est[j]->mean; + gsl_matrix_set (m, i, j, tmp); } } +} +struct innovations_estimate ** +pspp_innovations (const struct design_matrix *dm, size_t lag) +{ + struct innovations_estimate **est; + size_t i; + + est = xnmalloc (dm->m->size2, sizeof *est); + for (i = 0; i < dm->m->size2; i++) + { + est[i] = xmalloc (sizeof *est[i]); +/* est[i]->variable = vars[i]; */ + innovations_struct_init (est[i], dm, lag); + } + + get_mean (dm->m, est); + subtract_mean (dm->m, est); + get_covariance (dm->m, est, lag); + get_coef (dm->m, est, lag); - /* - First data pass to get the mean and variance. - */ - get_mean_variance (*n_vars, cf, est); - get_covariance (*n_vars, cf, est, lag); - return est; } + +static void +pspp_innovations_free_one (struct innovations_estimate *est) +{ + size_t i; + + assert (est != NULL); + for (i = 0; i < (size_t) est->max_lag; i++) + { + pspp_coeff_free (est->coeff[i]); + } + free (est->scale); + free (est->cov); + free (est); +} + +void pspp_innovations_free (struct innovations_estimate **est, size_t n) +{ + size_t i; + + assert (est != NULL); + for (i = 0; i < n; i++) + { + pspp_innovations_free_one (est[i]); + } + free (est); +}