From: Jason Stover Date: Sat, 1 Jul 2006 23:54:02 +0000 (+0000) Subject: dropped use of casefiles X-Git-Tag: v0.6.0~793 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp-builds.git;a=commitdiff_plain;h=4dc2ebcfd1a113b25f6997ff3b66fa52ac41158b dropped use of casefiles --- diff --git a/src/math/ts/ChangeLog b/src/math/ts/ChangeLog index e2880996..0a07f8cb 100644 --- a/src/math/ts/ChangeLog +++ b/src/math/ts/ChangeLog @@ -1,3 +1,8 @@ +2006-07-01 Jason Stover + + * innovations.c: Use gsl_matrices to avoid use of casefiles by + backend math routine. + 2006-06-21 Jason Stover * innovations.c (innovations_update_scale): New function. diff --git a/src/math/ts/innovations.c b/src/math/ts/innovations.c index cec66933..13128445 100644 --- a/src/math/ts/innovations.c +++ b/src/math/ts/innovations.c @@ -30,148 +30,94 @@ #include #include -#include +#include #include -#include -#include #include #include -#include #include #include static void -get_mean_variance (size_t n_vars, const struct casefile *cf, +get_mean_variance (const gsl_matrix *data, struct innovations_estimate **est) { - struct casereader *r; - struct ccase c; size_t n; + size_t i; double d; - const union value *tmp; + double tmp; - for (n = 0; n < n_vars; n++) + for (n = 0; n < data->size2; n++) { est[n]->n_obs = 2.0; est[n]->mean = 0.0; est[n]->variance = 0.0; } - for (r = casefile_get_reader (cf); casereader_read (r, &c); - case_destroy (&c)) + for (i = 0; i < data->size1; i++) { - for (n = 0; n < n_vars; n++) + for (n = 0; n < data->size2; n++) { - tmp = case_data (&c, est[n]->variable->fv); - if (!mv_is_value_missing (&(est[n]->variable->miss), tmp)) + tmp = gsl_matrix_get (data, i, n); + if (!gsl_isnan (tmp)) { - d = (tmp->f - est[n]->mean) / est[n]->n_obs; + d = (tmp - est[n]->mean) / est[n]->n_obs; est[n]->mean += d; est[n]->variance += est[n]->n_obs * est[n]->n_obs * d * d; est[n]->n_obs += 1.0; } } } - for (n = 0; n < n_vars; n++) + for (n = 0; n < data->size2; n++) { /* Maximum likelihood estimate of the variance. */ est[n]->variance /= est[n]->n_obs; } } -/* - Read the first MAX_LAG cases. - */ -static bool -innovations_init_cases (struct casereader *r, struct ccase **inn_cs, size_t max_lag) -{ - bool value = true; - size_t lag = 0; - - assert (r != NULL); - assert (inn_cs != NULL); - while (value && lag < max_lag) - { - assert (inn_cs[lag] != NULL); - value = casereader_read (r, inn_cs[lag]); - lag++; - } - return value; -} - -/* - Read one case and update C, which contains the last MAX_LAG cases. - */ -static bool -innovations_update_cases (struct casereader *r, struct ccase **c, size_t max_lag) -{ - size_t lag; - bool value = false; - - for (lag = 0; lag < max_lag - 1; lag++) - { - c[lag] = c[lag+1]; - } - value = casereader_read (r, c[lag]); - return value; -} -static void -get_covariance (size_t n_vars, const struct casefile *cf, +static int +get_covariance (const gsl_matrix *data, struct innovations_estimate **est, size_t max_lag) { - struct casereader *r; - struct ccase **inn_c; size_t lag; - size_t n; - bool read_case = false; - double d; + size_t j; + size_t i; double x; - const union value *tmp; - const union value *tmp2; + double y; + int rc = 1; - inn_c = xnmalloc (max_lag, sizeof (*inn_c)); + assert (data != NULL); + assert (est != NULL); - for (lag = 0; lag < max_lag; lag++) - { - inn_c[lag] = xmalloc (sizeof *inn_c[lag]); - } - - r = casefile_get_reader (cf); - read_case = innovations_init_cases (r, inn_c, max_lag); - - while (read_case) + for (i = 0; i < data->size1; i++) { - for (n = 0; n < n_vars; n++) + for (j = 0; j < data->size2; j++) { - tmp2 = case_data (inn_c[0], est[n]->variable->fv); - if (!mv_is_value_missing (&est[n]->variable->miss, tmp2)) + x = gsl_matrix_get (data, i, j); + + if (!gsl_isnan (x)) { - x = tmp2->f - est[n]->mean; - for (lag = 1; lag <= max_lag; lag++) + x -= est[j]->mean; + for (lag = 1; lag <= max_lag && lag < data->size1 - max_lag; lag++) { - tmp = case_data (inn_c[lag], est[n]->variable->fv); - if (!mv_is_value_missing (&est[n]->variable->miss, tmp)) + y = gsl_matrix_get (data, i + lag, j); + if (!gsl_isnan (y)) { - d = (tmp->f - est[n]->mean); - *(est[n]->cov + lag) += d * x; + y -= est[j]->mean; + *(est[j]->cov + lag) += y * x; + est[i]->n_obs += 1.0; } } } } - read_case = innovations_update_cases (r, inn_c, max_lag); } - for (lag = 0; lag <= max_lag; lag++) + for (lag = 0; lag <= max_lag && lag < data->size1 - max_lag; lag++) { - for (n = 0; n < n_vars; n++) + for (j = 0; j < data->size2; j++) { - *(est[n]->cov + lag) /= (est[n]->n_obs - lag); + *(est[j]->cov + lag) /= (est[j]->n_obs - lag); } } - for (lag = 0; lag < max_lag; lag++) - { - free (inn_c[lag]); - } - free (inn_c); + return rc; } static double innovations_convolve (double **theta, struct innovations_estimate *est, @@ -205,8 +151,8 @@ innovations_update_scale (struct innovations_estimate *est, double *theta, } static void -get_coef (size_t n_vars, const struct casefile *cf, - struct innovations_estimate **est, size_t max_lag) +get_coef (const gsl_matrix *data, + struct innovations_estimate **est, size_t max_lag) { size_t j; size_t i; @@ -221,7 +167,7 @@ get_coef (size_t n_vars, const struct casefile *cf, theta[i] = xnmalloc (i+1, sizeof (theta[i])); } - for (n = 0; n < n_vars; n++) + for (n = 0; n < data->size2; n++) { for (i = 0; i < max_lag; i++) { @@ -275,43 +221,31 @@ get_coef (size_t n_vars, const struct casefile *cf, } struct innovations_estimate ** -pspp_innovations (const struct variable **vars, - size_t n_vars, - size_t lag, - const struct casefile *cf) +pspp_innovations (const gsl_matrix *data, size_t lag) { struct innovations_estimate **est; size_t i; size_t j; - est = xnmalloc (n_vars, sizeof *est); - for (i = 0; i < n_vars; i++) + est = xnmalloc (data->size2, sizeof *est); + for (i = 0; i < data->size2; i++) { - if (vars[i]->type == NUMERIC) - { - est[i] = xmalloc (sizeof **est); - est[i]->variable = vars[i]; - est[i]->mean = 0.0; - est[i]->variance = 0.0; - est[i]->cov = xnmalloc (lag, sizeof (*est[i]->cov)); - est[i]->scale = xnmalloc (lag, sizeof (*est[i]->scale)); - est[i]->coeff = xnmalloc (lag, sizeof (*est[i]->coeff)); - for (j = 0; j < lag; j++) - { - est[i]->coeff[j] = xmalloc (sizeof (*(est[i]->coeff + j))); - } - } - else + est[i] = xmalloc (sizeof **est); +/* est[i]->variable = vars[i]; */ + est[i]->mean = 0.0; + est[i]->variance = 0.0; + est[i]->cov = xnmalloc (lag, sizeof (*est[i]->cov)); + est[i]->scale = xnmalloc (lag, sizeof (*est[i]->scale)); + est[i]->coeff = xnmalloc (lag, sizeof (*est[i]->coeff)); + for (j = 0; j < lag; j++) { - n_vars--; -/* msg (MW, _("Cannot compute autocovariance for a non-numeric variable %s"), */ -/* var_to_string (vars[i])); */ + est[i]->coeff[j] = xmalloc (sizeof (*(est[i]->coeff + j))); } } - get_mean_variance (n_vars, cf, est); - get_covariance (n_vars, cf, est, lag); - get_coef (n_vars, cf, est, lag); + get_mean_variance (data, est); + get_covariance (data, est, lag); + get_coef (data, est, lag); return est; } diff --git a/src/math/ts/innovations.h b/src/math/ts/innovations.h index 94289eaa..a020f876 100644 --- a/src/math/ts/innovations.h +++ b/src/math/ts/innovations.h @@ -1,5 +1,5 @@ /* - src/math/time-series/arma/innovations.h + src/math/ts/innovations.h Copyright (C) 2006 Free Software Foundation, Inc. Written by Jason H. Stover. @@ -36,10 +36,10 @@ struct innovations_estimate double mean; double variance; double *cov; + double *scale; double n_obs; double max_lag; coefficient **coeff; }; -struct innovations_estimate ** pspp_innovations (const struct variable **, size_t *, - size_t, const struct casefile *); +struct innovations_estimate ** pspp_innovations (const gsl_matrix *, size_t); #endif