From 6bfd11241c49bd3ccaad7c5cef89bf1c754e89cb Mon Sep 17 00:00:00 2001 From: Jason H Stover Date: Sat, 16 Aug 2008 17:26:37 -0400 Subject: [PATCH] Initial versions of functions to estimate parameters via the covariance matrix --- src/math/ChangeLog | 10 +++++ src/math/linreg.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/src/math/ChangeLog b/src/math/ChangeLog index 78fbfbc1..83ef85fe 100644 --- a/src/math/ChangeLog +++ b/src/math/ChangeLog @@ -1,3 +1,13 @@ +2008-08-16 Jason H Stover + + * linreg.c (pspp_linreg_with_cov): New function to estimate + parameters directly from covariance matrix instead of using the + entire data set. + + * linreg.c (rearrange_covariance_matrix): New function to ensure + the columns of the covariance matrix correspond to the variables + in the model. + 2008-07-24 John Darrington * merge.c merge.h sort.c sort.h: Removed the value_cnt associated diff --git a/src/math/linreg.c b/src/math/linreg.c index 7e7d4a55..b7cc2040 100644 --- a/src/math/linreg.c +++ b/src/math/linreg.c @@ -617,3 +617,98 @@ void pspp_linreg_set_indep_variable_mean (pspp_linreg_cache *c, const struct var pspp_coeff_set_mean (coef, m); } } + +/* + Make sure the dependent variable is at the last column, and that + only variables in the model are in the covariance matrix. 
+ */
+/*
+  COV must be square; column I of COV describes the variable returned
+  by design_matrix_col_to_var (COV, I).  The result is a new matrix
+  made by covariance_matrix_create whose variables are the
+  independent variables of C, in the order in which their columns
+  occur in COV, followed by C->DEPVAR.  Columns of COV whose variable
+  is not in the model are dropped.
+
+  COLUMNS maps each row/column of the result to the matching
+  row/column of COV, so it holds subscripts into COV, not subscripts
+  into the variable list filled in by pspp_linreg_get_vars.
+
+  The dependent variable is recognised by comparing against
+  C->DEPVAR directly, so it is found whether or not
+  pspp_linreg_get_vars includes it in its list.
+
+  NOTE(review): MODEL_VARS is not freed here because it is not
+  visible from this function whether covariance_matrix_create keeps
+  the pointer.  If it copies the array, free MODEL_VARS along with
+  COLUMNS and V.
+ */
+static struct design_matrix *
+rearrange_covariance_matrix (const struct design_matrix *cov, pspp_linreg_cache *c)
+{
+  struct variable **v;
+  struct variable **model_vars;
+  struct variable *tmp;
+  struct variable *dep_var;
+  struct design_matrix *result;
+  int n_vars;
+  int found_dep;
+  size_t *columns;
+  size_t i;
+  size_t j;
+  size_t k;
+  size_t dep_col;
+
+  assert (cov != NULL);
+  assert (c != NULL);
+  assert (cov->m->size1 > 0);
+  assert (cov->m->size2 == cov->m->size1);
+  v = xnmalloc (c->n_coeffs, sizeof (*v));
+  /*
+    At most one entry per column of COV is ever stored in MODEL_VARS
+    and COLUMNS, so the dimension of COV bounds both arrays.
+  */
+  model_vars = xnmalloc (cov->m->size2, sizeof (*model_vars));
+  columns = xnmalloc (cov->m->size2, sizeof (*columns));
+  n_vars = pspp_linreg_get_vars (c, v);
+  assert (n_vars >= 0);
+
+  dep_var = NULL;
+  dep_col = 0;
+  found_dep = 0;
+  k = 0;
+  for (i = 0; i < cov->m->size2; i++)
+    {
+      tmp = design_matrix_col_to_var (cov, i);
+      if (tmp == c->depvar)
+        {
+          /* Held back so that it can be appended after the loop.  */
+          dep_var = tmp;
+          dep_col = i;
+          found_dep = 1;
+          continue;
+        }
+      for (j = 0; j < (size_t) n_vars; j++)
+        {
+          if (tmp == v[j])
+            {
+              /* Record the column of COV, not the position in V.  */
+              model_vars[k] = tmp;
+              columns[k] = i;
+              k++;
+              break;
+            }
+        }
+    }
+  /*
+    The dependent variable goes in the last slot.  Store first, then
+    advance K, so that every slot below K is initialised.  Because the
+    dependent variable's own column was not counted above, K is at
+    most COV->M->SIZE2 - 1 here and the store is in bounds.
+  */
+  assert (found_dep);
+  model_vars[k] = dep_var;
+  columns[k] = dep_col;
+  k++;
+  /*
+    K should now be equal to C->N_INDEPS + 1. If it is not, then
+    either the code above is wrong or the caller didn't send us the
+    correct values in C.
+  */
+  assert (k == c->n_indeps + 1);
+
+  result = covariance_matrix_create (k, model_vars);
+  for (i = 0; i < result->m->size1; i++)
+    {
+      for (j = 0; j < result->m->size2; j++)
+        {
+          gsl_matrix_set (result->m, i, j,
+                          gsl_matrix_get (cov->m, columns[i], columns[j]));
+        }
+    }
+  free (columns);
+  free (v);
+  return result;
+}
+/*
+  Estimate the model parameters from the covariance matrix only. This
+  method uses less memory than PSPP_LINREG, which requires the entire
+  data set to be stored in memory.
+*/
+/*
+  This is still a stub: it checks that COV, OPTS and CACHE are
+  non-null and does nothing else.  No estimation is performed and
+  CACHE is not modified.  It returns GSL_EUNIMPL so that a caller
+  cannot mistake the stub for a successful fit.
+
+  NOTE(review): once implemented, this presumably should follow the
+  same return convention as PSPP_LINREG -- confirm.
+*/
+int
+pspp_linreg_with_cov (const struct design_matrix *cov,
+                      const pspp_linreg_opts * opts, pspp_linreg_cache * cache)
+{
+  assert (cov != NULL);
+  assert (opts != NULL);
+  assert (cache != NULL);
+
+  return GSL_EUNIMPL;
+}
+
-- 
2.30.2