From: Jason Stover Date: Fri, 28 Oct 2005 20:52:08 +0000 (+0000) Subject: Added variable/parameter estimate matching via new struct X-Git-Tag: v0.6.0~1158 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=afcb1175f3a4e6f9c03d073062968a545d104511;p=pspp-builds.git Added variable/parameter estimate matching via new struct --- diff --git a/lib/linreg/linreg.c b/lib/linreg/linreg.c index 26efe886..85b302d1 100644 --- a/lib/linreg/linreg.c +++ b/lib/linreg/linreg.c @@ -87,38 +87,39 @@ linreg_mean_std (gsl_vector_const_view v, double *mp, double *sp, double *ssp) pspp_linreg_cache * pspp_linreg_cache_alloc (size_t n, size_t p) { - pspp_linreg_cache *cache; + pspp_linreg_cache *c; - cache = (pspp_linreg_cache *) malloc (sizeof (pspp_linreg_cache)); - cache->param_estimates = gsl_vector_alloc (p + 1); - cache->indep_means = gsl_vector_alloc (p); - cache->indep_std = gsl_vector_alloc (p); - cache->ssx = gsl_vector_alloc (p); /* Sums of squares for the independent + c = (pspp_linreg_cache *) malloc (sizeof (pspp_linreg_cache)); + c->param_estimates = gsl_vector_alloc (p + 1); + c->indep_means = gsl_vector_alloc (p); + c->indep_std = gsl_vector_alloc (p); + c->ssx = gsl_vector_alloc (p); /* Sums of squares for the independent variables. */ - cache->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the model - parameters. - */ - cache->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */ - cache->n_obs = n; - cache->n_indeps = p; + c->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the model + parameters. + */ + c->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */ + c->n_obs = n; + c->n_indeps = p; /* Default settings. */ - cache->method = PSPP_LINREG_SWEEP; + c->method = PSPP_LINREG_SWEEP; - return cache; + return c; } void -pspp_linreg_cache_free (pspp_linreg_cache * cache) +pspp_linreg_cache_free (pspp_linreg_cache * c) { - gsl_vector_free (cache->param_estimates); - gsl_vector_free (cache->indep_means); - gsl_vector_free (cache->indep_std); - gsl_vector_free (cache->ss_indeps); - gsl_matrix_free (cache->cov); - free (cache); + gsl_vector_free (c->param_estimates); + gsl_vector_free (c->indep_means); + gsl_vector_free (c->indep_std); + gsl_vector_free (c->ss_indeps); + gsl_matrix_free (c->cov); + free (c->coeff); + free (c); } /* @@ -180,9 +181,9 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, standard deviations of the independent variables here since doing so would cause a miscalculation of the residual sums of squares. Dividing by the standard deviation is done GSL's linear - regression functions, so if the design matrix has a very poor + regression functions, so if the design matrix has a poor condition, use QR decomposition. - * + The design matrix here does not include a column for the intercept (i.e., a column of 1's). If using PSPP_LINREG_QR, we need that column, so design is allocated here when sweeping, or below if using QR. @@ -244,6 +245,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, for (i = 0; i < cache->n_indeps; i++) { tmp = gsl_matrix_get (sw, i, cache->n_indeps); + cache->coeff[i + 1].estimate = tmp; gsl_vector_set (cache->param_estimates, i + 1, tmp); m -= tmp * gsl_vector_get (cache->indep_means, i); } diff --git a/lib/linreg/pspp_linreg.h b/lib/linreg/pspp_linreg.h index 5fa4e63b..40c5ad84 100644 --- a/lib/linreg/pspp_linreg.h +++ b/lib/linreg/pspp_linreg.h @@ -63,23 +63,36 @@ enum /* Cache for the relevant data from the model. There are several - members which the caller may not use, and which could use a lot of + members which the caller might not use, and which could use a lot of storage. Therefore non-essential members of the struct will be allocated only when requested. */ +struct pspp_linreg_coeff +{ + double estimate; /* Estimated coefficient. */ + const struct variable *v; /* The variable associated with this coefficient. + The calling function should supply the variable + when it creates the design matrix. The estimation + procedure ignores the struct variable *. It is here so + the caller can match parameters with relevant + variables. + */ +}; struct pspp_linreg_cache_struct { int n_obs; /* Number of observations. */ int n_indeps; /* Number of independent variables. */ + int n_coeffs; /* - The var structs are ignored during estimation. - They are here so the calling procedures can - find the variables used in the model. + The variable struct is ignored during estimation. + It is here so the calling procedure can + find the variable used in the model. */ - struct var *depvar; - struct var **indepvar; + const struct variable *depvar; + gsl_vector *residuals; + struct pspp_linreg_coeff *coeff; gsl_vector *param_estimates; int method; /* Method to use to estimate parameters. */ /*