pspp_linreg_cache *
pspp_linreg_cache_alloc (size_t n, size_t p)
{
- pspp_linreg_cache *cache;
+ pspp_linreg_cache *c;
- cache = (pspp_linreg_cache *) malloc (sizeof (pspp_linreg_cache));
- cache->param_estimates = gsl_vector_alloc (p + 1);
- cache->indep_means = gsl_vector_alloc (p);
- cache->indep_std = gsl_vector_alloc (p);
- cache->ssx = gsl_vector_alloc (p); /* Sums of squares for the independent
+ c = (pspp_linreg_cache *) malloc (sizeof (pspp_linreg_cache));
+ c->param_estimates = gsl_vector_alloc (p + 1);
+ c->indep_means = gsl_vector_alloc (p);
+ c->indep_std = gsl_vector_alloc (p);
+ c->ssx = gsl_vector_alloc (p); /* Sums of squares for the independent
variables.
*/
- cache->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the model
- parameters.
- */
- cache->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */
- cache->n_obs = n;
- cache->n_indeps = p;
+ c->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the model
+ parameters.
+ */
+ c->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */
+ c->n_obs = n;
+ c->n_indeps = p;
+ /*
+ The coefficient array is not allocated in this function. Start it
+ out as NULL, with a count of zero, so that pspp_linreg_cache_free
+ never hands an indeterminate pointer to free when nothing has
+ been stored in coeff.
+ */
+ c->coeff = NULL;
+ c->n_coeffs = 0;
/*
Default settings.
*/
- cache->method = PSPP_LINREG_SWEEP;
+ c->method = PSPP_LINREG_SWEEP;
- return cache;
+ return c;
}
void
-pspp_linreg_cache_free (pspp_linreg_cache * cache)
+pspp_linreg_cache_free (pspp_linreg_cache * c)
{
- gsl_vector_free (cache->param_estimates);
- gsl_vector_free (cache->indep_means);
- gsl_vector_free (cache->indep_std);
- gsl_vector_free (cache->ss_indeps);
- gsl_matrix_free (cache->cov);
- free (cache);
+ /*
+ Release every member that pspp_linreg_cache_alloc allocated, then
+ the coefficient array, and finally the cache itself. C must not
+ be used after this call.
+ */
+ gsl_vector_free (c->param_estimates);
+ gsl_vector_free (c->indep_means);
+ gsl_vector_free (c->indep_std);
+ /* ssx is allocated by pspp_linreg_cache_alloc; free it to avoid a leak. */
+ gsl_vector_free (c->ssx);
+ gsl_vector_free (c->ss_indeps);
+ gsl_matrix_free (c->cov);
+ free (c->coeff);
+ free (c);
}
/*
standard deviations of the independent variables here since doing
so would cause a miscalculation of the residual sums of
squares. Dividing by the standard deviation is done by GSL's linear
- regression functions, so if the design matrix has a very poor
+ regression functions, so if the design matrix has a poor
condition, use QR decomposition.
- *
+
The design matrix here does not include a column for the intercept
(i.e., a column of 1's). If using PSPP_LINREG_QR, we need that column,
so design is allocated here when sweeping, or below if using QR.
for (i = 0; i < cache->n_indeps; i++)
{
tmp = gsl_matrix_get (sw, i, cache->n_indeps);
+ cache->coeff[i + 1].estimate = tmp;
gsl_vector_set (cache->param_estimates, i + 1, tmp);
m -= tmp * gsl_vector_get (cache->indep_means, i);
}
/*
Cache for the relevant data from the model. There are several
- members which the caller may not use, and which could use a lot of
+ members which the caller might not use, and which could use a lot of
storage. Therefore non-essential members of the struct will be
allocated only when requested.
*/
+/*
+ One regression coefficient: its estimated value, together with the
+ variable the caller associates with it.
+ */
+struct pspp_linreg_coeff
+{
+ double estimate; /* Estimated coefficient. */
+ const struct variable *v; /* The variable associated with this coefficient.
+ The calling function should supply the variable
+ when it creates the design matrix. The estimation
+ procedure ignores the struct variable *. It is here so
+ the caller can match parameters with relevant
+ variables.
+ */
+};
struct pspp_linreg_cache_struct
{
int n_obs; /* Number of observations. */
int n_indeps; /* Number of independent variables. */
+ int n_coeffs;
/*
- The var structs are ignored during estimation.
- They are here so the calling procedures can
- find the variables used in the model.
+ The variable struct is ignored during estimation.
+ It is here so the calling procedure can
+ find the variable used in the model.
*/
- struct var *depvar;
- struct var **indepvar;
+ const struct variable *depvar;
+
gsl_vector *residuals;
+ struct pspp_linreg_coeff *coeff;
gsl_vector *param_estimates;
int method; /* Method to use to estimate parameters. */
/*