X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Flinreg.c;h=2fb98cf76822d020c4369d31acdf8ce0a91b1457;hb=6b6bf4e34558b2854222a1cd3c507c025ff38fde;hp=7e7d4a5504a88bbf8924e2d04ec450bab8cf3669;hpb=4aa40ed36fcdb13f73520945d804e6d3d8d52738;p=pspp-builds.git diff --git a/src/math/linreg.c b/src/math/linreg.c index 7e7d4a55..2fb98cf7 100644 --- a/src/math/linreg.c +++ b/src/math/linreg.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -192,7 +193,94 @@ pspp_linreg_cache_free (void *m) } return true; } +static void +cache_init (pspp_linreg_cache *cache, const struct design_matrix *dm) +{ + assert (cache != NULL); + cache->dft = cache->n_obs - 1; + cache->dfm = cache->n_indeps; + cache->dfe = cache->dft - cache->dfm; + cache->n_coeffs = dm->m->size2; + cache->intercept = 0.0; +} +static void +post_sweep_computations (pspp_linreg_cache *cache, const struct design_matrix *dm, + gsl_matrix *sw) +{ + gsl_matrix *xm; + gsl_matrix_view xtx; + gsl_matrix_view xmxtx; + double m; + double tmp; + size_t i; + size_t j; + int rc; + + assert (sw != NULL); + assert (cache != NULL); + + cache->sse = gsl_matrix_get (sw, cache->n_indeps, cache->n_indeps); + cache->mse = cache->sse / cache->dfe; + /* + Get the intercept. + */ + m = cache->depvar_mean; + for (i = 0; i < cache->n_indeps; i++) + { + tmp = gsl_matrix_get (sw, i, cache->n_indeps); + cache->coeff[i]->estimate = tmp; + m -= tmp * pspp_linreg_get_indep_variable_mean (cache, design_matrix_col_to_var (dm, i)); + } + /* + Get the covariance matrix of the parameter estimates. + Only the upper triangle is necessary. + */ + + /* + The loops below do not compute the entries related + to the estimated intercept. + */ + for (i = 0; i < cache->n_indeps; i++) + for (j = i; j < cache->n_indeps; j++) + { + tmp = -1.0 * cache->mse * gsl_matrix_get (sw, i, j); + gsl_matrix_set (cache->cov, i + 1, j + 1, tmp); + } + /* + Get the covariances related to the intercept. + */ + xtx = gsl_matrix_submatrix (sw, 0, 0, cache->n_indeps, cache->n_indeps); + xmxtx = gsl_matrix_submatrix (cache->cov, 0, 1, 1, cache->n_indeps); + xm = gsl_matrix_calloc (1, cache->n_indeps); + for (i = 0; i < xm->size2; i++) + { + gsl_matrix_set (xm, 0, i, + pspp_linreg_get_indep_variable_mean (cache, design_matrix_col_to_var (dm, i))); + } + rc = gsl_blas_dsymm (CblasRight, CblasUpper, cache->mse, + &xtx.matrix, xm, 0.0, &xmxtx.matrix); + gsl_matrix_free (xm); + if (rc == GSL_SUCCESS) + { + tmp = cache->mse / cache->n_obs; + for (i = 1; i < 1 + cache->n_indeps; i++) + { + tmp -= gsl_matrix_get (cache->cov, 0, i) + * pspp_linreg_get_indep_variable_mean (cache, design_matrix_col_to_var (dm, i - 1)); + } + gsl_matrix_set (cache->cov, 0, 0, tmp); + + cache->intercept = m; + } + else + { + fprintf (stderr, "%s:%d:gsl_blas_dsymm: %s\n", + __FILE__, __LINE__, gsl_strerror (rc)); + exit (rc); + } +} + /* Fit the linear model via least squares. All pointers passed to pspp_linreg are assumed to be allocated to the correct size and initialized to the @@ -205,8 +293,6 @@ pspp_linreg (const gsl_vector * Y, const struct design_matrix *dm, int rc; gsl_matrix *design = NULL; gsl_matrix_view xtx; - gsl_matrix *xm; - gsl_matrix_view xmxtx; gsl_vector_view xty; gsl_vector_view xi; gsl_vector_view xj; @@ -234,13 +320,7 @@ pspp_linreg (const gsl_vector * Y, const struct design_matrix *dm, cache->depvar_std = s; cache->sst = ss; } - - cache->dft = cache->n_obs - 1; - cache->dfm = cache->n_indeps; - cache->dfe = cache->dft - cache->dfm; - cache->n_coeffs = dm->m->size2; - cache->intercept = 0.0; - + cache_init (cache, dm); for (i = 0; i < dm->m->size2; i++) { if (opts->get_indep_mean_std[i]) @@ -326,65 +406,7 @@ pspp_linreg (const gsl_vector * Y, const struct design_matrix *dm, Sweep on the matrix sw, which contains XtX, XtY and YtY. */ reg_sweep (sw); - cache->sse = gsl_matrix_get (sw, cache->n_indeps, cache->n_indeps); - cache->mse = cache->sse / cache->dfe; - /* - Get the intercept. - */ - m = cache->depvar_mean; - for (i = 0; i < cache->n_indeps; i++) - { - tmp = gsl_matrix_get (sw, i, cache->n_indeps); - cache->coeff[i]->estimate = tmp; - m -= tmp * pspp_linreg_get_indep_variable_mean (cache, design_matrix_col_to_var (dm, i)); - } - /* - Get the covariance matrix of the parameter estimates. - Only the upper triangle is necessary. - */ - - /* - The loops below do not compute the entries related - to the estimated intercept. - */ - for (i = 0; i < cache->n_indeps; i++) - for (j = i; j < cache->n_indeps; j++) - { - tmp = -1.0 * cache->mse * gsl_matrix_get (sw, i, j); - gsl_matrix_set (cache->cov, i + 1, j + 1, tmp); - } - /* - Get the covariances related to the intercept. - */ - xtx = gsl_matrix_submatrix (sw, 0, 0, cache->n_indeps, cache->n_indeps); - xmxtx = gsl_matrix_submatrix (cache->cov, 0, 1, 1, cache->n_indeps); - xm = gsl_matrix_calloc (1, cache->n_indeps); - for (i = 0; i < xm->size2; i++) - { - gsl_matrix_set (xm, 0, i, - pspp_linreg_get_indep_variable_mean (cache, design_matrix_col_to_var (dm, i))); - } - rc = gsl_blas_dsymm (CblasRight, CblasUpper, cache->mse, - &xtx.matrix, xm, 0.0, &xmxtx.matrix); - gsl_matrix_free (xm); - if (rc == GSL_SUCCESS) - { - tmp = cache->mse / cache->n_obs; - for (i = 1; i < 1 + cache->n_indeps; i++) - { - tmp -= gsl_matrix_get (cache->cov, 0, i) - * pspp_linreg_get_indep_variable_mean (cache, design_matrix_col_to_var (dm, i - 1)); - } - gsl_matrix_set (cache->cov, 0, 0, tmp); - - cache->intercept = m; - } - else - { - fprintf (stderr, "%s:%d:gsl_blas_dsymm: %s\n", - __FILE__, __LINE__, gsl_strerror (rc)); - exit (rc); - } + post_sweep_computations (cache, dm, sw); gsl_matrix_free (sw); } else if (cache->method == PSPP_LINREG_CONDITIONAL_INVERSE) @@ -617,3 +639,105 @@ void pspp_linreg_set_indep_variable_mean (pspp_linreg_cache *c, const struct var pspp_coeff_set_mean (coef, m); } } + +/* + Make sure the dependent variable is at the last column, and that + only variables in the model are in the covariance matrix. + */ +static struct design_matrix * +rearrange_covariance_matrix (const struct design_matrix *cov, pspp_linreg_cache *c) +{ + struct variable **v; + struct variable **model_vars; + struct variable *tmp; + struct design_matrix *result; + int n_vars; + int found; + size_t *columns; + size_t i; + size_t j; + size_t k; + size_t dep_col; + + assert (cov != NULL); + assert (c != NULL); + assert (cov->m->size1 > 0); + assert (cov->m->size2 == cov->m->size1); + v = xnmalloc (c->n_coeffs, sizeof (*v)); + model_vars = xnmalloc (c->n_coeffs, sizeof (*model_vars)); + columns = xnmalloc (cov->m->size2, sizeof (*columns)); + n_vars = pspp_linreg_get_vars (c, (const struct variable **) v); + dep_col = 0; + k = 0; + for (i = 0; i < cov->m->size2; i++) + { + tmp = design_matrix_col_to_var (cov, i); + found = 0; + j = 0; + while (!found && j < n_vars) + { + if (tmp == v[j]) + { + found = 1; + if (tmp == c->depvar) + { + dep_col = j; + } + else + { + columns[k] = j; + k++; + } + } + j++; + } + } + k++; + columns[k] = dep_col; + /* + K should now be equal to C->N_INDEPS + 1. If it is not, then + either the code above is wrong or the caller didn't send us the + correct values in C. + */ + assert (k == c->n_indeps + 1); + /* + Put the model variables in the right order in MODEL_VARS. + */ + for (i = 0; i < k; i++) + { + model_vars[i] = v[columns[i]]; + } + + result = covariance_matrix_create (k, model_vars); + for (i = 0; i < result->m->size1; i++) + { + for (j = 0; j < result->m->size2; j++) + { + gsl_matrix_set (result->m, i, j, gsl_matrix_get (cov->m, columns[i], columns[j])); + } + } + free (columns); + free (v); + return result; +} +/* + Estimate the model parameters from the covariance matrix only. This + method uses less memory than PSPP_LINREG, which requires the entire + data set to be stored in memory. +*/ +int +pspp_linreg_with_cov (const struct design_matrix *full_cov, + pspp_linreg_cache * cache) +{ + struct design_matrix *cov; + + assert (cov != NULL); + assert (cache != NULL); + + cov = rearrange_covariance_matrix (full_cov, cache); + cache_init (cache, cov); + reg_sweep (cov->m); + post_sweep_computations (cache, cov, cov->m); + covariance_matrix_destroy (cov); +} +