X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Flinreg%2Flinreg.c;h=e496150e71ca27140bcaefc7538e5822aa4c7696;hb=2e5ba1599578af2f13aa483db3ee49d591aed42f;hp=e3d02345f7776000d6c54fd210f9079b67fbbbea;hpb=573068f2bdcd3f8796e9646668fed910a90f890b;p=pspp-builds.git diff --git a/src/math/linreg/linreg.c b/src/math/linreg/linreg.c index e3d02345..e496150e 100644 --- a/src/math/linreg/linreg.c +++ b/src/math/linreg/linreg.c @@ -1,23 +1,22 @@ -/* lib/linreg/linreg.c - - Copyright (C) 2005 Free Software Foundation, Inc. - Written by Jason H. Stover. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or (at - your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02111-1307, USA. -*/ +/* + lib/linreg/linreg.c + + Copyright (C) 2005 Free Software Foundation, Inc. Written by Jason H. Stover. + + This program is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., 51 + Franklin Street, Fifth Floor, Boston, MA 02111-1307, USA. + */ #include #include @@ -32,7 +31,7 @@ Y = Xb + Z - where Y is an n-by-1 column vector, X is an n-by-p matrix of + where Y is an n-by-1 column vector, X is an n-by-p matrix of independent variables, b is a p-by-1 vector of regression coefficients, and Z is an n-by-1 normally-distributed random vector with independent identically distributed components with mean 0. @@ -54,7 +53,7 @@ */ #include -#include +#include #include #include /* @@ -89,9 +88,57 @@ linreg_mean_std (gsl_vector_const_view v, double *mp, double *sp, double *ssp) return GSL_SUCCESS; } +/* + Set V to contain an array of pointers to the variables + used in the model. V must be at least C->N_COEFFS in length. + The return value is the number of distinct variables found. + */ +int +pspp_linreg_get_vars (const void *c_, struct variable **v) +{ + const pspp_linreg_cache *c = c_; + struct pspp_linreg_coeff *coef = NULL; + const struct variable *tmp; + int i; + int result = 0; + + /* + Make sure the caller doesn't try to sneak a variable + into V that is not in the model. + */ + for (i = 0; i < c->n_coeffs; i++) + { + v[i] = NULL; + } + /* + Start at c->coeff[1] to avoid the intercept. + */ + v[result] = (struct variable *) pspp_linreg_coeff_get_var (c->coeff[1], 0); + result = (v[result] == NULL) ? 0 : 1; + + for (coef = c->coeff[2]; coef < c->coeff[c->n_coeffs]; coef++) + { + tmp = pspp_linreg_coeff_get_var (coef, 0); + assert (tmp != NULL); + /* Repeated variables are likely to bunch together, at the end + of the array. */ + i = result - 1; + while (i >= 0 && (v[i]->index != tmp->index)) + { + i--; + } + if (i < 0 && result < c->n_coeffs) + { + v[result] = (struct variable *) tmp; + result++; + } + } + return result; +} + /* Allocate a pspp_linreg_cache and return a pointer - to it. n is the number of cases, p is the number of + to it. n is the number of cases, p is the number of independent variables. */ pspp_linreg_cache * @@ -100,13 +147,14 @@ pspp_linreg_cache_alloc (size_t n, size_t p) pspp_linreg_cache *c; c = (pspp_linreg_cache *) malloc (sizeof (pspp_linreg_cache)); + c->depvar = NULL; c->indep_means = gsl_vector_alloc (p); c->indep_std = gsl_vector_alloc (p); - c->ssx = gsl_vector_alloc (p); /* Sums of squares for the independent - variables. + c->ssx = gsl_vector_alloc (p); /* Sums of squares for the + independent variables. */ - c->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the model - parameters. + c->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the + model parameters. */ c->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */ c->n_obs = n; @@ -115,25 +163,40 @@ pspp_linreg_cache_alloc (size_t n, size_t p) Default settings. */ c->method = PSPP_LINREG_SWEEP; + c->predict = pspp_linreg_predict; + c->residual = pspp_linreg_residual; /* The procedure to compute my + residuals. */ + c->get_vars = pspp_linreg_get_vars; /* The procedure that returns + pointers to model + variables. */ + c->resid = NULL; /* The variable storing my residuals. */ + c->pred = NULL; /* The variable storing my predicted values. */ return c; } -void -pspp_linreg_cache_free (pspp_linreg_cache * c) +bool +pspp_linreg_cache_free (void *m) { + int i; + + pspp_linreg_cache *c = m; gsl_vector_free (c->indep_means); gsl_vector_free (c->indep_std); gsl_vector_free (c->ss_indeps); gsl_matrix_free (c->cov); - pspp_linreg_coeff_free (c->coeff); + for (i = 0; i < c->n_coeffs; i++) + { + pspp_linreg_coeff_free (c->coeff[i]); + } free (c); + return true; } /* Fit the linear model via least squares. All pointers passed to pspp_linreg are assumed to be allocated to the correct size and initialized to the - values as indicated by opts. + values as indicated by opts. */ int pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, @@ -181,9 +244,9 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, cache->dft = cache->n_obs - 1; cache->dfm = cache->n_indeps; cache->dfe = cache->dft - cache->dfm; - cache->n_coeffs = X->size2 + 1; /* Adjust this later to allow for regression - through the origin. - */ + cache->n_coeffs = X->size2 + 1; /* Adjust this later to allow for + regression through the origin. + */ if (cache->method == PSPP_LINREG_SWEEP) { gsl_matrix *sw; @@ -257,12 +320,12 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, for (i = 0; i < cache->n_indeps; i++) { tmp = gsl_matrix_get (sw, i, cache->n_indeps); - cache->coeff[i + 1].estimate = tmp; + cache->coeff[i + 1]->estimate = tmp; m -= tmp * gsl_vector_get (cache->indep_means, i); } /* Get the covariance matrix of the parameter estimates. - Only the upper triangle is necessary. + Only the upper triangle is necessary. */ /* @@ -293,7 +356,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, } gsl_matrix_set (cache->cov, 0, 0, tmp); - cache->coeff[0].estimate = m; + cache->coeff[0]->estimate = m; } else { @@ -305,7 +368,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, } else { - gsl_multifit_linear_workspace *wk ; + gsl_multifit_linear_workspace *wk; /* Use QR decomposition via GSL. */ @@ -328,7 +391,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, cache->cov, &(cache->sse), wk); for (i = 0; i < cache->n_coeffs; i++) { - cache->coeff[i].estimate = gsl_vector_get (param_estimates, i); + cache->coeff[i]->estimate = gsl_vector_get (param_estimates, i); } if (rc == GSL_SUCCESS) {