X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Flinreg%2Flinreg.c;h=659d6ca48306440a22e5e99e8a355241ab1fd319;hb=0993546e3908becb569bbd979f09ab284cbbc09c;hp=ec1354f33ef44b68eaf08d841bb5bf3fe6aa70af;hpb=4d12096b7b029b064fce4df4e3b1e153ed932b9a;p=pspp-builds.git diff --git a/src/math/linreg/linreg.c b/src/math/linreg/linreg.c index ec1354f3..659d6ca4 100644 --- a/src/math/linreg/linreg.c +++ b/src/math/linreg/linreg.c @@ -1,23 +1,20 @@ -/* - lib/linreg/linreg.c - - Copyright (C) 2005 Free Software Foundation, Inc. Written by Jason H. Stover. - - This program is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., 51 - Franklin Street, Fifth Floor, Boston, MA 02111-1307, USA. - */ +/* PSPP - a program for statistical analysis. + Copyright (C) 2005 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ + +#include #include #include @@ -53,7 +50,7 @@ */ #include -#include +#include #include #include /* @@ -88,6 +85,48 @@ linreg_mean_std (gsl_vector_const_view v, double *mp, double *sp, double *ssp) return GSL_SUCCESS; } +/* + Set V to contain an array of pointers to the variables + used in the model. V must be at least C->N_COEFFS in length. + The return value is the number of distinct variables found. + */ +int +pspp_linreg_get_vars (const void *c_, const struct variable **v) +{ + const pspp_linreg_cache *c = c_; + const struct variable *tmp; + int i; + int j; + int result = 0; + + /* + Make sure the caller doesn't try to sneak a variable + into V that is not in the model. + */ + for (i = 0; i < c->n_coeffs; i++) + { + v[i] = NULL; + } + for (j = 0; j < c->n_coeffs; j++) + { + tmp = pspp_coeff_get_var (c->coeff[j], 0); + assert (tmp != NULL); + /* Repeated variables are likely to bunch together, at the end + of the array. */ + i = result - 1; + while (i >= 0 && v[i] != tmp) + { + i--; + } + if (i < 0 && result < c->n_coeffs) + { + v[result] = tmp; + result++; + } + } + return result; +} + /* Allocate a pspp_linreg_cache and return a pointer to it. n is the number of cases, p is the number of @@ -103,11 +142,11 @@ pspp_linreg_cache_alloc (size_t n, size_t p) c->indep_means = gsl_vector_alloc (p); c->indep_std = gsl_vector_alloc (p); c->ssx = gsl_vector_alloc (p); /* Sums of squares for the - independent variables. - */ + independent variables. + */ c->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the - model parameters. - */ + model parameters. + */ c->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */ c->n_obs = n; c->n_indeps = p; @@ -116,21 +155,37 @@ pspp_linreg_cache_alloc (size_t n, size_t p) */ c->method = PSPP_LINREG_SWEEP; c->predict = pspp_linreg_predict; - c->residual = pspp_linreg_residual; + c->residual = pspp_linreg_residual; /* The procedure to compute my + residuals. 
*/ + c->get_vars = pspp_linreg_get_vars; /* The procedure that returns + pointers to model + variables. */ + c->resid = NULL; /* The variable storing my residuals. */ + c->pred = NULL; /* The variable storing my predicted values. */ return c; } bool -pspp_linreg_cache_free (void * m) +pspp_linreg_cache_free (void *m) { + int i; + pspp_linreg_cache *c = m; - gsl_vector_free (c->indep_means); - gsl_vector_free (c->indep_std); - gsl_vector_free (c->ss_indeps); - gsl_matrix_free (c->cov); - pspp_linreg_coeff_free (c->coeff); - free (c); + if (c != NULL) + { + gsl_vector_free (c->indep_means); + gsl_vector_free (c->indep_std); + gsl_vector_free (c->ss_indeps); + gsl_matrix_free (c->cov); + gsl_vector_free (c->ssx); + for (i = 0; i < c->n_coeffs; i++) + { + pspp_coeff_free (c->coeff[i]); + } + free (c->coeff); + free (c); + } return true; } @@ -144,7 +199,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, const pspp_linreg_opts * opts, pspp_linreg_cache * cache) { int rc; - gsl_matrix *design; + gsl_matrix *design = NULL; gsl_matrix_view xtx; gsl_matrix_view xm; gsl_matrix_view xmxtx; @@ -185,9 +240,9 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, cache->dft = cache->n_obs - 1; cache->dfm = cache->n_indeps; cache->dfe = cache->dft - cache->dfm; - cache->n_coeffs = X->size2 + 1; /* Adjust this later to allow for - regression through the origin. 
- */ + cache->n_coeffs = X->size2; + cache->intercept = 0.0; + if (cache->method == PSPP_LINREG_SWEEP) { gsl_matrix *sw; @@ -261,7 +316,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, for (i = 0; i < cache->n_indeps; i++) { tmp = gsl_matrix_get (sw, i, cache->n_indeps); - cache->coeff[i + 1].estimate = tmp; + cache->coeff[i]->estimate = tmp; m -= tmp * gsl_vector_get (cache->indep_means, i); } /* @@ -297,7 +352,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, } gsl_matrix_set (cache->cov, 0, 0, tmp); - cache->coeff[0].estimate = m; + cache->intercept = m; } else { @@ -307,6 +362,18 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, } gsl_matrix_free (sw); } + else if (cache->method == PSPP_LINREG_CONDITIONAL_INVERSE) + { + /* + Use the SVD of X^T X to find a conditional inverse of X^TX. If + the SVD is X^T X = U D V^T, then set the conditional inverse + to (X^T X)^c = V D^- U^T. D^- is defined as follows: If entry + (i, i) has value sigma_i, then entry (i, i) of D^- is 1 / + sigma_i if sigma_i > 0, and 0 otherwise. Then solve the normal + equations by setting the estimated parameter vector to + (X^TX)^c X^T Y. + */ + } else { gsl_multifit_linear_workspace *wk; @@ -332,8 +399,9 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, cache->cov, &(cache->sse), wk); for (i = 0; i < cache->n_coeffs; i++) { - cache->coeff[i].estimate = gsl_vector_get (param_estimates, i); + cache->coeff[i]->estimate = gsl_vector_get (param_estimates, i + 1); } + cache->intercept = gsl_vector_get (param_estimates, 0); if (rc == GSL_SUCCESS) { gsl_multifit_linear_free (wk);