X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Flinreg%2Flinreg.c;h=0782c36fc3df82aae30d00c5f88fbd5f89aa3ad7;hb=5b5f08a99c564c2f3e981ae0efe21ffc3c578ac4;hp=6bbd94c50cb8e742d6ec011d2a768d1685f6e47f;hpb=1c4b7cfcec922cf6d1e92a607f23f00d26e7324e;p=pspp-builds.git
diff --git a/src/math/linreg/linreg.c b/src/math/linreg/linreg.c
index 6bbd94c5..0782c36f 100644
--- a/src/math/linreg/linreg.c
+++ b/src/math/linreg/linreg.c
@@ -1,23 +1,20 @@
-/*
- lib/linreg/linreg.c
-
- Copyright (C) 2005 Free Software Foundation, Inc. Written by Jason H. Stover.
-
- This program is free software; you can redistribute it and/or modify it under
- the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc., 51
- Franklin Street, Fifth Floor, Boston, MA 02111-1307, USA.
- */
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+#include
#include
#include
@@ -94,12 +91,12 @@ linreg_mean_std (gsl_vector_const_view v, double *mp, double *sp, double *ssp)
The return value is the number of distinct variables found.
*/
int
-pspp_linreg_get_vars (const void *c_, struct variable **v)
+pspp_linreg_get_vars (const void *c_, const struct variable **v)
{
const pspp_linreg_cache *c = c_;
- struct pspp_coeff *coef = NULL;
const struct variable *tmp;
int i;
+ int j;
int result = 0;
/*
@@ -110,15 +107,9 @@ pspp_linreg_get_vars (const void *c_, struct variable **v)
{
v[i] = NULL;
}
- /*
- Start at c->coeff[1] to avoid the intercept.
- */
- v[result] = (struct variable *) pspp_coeff_get_var (c->coeff[1], 0);
- result = (v[result] == NULL) ? 0 : 1;
-
- for (coef = c->coeff[2]; coef < c->coeff[c->n_coeffs]; coef++)
+ for (j = 0; j < c->n_coeffs; j++)
{
- tmp = pspp_coeff_get_var (coef, 0);
+ tmp = pspp_coeff_get_var (c->coeff[j], 0);
assert (tmp != NULL);
/* Repeated variables are likely to bunch together, at the end
of the array. */
@@ -129,7 +120,7 @@ pspp_linreg_get_vars (const void *c_, struct variable **v)
}
if (i < 0 && result < c->n_coeffs)
{
- v[result] = (struct variable *) tmp;
+ v[result] = tmp;
result++;
}
}
@@ -151,10 +142,10 @@ pspp_linreg_cache_alloc (size_t n, size_t p)
c->indep_means = gsl_vector_alloc (p);
c->indep_std = gsl_vector_alloc (p);
c->ssx = gsl_vector_alloc (p); /* Sums of squares for the
- independent variables.
+ independent variables.
*/
c->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the
- model parameters.
+ model parameters.
*/
c->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */
c->n_obs = n;
@@ -181,15 +172,20 @@ pspp_linreg_cache_free (void *m)
int i;
pspp_linreg_cache *c = m;
- gsl_vector_free (c->indep_means);
- gsl_vector_free (c->indep_std);
- gsl_vector_free (c->ss_indeps);
- gsl_matrix_free (c->cov);
- for (i = 0; i < c->n_coeffs; i++)
+ if (c != NULL)
{
- pspp_coeff_free (c->coeff[i]);
+ gsl_vector_free (c->indep_means);
+ gsl_vector_free (c->indep_std);
+ gsl_vector_free (c->ss_indeps);
+ gsl_matrix_free (c->cov);
+ gsl_vector_free (c->ssx);
+ for (i = 0; i < c->n_coeffs; i++)
+ {
+ pspp_coeff_free (c->coeff[i]);
+ }
+ free (c->coeff);
+ free (c);
}
- free (c);
return true;
}
@@ -203,7 +199,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X,
const pspp_linreg_opts * opts, pspp_linreg_cache * cache)
{
int rc;
- gsl_matrix *design;
+ gsl_matrix *design = NULL;
gsl_matrix_view xtx;
gsl_matrix_view xm;
gsl_matrix_view xmxtx;
@@ -244,9 +240,9 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X,
cache->dft = cache->n_obs - 1;
cache->dfm = cache->n_indeps;
cache->dfe = cache->dft - cache->dfm;
- cache->n_coeffs = X->size2 + 1; /* Adjust this later to allow for
- regression through the origin.
- */
+ cache->n_coeffs = X->size2;
+ cache->intercept = 0.0;
+
if (cache->method == PSPP_LINREG_SWEEP)
{
gsl_matrix *sw;
@@ -320,7 +316,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X,
for (i = 0; i < cache->n_indeps; i++)
{
tmp = gsl_matrix_get (sw, i, cache->n_indeps);
- cache->coeff[i + 1]->estimate = tmp;
+ cache->coeff[i]->estimate = tmp;
m -= tmp * gsl_vector_get (cache->indep_means, i);
}
/*
@@ -356,7 +352,7 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X,
}
gsl_matrix_set (cache->cov, 0, 0, tmp);
- cache->coeff[0]->estimate = m;
+ cache->intercept = m;
}
else
{
@@ -366,6 +362,18 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X,
}
gsl_matrix_free (sw);
}
+ else if (cache->method == PSPP_LINREG_CONDITIONAL_INVERSE)
+ {
+ /*
+ Use the SVD of X^T X to find a conditional inverse of X^TX. If
+ the SVD is X^T X = U D V^T, then set the conditional inverse
+ to (X^T X)^c = V D^- U^T. D^- is defined as follows: If entry
+ (i, i) has value sigma_i, then entry (i, i) of D^- is 1 /
+ sigma_i if sigma_i > 0, and 0 otherwise. Then solve the normal
+ equations by setting the estimated parameter vector to
+ (X^TX)^c X^T Y.
+ */
+ }
else
{
gsl_multifit_linear_workspace *wk;
@@ -389,10 +397,11 @@ pspp_linreg (const gsl_vector * Y, const gsl_matrix * X,
wk = gsl_multifit_linear_alloc (design->size1, design->size2);
rc = gsl_multifit_linear (design, Y, param_estimates,
cache->cov, &(cache->sse), wk);
- for (i = 0; i < cache->n_coeffs; i++)
+ for (i = 1; i < cache->n_coeffs; i++)
{
cache->coeff[i]->estimate = gsl_vector_get (param_estimates, i);
}
+ cache->intercept = gsl_vector_get (param_estimates, 0);
if (rc == GSL_SUCCESS)
{
gsl_multifit_linear_free (wk);