From: Jason Stover Date: Sat, 14 Jun 2008 19:38:24 +0000 (-0400) Subject: removed src/math/linreg/ X-Git-Tag: v0.7.0~27 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=51010fcf5cc308367c1fe3fafa785a3d2216b03d;p=pspp-builds.git removed src/math/linreg/ --- diff --git a/src/math/linreg/ChangeLog b/src/math/linreg/ChangeLog deleted file mode 100644 index a93f3b3f..00000000 --- a/src/math/linreg/ChangeLog +++ /dev/null @@ -1,141 +0,0 @@ -2008-04-06 Jason Stover - - * linreg.c (pspp_linreg): For the QR method of getting the least - squares estimates, store intercept in cache->intercept - instead of coefficienc structure. Partial fix of bug 22788. - -2008-03-10 Jason Stover - - * linreg.c (pspp_linreg): Remove use of coefficient 0 as intercept. - -2008-03-08 Jason Stover - - * linreg.c (pspp_linreg_get_vars): Clean up the loop that searches - for coefficient/variable matches. Fix for bug 22419. - -2008-02-28 Jason Stover - - * linreg.c (has_coefficient): New function. - - * linreg.c (pspp_linreg_predict): Fix bug 22419. - -2007-09-21 Jason Stover - - * linreg.c (pspp_linreg_cache_free): Partial fix of memory leak, - bug 21056. - -2007-08-11 Jason Stover - - * linreg.h (enum): Dropped ambiguous PSPP_LINREG_SVD in favor of - PSPP_LINREG_QR. Added PSPP_LINREG_CONDITIONAL_INVERSE. - - * linreg.c (pspp_linreg): Added scope and comment for use of - generalized inverse to solve normal equations. - -2007-04-12 Jason Stover - - * linreg.c: (pspp_linreg_cache_free) Check for null pointer before - freeing. - -Wed Dec 6 21:16:22 2006 Ben Pfaff - - * linreg.c: (pspp_linreg_get_vars) Compare variable pointers - instead of variable indexes. - -2006-05-19 Jason Stover - - * coefficient.c: Moved to src/math. - - * coefficient.h: Moved to src/math. - -2006-05-11 Jason Stover - - * linreg.h: Made linreg_cache's coeff a pspp_linreg_coeff**. - -2006-04-28 Jason Stover - - * linreg.c (pspp_linreg_get_vars): New function. Fills an array with - pointers to the variables used in the model. - - * linreg.h: Add a pointer called get_vars to pspp_linreg_get_vars() - to access variables in a model. - - * coefficient.c (pspp_linreg_coeff_init): Set the variable - associated with the intercept term to NULL. - -2006-04-26 Jason Stover - - * linreg.h: Added a pointer to predicted values. - - * linreg.c (pspp_linreg_cache_alloc): Initialized pointer to - predicted values. - -2006-04-22 Jason Stover - - * predict.c (pspp_linreg_residual): Removed unnecessary cast of c_ - to pspp_linreg_cache. - -2006-04-18 Jason Stover - - * linreg.h: Added a pointer to a variable to store the resduals. - - * linreg.c (pspp_linreg_cache_alloc): Initialize the residual - variable pointer to NULL. - -2006-04-17 Jason Stover - - * linreg.c (pspp_linreg_cache_free): Accept a pointer of type - void to match definition of trns_proc_func. - -2006-04-17 Jason Stover - - * linreg.c (pspp_linreg_cache_free): Return a bool. - -Sat Apr 15 18:03:01 2006 Ben Pfaff - - * predict.c (pspp_linreg_predict): Change prototype to match - struct pspp_linreg_cache_struct's predict member. - * predict.c (pspp_linreg_residual): Change prototype to match - struct pspp_linreg_cache_struct's residual member. - -Sat Apr 15 18:01:39 2006 Ben Pfaff - - * coefficient.c (pspp_linreg_get_coeff): Compare n_indeps against - 0, instead of NULL, because it's an integer type. - -Mon Apr 3 13:20:54 2006 Ben Pfaff - - * linreg.c (pspp_linreg): Move declaration before code for C90 - compliance. - -2006-04-09 Jason Stover - - * predict.c (pspp_linreg_predict): Do something sensible even with - bad input. - -2006-04-07 Jason Stover - - * predict.c (pspp_linreg_predict): Improved handling of null - pointer args. - - * predict.c (pspp_linreg_residuals): New function. - - * coefficient.c: Improved handling of null pointer args. - -2006-04-05 Jason Stover - - * predict.c: New file. pspp_linreg_predict() uses a linear model - to return a predicted value of the dependent variable. - - * coefficient.c: Added pspp_linreg_coeff_get_est() and - pspp_linreg_coeff_get_std_err() to access estimated values of - coefficients and standard errors. - -2006-04-03 Jason Stover - - * coefficient.c: Added pspp_linreg_get_coeff() to find a - coefficient corresponding to a variable/value combination. - -Thu Mar 2 08:40:33 WST 2006 John Darrington - - * Moved files from src directory diff --git a/src/math/linreg/automake.mk b/src/math/linreg/automake.mk deleted file mode 100644 index 38da4a06..00000000 --- a/src/math/linreg/automake.mk +++ /dev/null @@ -1,9 +0,0 @@ -## Process this file with automake to produce Makefile.in -*- makefile -*- - -noinst_LIBRARIES += src/math/linreg/libpspp_linreg.a - -src_math_linreg_libpspp_linreg_a_SOURCES = \ - src/math/linreg/predict.c \ - src/math/linreg/linreg.c \ - src/math/linreg/linreg.h - diff --git a/src/math/linreg/linreg.c b/src/math/linreg/linreg.c deleted file mode 100644 index 659d6ca4..00000000 --- a/src/math/linreg/linreg.c +++ /dev/null @@ -1,434 +0,0 @@ -/* PSPP - a program for statistical analysis. - Copyright (C) 2005 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include -#include -#include - -#include -#include - - - -/* - Find the least-squares estimate of b for the linear model: - - Y = Xb + Z - - where Y is an n-by-1 column vector, X is an n-by-p matrix of - independent variables, b is a p-by-1 vector of regression coefficients, - and Z is an n-by-1 normally-distributed random vector with independent - identically distributed components with mean 0. - - This estimate is found via the sweep operator or singular-value - decomposition with gsl. - - - References: - - 1. Matrix Computations, third edition. GH Golub and CF Van Loan. - The Johns Hopkins University Press. 1996. ISBN 0-8018-5414-8. - - 2. Numerical Analysis for Statisticians. K Lange. Springer. 1999. - ISBN 0-387-94979-8. - - 3. Numerical Linear Algebra for Applications in Statistics. JE Gentle. - Springer. 1998. ISBN 0-387-98542-5. -*/ - -#include -#include -#include -#include -/* - Get the mean and standard deviation of a vector - of doubles via a form of the Kalman filter as - described on page 32 of [3]. - */ -static int -linreg_mean_std (gsl_vector_const_view v, double *mp, double *sp, double *ssp) -{ - size_t i; - double j = 0.0; - double d; - double tmp; - double mean; - double variance; - - mean = gsl_vector_get (&v.vector, 0); - variance = 0; - for (i = 1; i < v.vector.size; i++) - { - j = (double) i + 1.0; - tmp = gsl_vector_get (&v.vector, i); - d = (tmp - mean) / j; - mean += d; - variance += j * (j - 1.0) * d * d; - } - *mp = mean; - *sp = sqrt (variance / (j - 1.0)); - *ssp = variance; - - return GSL_SUCCESS; -} - -/* - Set V to contain an array of pointers to the variables - used in the model. V must be at least C->N_COEFFS in length. - The return value is the number of distinct variables found. - */ -int -pspp_linreg_get_vars (const void *c_, const struct variable **v) -{ - const pspp_linreg_cache *c = c_; - const struct variable *tmp; - int i; - int j; - int result = 0; - - /* - Make sure the caller doesn't try to sneak a variable - into V that is not in the model. - */ - for (i = 0; i < c->n_coeffs; i++) - { - v[i] = NULL; - } - for (j = 0; j < c->n_coeffs; j++) - { - tmp = pspp_coeff_get_var (c->coeff[j], 0); - assert (tmp != NULL); - /* Repeated variables are likely to bunch together, at the end - of the array. */ - i = result - 1; - while (i >= 0 && v[i] != tmp) - { - i--; - } - if (i < 0 && result < c->n_coeffs) - { - v[result] = tmp; - result++; - } - } - return result; -} - -/* - Allocate a pspp_linreg_cache and return a pointer - to it. n is the number of cases, p is the number of - independent variables. - */ -pspp_linreg_cache * -pspp_linreg_cache_alloc (size_t n, size_t p) -{ - pspp_linreg_cache *c; - - c = (pspp_linreg_cache *) malloc (sizeof (pspp_linreg_cache)); - c->depvar = NULL; - c->indep_means = gsl_vector_alloc (p); - c->indep_std = gsl_vector_alloc (p); - c->ssx = gsl_vector_alloc (p); /* Sums of squares for the - independent variables. - */ - c->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the - model parameters. - */ - c->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */ - c->n_obs = n; - c->n_indeps = p; - /* - Default settings. - */ - c->method = PSPP_LINREG_SWEEP; - c->predict = pspp_linreg_predict; - c->residual = pspp_linreg_residual; /* The procedure to compute my - residuals. */ - c->get_vars = pspp_linreg_get_vars; /* The procedure that returns - pointers to model - variables. */ - c->resid = NULL; /* The variable storing my residuals. */ - c->pred = NULL; /* The variable storing my predicted values. */ - - return c; -} - -bool -pspp_linreg_cache_free (void *m) -{ - int i; - - pspp_linreg_cache *c = m; - if (c != NULL) - { - gsl_vector_free (c->indep_means); - gsl_vector_free (c->indep_std); - gsl_vector_free (c->ss_indeps); - gsl_matrix_free (c->cov); - gsl_vector_free (c->ssx); - for (i = 0; i < c->n_coeffs; i++) - { - pspp_coeff_free (c->coeff[i]); - } - free (c->coeff); - free (c); - } - return true; -} - -/* - Fit the linear model via least squares. All pointers passed to pspp_linreg - are assumed to be allocated to the correct size and initialized to the - values as indicated by opts. - */ -int -pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, - const pspp_linreg_opts * opts, pspp_linreg_cache * cache) -{ - int rc; - gsl_matrix *design = NULL; - gsl_matrix_view xtx; - gsl_matrix_view xm; - gsl_matrix_view xmxtx; - gsl_vector_view xty; - gsl_vector_view xi; - gsl_vector_view xj; - gsl_vector *param_estimates; - - size_t i; - size_t j; - double tmp; - double m; - double s; - double ss; - - if (cache == NULL) - { - return GSL_EFAULT; - } - if (opts->get_depvar_mean_std) - { - linreg_mean_std (gsl_vector_const_subvector (Y, 0, Y->size), - &m, &s, &ss); - cache->depvar_mean = m; - cache->depvar_std = s; - cache->sst = ss; - } - for (i = 0; i < cache->n_indeps; i++) - { - if (opts->get_indep_mean_std[i]) - { - linreg_mean_std (gsl_matrix_const_column (X, i), &m, &s, &ss); - gsl_vector_set (cache->indep_means, i, m); - gsl_vector_set (cache->indep_std, i, s); - gsl_vector_set (cache->ssx, i, ss); - } - } - cache->dft = cache->n_obs - 1; - cache->dfm = cache->n_indeps; - cache->dfe = cache->dft - cache->dfm; - cache->n_coeffs = X->size2; - cache->intercept = 0.0; - - if (cache->method == PSPP_LINREG_SWEEP) - { - gsl_matrix *sw; - /* - Subtract the means to improve the condition of the design - matrix. This requires copying X and Y. We do not divide by the - standard deviations of the independent variables here since doing - so would cause a miscalculation of the residual sums of - squares. Dividing by the standard deviation is done GSL's linear - regression functions, so if the design matrix has a poor - condition, use QR decomposition. - - The design matrix here does not include a column for the intercept - (i.e., a column of 1's). If using PSPP_LINREG_QR, we need that column, - so design is allocated here when sweeping, or below if using QR. - */ - design = gsl_matrix_alloc (X->size1, X->size2); - for (i = 0; i < X->size2; i++) - { - m = gsl_vector_get (cache->indep_means, i); - for (j = 0; j < X->size1; j++) - { - tmp = (gsl_matrix_get (X, j, i) - m); - gsl_matrix_set (design, j, i, tmp); - } - } - sw = gsl_matrix_calloc (cache->n_indeps + 1, cache->n_indeps + 1); - xtx = gsl_matrix_submatrix (sw, 0, 0, cache->n_indeps, cache->n_indeps); - - for (i = 0; i < xtx.matrix.size1; i++) - { - tmp = gsl_vector_get (cache->ssx, i); - gsl_matrix_set (&(xtx.matrix), i, i, tmp); - xi = gsl_matrix_column (design, i); - for (j = (i + 1); j < xtx.matrix.size2; j++) - { - xj = gsl_matrix_column (design, j); - gsl_blas_ddot (&(xi.vector), &(xj.vector), &tmp); - gsl_matrix_set (&(xtx.matrix), i, j, tmp); - } - } - - gsl_matrix_set (sw, cache->n_indeps, cache->n_indeps, cache->sst); - xty = gsl_matrix_column (sw, cache->n_indeps); - /* - This loop starts at 1, with i=0 outside the loop, so we can get - the model sum of squares due to the first independent variable. - */ - xi = gsl_matrix_column (design, 0); - gsl_blas_ddot (&(xi.vector), Y, &tmp); - gsl_vector_set (&(xty.vector), 0, tmp); - tmp *= tmp / gsl_vector_get (cache->ssx, 0); - gsl_vector_set (cache->ss_indeps, 0, tmp); - for (i = 1; i < cache->n_indeps; i++) - { - xi = gsl_matrix_column (design, i); - gsl_blas_ddot (&(xi.vector), Y, &tmp); - gsl_vector_set (&(xty.vector), i, tmp); - } - - /* - Sweep on the matrix sw, which contains XtX, XtY and YtY. - */ - reg_sweep (sw); - cache->sse = gsl_matrix_get (sw, cache->n_indeps, cache->n_indeps); - cache->mse = cache->sse / cache->dfe; - /* - Get the intercept. - */ - m = cache->depvar_mean; - for (i = 0; i < cache->n_indeps; i++) - { - tmp = gsl_matrix_get (sw, i, cache->n_indeps); - cache->coeff[i]->estimate = tmp; - m -= tmp * gsl_vector_get (cache->indep_means, i); - } - /* - Get the covariance matrix of the parameter estimates. - Only the upper triangle is necessary. - */ - - /* - The loops below do not compute the entries related - to the estimated intercept. - */ - for (i = 0; i < cache->n_indeps; i++) - for (j = i; j < cache->n_indeps; j++) - { - tmp = -1.0 * cache->mse * gsl_matrix_get (sw, i, j); - gsl_matrix_set (cache->cov, i + 1, j + 1, tmp); - } - /* - Get the covariances related to the intercept. - */ - xtx = gsl_matrix_submatrix (sw, 0, 0, cache->n_indeps, cache->n_indeps); - xmxtx = gsl_matrix_submatrix (cache->cov, 0, 1, 1, cache->n_indeps); - xm = gsl_matrix_view_vector (cache->indep_means, 1, cache->n_indeps); - rc = gsl_blas_dsymm (CblasRight, CblasUpper, cache->mse, - &xtx.matrix, &xm.matrix, 0.0, &xmxtx.matrix); - if (rc == GSL_SUCCESS) - { - tmp = cache->mse / cache->n_obs; - for (i = 1; i < 1 + cache->n_indeps; i++) - { - tmp -= gsl_matrix_get (cache->cov, 0, i) - * gsl_vector_get (cache->indep_means, i - 1); - } - gsl_matrix_set (cache->cov, 0, 0, tmp); - - cache->intercept = m; - } - else - { - fprintf (stderr, "%s:%d:gsl_blas_dsymm: %s\n", - __FILE__, __LINE__, gsl_strerror (rc)); - exit (rc); - } - gsl_matrix_free (sw); - } - else if (cache->method == PSPP_LINREG_CONDITIONAL_INVERSE) - { - /* - Use the SVD of X^T X to find a conditional inverse of X^TX. If - the SVD is X^T X = U D V^T, then set the conditional inverse - to (X^T X)^c = V D^- U^T. D^- is defined as follows: If entry - (i, i) has value sigma_i, then entry (i, i) of D^- is 1 / - sigma_i if sigma_i > 0, and 0 otherwise. Then solve the normal - equations by setting the estimated parameter vector to - (X^TX)^c X^T Y. - */ - } - else - { - gsl_multifit_linear_workspace *wk; - /* - Use QR decomposition via GSL. - */ - - param_estimates = gsl_vector_alloc (1 + X->size2); - design = gsl_matrix_alloc (X->size1, 1 + X->size2); - - for (j = 0; j < X->size1; j++) - { - gsl_matrix_set (design, j, 0, 1.0); - for (i = 0; i < X->size2; i++) - { - tmp = gsl_matrix_get (X, j, i); - gsl_matrix_set (design, j, i + 1, tmp); - } - } - - wk = gsl_multifit_linear_alloc (design->size1, design->size2); - rc = gsl_multifit_linear (design, Y, param_estimates, - cache->cov, &(cache->sse), wk); - for (i = 0; i < cache->n_coeffs; i++) - { - cache->coeff[i]->estimate = gsl_vector_get (param_estimates, i + 1); - } - cache->intercept = gsl_vector_get (param_estimates, 0); - if (rc == GSL_SUCCESS) - { - gsl_multifit_linear_free (wk); - gsl_vector_free (param_estimates); - } - else - { - fprintf (stderr, "%s:%d: gsl_multifit_linear returned %d\n", - __FILE__, __LINE__, rc); - } - } - - - cache->ssm = cache->sst - cache->sse; - /* - Get the remaining sums of squares for the independent - variables. - */ - m = 0; - for (i = 1; i < cache->n_indeps; i++) - { - j = i - 1; - m += gsl_vector_get (cache->ss_indeps, j); - tmp = cache->ssm - m; - gsl_vector_set (cache->ss_indeps, i, tmp); - } - - gsl_matrix_free (design); - return GSL_SUCCESS; -} diff --git a/src/math/linreg/linreg.h b/src/math/linreg/linreg.h deleted file mode 100644 index a7d408af..00000000 --- a/src/math/linreg/linreg.h +++ /dev/null @@ -1,205 +0,0 @@ -/* PSPP - a program for statistical analysis. - Copyright (C) 2005 Free Software Foundation, Inc. Written by Jason H. Stover. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#ifndef LINREG_H -#define LINREG_H -#include -#include -#include -#include - -struct variable; -struct pspp_coeff; -union value; - -enum -{ - PSPP_LINREG_CONDITIONAL_INVERSE, - PSPP_LINREG_QR, - PSPP_LINREG_SWEEP, -}; - - - -/* - Options describing what special values should be computed. - */ -struct pspp_linreg_opts_struct -{ - int get_depvar_mean_std; - int *get_indep_mean_std; /* Array of booleans - dictating which - independent variables need - their means and standard - deviations computed within - pspp_linreg. This array - MUST be of length - n_indeps. If element i is - 1, pspp_linreg will - compute the mean and - variance of indpendent - variable i. If element i - is 0, it will not compute - the mean and standard - deviation, and assume the - values are stored. - cache->indep_mean[i] is - the mean and - cache->indep_std[i] is the - sample standard deviation. */ -}; -typedef struct pspp_linreg_opts_struct pspp_linreg_opts; - - -/* - Find the least-squares estimate of b for the linear model: - - Y = Xb + Z - - where Y is an n-by-1 column vector, X is an n-by-p matrix of - independent variables, b is a p-by-1 vector of regression coefficients, - and Z is an n-by-1 normally-distributed random vector with independent - identically distributed components with mean 0. - - This estimate is found via the sweep operator or singular-value - decomposition with gsl. - - - References: - - 1. Matrix Computations, third edition. GH Golub and CF Van Loan. - The Johns Hopkins University Press. 1996. ISBN 0-8018-5414-8. - - 2. Numerical Analysis for Statisticians. K Lange. Springer. 1999. - ISBN 0-387-94979-8. - - 3. Numerical Linear Algebra for Applications in Statistics. JE Gentle. - Springer. 1998. ISBN 0-387-98542-5. -*/ - - -struct pspp_linreg_cache_struct -{ - int n_obs; /* Number of observations. */ - int n_indeps; /* Number of independent variables. */ - int n_coeffs; /* The intercept is not considered a - coefficient here. */ - - /* - The variable struct is ignored during estimation. It is here so - the calling procedure can find the variable used in the model. - */ - const struct variable *depvar; - - gsl_vector *residuals; - struct pspp_coeff **coeff; - double intercept; - int method; /* Method to use to estimate parameters. */ - /* - Means and standard deviations of the variables. - If these pointers are null when pspp_linreg() is - called, pspp_linreg() will compute their values. - - Entry i of indep_means is the mean of independent - variable i, whose observations are stored in the ith - column of the design matrix. - */ - double depvar_mean; - double depvar_std; - gsl_vector *indep_means; - gsl_vector *indep_std; - - /* - Sums of squares. - */ - double ssm; /* Sums of squares for the overall model. */ - gsl_vector *ss_indeps; /* Sums of squares from each - independent variable. */ - double sst; /* Sum of squares total. */ - double sse; /* Sum of squares error. */ - double mse; /* Mean squared error. This is just sse / - dfe, but since it is the best unbiased - estimate of the population variance, it - has its own entry here. */ - gsl_vector *ssx; /* Centered sums of squares for independent - variables, i.e. \sum (x[i] - mean(x))^2. */ - double ssy; /* Centered sums of squares for dependent - variable. - */ - /* - Covariance matrix of the parameter estimates. - */ - gsl_matrix *cov; - /* - Degrees of freedom. - */ - double dft; - double dfe; - double dfm; - - /* - 'Hat' or Hessian matrix, i.e. (X'X)^{-1}, where X is our - design matrix. - */ - gsl_matrix *hat; - - double (*predict) (const struct variable **, const union value **, - const void *, int); - double (*residual) (const struct variable **, - const union value **, - const union value *, const void *, int); - /* - Returns pointers to the variables used in the model. - */ - int (*get_vars) (const void *, const struct variable **); - struct variable *resid; - struct variable *pred; - -}; - -typedef struct pspp_linreg_cache_struct pspp_linreg_cache; - - - -/* - Allocate a pspp_linreg_cache and return a pointer - to it. n is the number of cases, p is the number of - independent variables. - */ -pspp_linreg_cache *pspp_linreg_cache_alloc (size_t n, size_t p); - -bool pspp_linreg_cache_free (void *); - -/* - Fit the linear model via least squares. All pointers passed to pspp_linreg - are assumed to be allocated to the correct size and initialized to the - values as indicated by opts. - */ -int -pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, - const pspp_linreg_opts * opts, pspp_linreg_cache * cache); - -double -pspp_linreg_predict (const struct variable **, const union value **, - const void *, int); -double -pspp_linreg_residual (const struct variable **, const union value **, - const union value *, const void *, int); -/* - All variables used in the model. - */ -int pspp_linreg_get_vars (const void *, const struct variable **); -#endif diff --git a/src/math/linreg/predict.c b/src/math/linreg/predict.c deleted file mode 100644 index 91184687..00000000 --- a/src/math/linreg/predict.c +++ /dev/null @@ -1,112 +0,0 @@ -/* PSPP - a program for statistical analysis. - Copyright (C) 2005 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#include -#include -#include -#include - -/* - Is the coefficient COEF contained in the list of coefficients - COEF_LIST? - */ -static int -has_coefficient (const struct pspp_coeff **coef_list, const struct pspp_coeff *coef, - size_t n) -{ - size_t i = 0; - - while (i < n) - { - if (coef_list[i] == coef) - { - return 1; - } - i++; - } - return 0; -} -/* - Predict the value of the dependent variable with the - new set of predictors. PREDICTORS must point to a list - of variables, each of whose values are stored in VALS, - in the same order. - */ -double -pspp_linreg_predict (const struct variable **predictors, - const union value **vals, const void *c_, int n_vals) -{ - const pspp_linreg_cache *c = c_; - int j; - size_t next_coef = 0; - const struct pspp_coeff **coef_list; - const struct pspp_coeff *coe; - double result; - double tmp; - - if (predictors == NULL || vals == NULL || c == NULL) - { - return GSL_NAN; - } - if (c->coeff == NULL) - { - /* The stupid model: just guess the mean. */ - return c->depvar_mean; - } - coef_list = xnmalloc (c->n_coeffs, sizeof (*coef_list)); - result = c->intercept; - - /* - The loops guard against the possibility that the caller passed us - inadequate information, such as too few or too many values, or - a redundant list of variable names. - */ - for (j = 0; j < n_vals; j++) - { - coe = pspp_linreg_get_coeff (c, predictors[j], vals[j]); - if (!has_coefficient (coef_list, coe, next_coef)) - { - tmp = pspp_coeff_get_est (coe); - if (var_is_numeric (predictors[j])) - { - tmp *= vals[j]->f; - } - result += tmp; - coef_list[next_coef++] = coe; - } - } - free (coef_list); - - return result; -} - -double -pspp_linreg_residual (const struct variable **predictors, - const union value **vals, - const union value *obs, const void *c, int n_vals) -{ - double pred; - double result; - - if (predictors == NULL || vals == NULL || c == NULL || obs == NULL) - { - return GSL_NAN; - } - pred = pspp_linreg_predict (predictors, vals, c, n_vals); - - result = gsl_isnan (pred) ? GSL_NAN : (obs->f - pred); - return result; -}