X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Flinreg%2Flinreg.h;h=a7d408af56bde0c605c648963882545d9bec2967;hb=b5b474193e450bba97610065df0518c08074a7fb;hp=63f980b1876569b27dd65b840be9abd1cf56946e;hpb=4f8599928787c4b9da99caff29b27724c2e3298d;p=pspp-builds.git diff --git a/src/math/linreg/linreg.h b/src/math/linreg/linreg.h index 63f980b1..a7d408af 100644 --- a/src/math/linreg/linreg.h +++ b/src/math/linreg/linreg.h @@ -1,39 +1,35 @@ -/* lib/linreg/linreg.h +/* PSPP - a program for statistical analysis. + Copyright (C) 2005 Free Software Foundation, Inc. Written by Jason H. Stover. - Copyright (C) 2005 Free Software Foundation, Inc. - Written by Jason H. Stover. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or (at - your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02111-1307, USA. -*/ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ #ifndef LINREG_H #define LINREG_H - - +#include +#include #include #include -struct variable ; -struct pspp_linreg_coeff; - +struct variable; +struct pspp_coeff; +union value; enum { + PSPP_LINREG_CONDITIONAL_INVERSE, + PSPP_LINREG_QR, PSPP_LINREG_SWEEP, - PSPP_LINREG_SVD }; @@ -43,23 +39,27 @@ enum */ struct pspp_linreg_opts_struct { - int resid; /* Should the residuals be returned? */ - int get_depvar_mean_std; - int *get_indep_mean_std; /* Array of booleans dictating which - independent variables need their means - and standard deviations computed within - pspp_linreg. This array MUST be of - length n_indeps. If element i is 1, - pspp_linreg will compute the mean and - variance of indpendent variable i. If - element i is 0, it will not compute the - mean and standard deviation, and assume - the values are stored. - cache->indep_mean[i] is the mean and - cache->indep_std[i] is the sample - standard deviation. - */ + int *get_indep_mean_std; /* Array of booleans + dictating which + independent variables need + their means and standard + deviations computed within + pspp_linreg. This array + MUST be of length + n_indeps. If element i is + 1, pspp_linreg will + compute the mean and + variance of indpendent + variable i. If element i + is 0, it will not compute + the mean and standard + deviation, and assume the + values are stored. + cache->indep_mean[i] is + the mean and + cache->indep_std[i] is the + sample standard deviation. */ }; typedef struct pspp_linreg_opts_struct pspp_linreg_opts; @@ -69,7 +69,7 @@ typedef struct pspp_linreg_opts_struct pspp_linreg_opts; Y = Xb + Z - where Y is an n-by-1 column vector, X is an n-by-p matrix of + where Y is an n-by-1 column vector, X is an n-by-p matrix of independent variables, b is a p-by-1 vector of regression coefficients, and Z is an n-by-1 normally-distributed random vector with independent identically distributed components with mean 0. @@ -95,17 +95,18 @@ struct pspp_linreg_cache_struct { int n_obs; /* Number of observations. */ int n_indeps; /* Number of independent variables. */ - int n_coeffs; + int n_coeffs; /* The intercept is not considered a + coefficient here. */ - /* - The variable struct is ignored during estimation. - It is here so the calling procedure can - find the variable used in the model. - */ + /* + The variable struct is ignored during estimation. It is here so + the calling procedure can find the variable used in the model. + */ const struct variable *depvar; gsl_vector *residuals; - struct pspp_linreg_coeff *coeff; + struct pspp_coeff **coeff; + double intercept; int method; /* Method to use to estimate parameters. */ /* Means and standard deviations of the variables. @@ -125,19 +126,19 @@ struct pspp_linreg_cache_struct Sums of squares. */ double ssm; /* Sums of squares for the overall model. */ - gsl_vector *ss_indeps; /* Sums of squares from each - independent variable. - */ + gsl_vector *ss_indeps; /* Sums of squares from each + independent variable. */ double sst; /* Sum of squares total. */ double sse; /* Sum of squares error. */ - double mse; /* Mean squared error. This is just sse / dfe, but - since it is the best unbiased estimate of the population - variance, it has its own entry here. + double mse; /* Mean squared error. This is just sse / + dfe, but since it is the best unbiased + estimate of the population variance, it + has its own entry here. */ + gsl_vector *ssx; /* Centered sums of squares for independent + variables, i.e. \sum (x[i] - mean(x))^2. */ + double ssy; /* Centered sums of squares for dependent + variable. */ - gsl_vector *ssx; /* Centered sums of squares for independent variables, - i.e. \sum (x[i] - mean(x))^2. - */ - double ssy; /* Centered sums of squares for dependent variable. */ /* Covariance matrix of the parameter estimates. */ @@ -154,6 +155,19 @@ struct pspp_linreg_cache_struct design matrix. */ gsl_matrix *hat; + + double (*predict) (const struct variable **, const union value **, + const void *, int); + double (*residual) (const struct variable **, + const union value **, + const union value *, const void *, int); + /* + Returns pointers to the variables used in the model. + */ + int (*get_vars) (const void *, const struct variable **); + struct variable *resid; + struct variable *pred; + }; typedef struct pspp_linreg_cache_struct pspp_linreg_cache; @@ -162,21 +176,30 @@ typedef struct pspp_linreg_cache_struct pspp_linreg_cache; /* Allocate a pspp_linreg_cache and return a pointer - to it. n is the number of cases, p is the number of + to it. n is the number of cases, p is the number of independent variables. */ -pspp_linreg_cache * pspp_linreg_cache_alloc (size_t n, size_t p); +pspp_linreg_cache *pspp_linreg_cache_alloc (size_t n, size_t p); -void pspp_linreg_cache_free (pspp_linreg_cache * c); +bool pspp_linreg_cache_free (void *); /* Fit the linear model via least squares. All pointers passed to pspp_linreg are assumed to be allocated to the correct size and initialized to the - values as indicated by opts. + values as indicated by opts. */ -int pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, - const pspp_linreg_opts * opts, - pspp_linreg_cache * cache); - - +int +pspp_linreg (const gsl_vector * Y, const gsl_matrix * X, + const pspp_linreg_opts * opts, pspp_linreg_cache * cache); + +double +pspp_linreg_predict (const struct variable **, const union value **, + const void *, int); +double +pspp_linreg_residual (const struct variable **, const union value **, + const union value *, const void *, int); +/* + All variables used in the model. + */ +int pspp_linreg_get_vars (const void *, const struct variable **); #endif