X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Flinreg.h;h=88da7f6227f2ede2b7080883e2d28e1127c2bbe2;hb=91dc3fe3d4ae4e0f9dee5adb11dd0d7c4b78c515;hp=f9d8c9b3f740abfc161ea7df2ecf20131985f700;hpb=3bbb4370239deb29ebbf813d258aef6249e2a431;p=pspp diff --git a/src/math/linreg.h b/src/math/linreg.h index f9d8c9b3f7..88da7f6227 100644 --- a/src/math/linreg.h +++ b/src/math/linreg.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005 Free Software Foundation, Inc. Written by Jason H. Stover. + Copyright (C) 2005, 2011 Free Software Foundation, Inc. Written by Jason H. Stover. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,51 +16,9 @@ #ifndef LINREG_H #define LINREG_H -#include -#include -#include -#include -#include -#include - -enum -{ - PSPP_LINREG_CONDITIONAL_INVERSE, - PSPP_LINREG_QR, - PSPP_LINREG_SWEEP, -}; - - - -/* - Options describing what special values should be computed. - */ -struct pspp_linreg_opts_struct -{ - int get_depvar_mean_std; - int *get_indep_mean_std; /* Array of booleans - dictating which - independent variables need - their means and standard - deviations computed within - pspp_linreg. This array - MUST be of length - n_indeps. If element i is - 1, pspp_linreg will - compute the mean and - variance of indpendent - variable i. If element i - is 0, it will not compute - the mean and standard - deviation, and assume the - values are stored. - cache->indep_mean[i] is - the mean and - cache->indep_std[i] is the - sample standard deviation. */ -}; -typedef struct pspp_linreg_opts_struct pspp_linreg_opts; +#include +#include /* Find the least-squares estimate of b for the linear model: @@ -88,139 +46,48 @@ typedef struct pspp_linreg_opts_struct pspp_linreg_opts; Springer. 1998. ISBN 0-387-98542-5. */ +struct variable; -struct pspp_linreg_cache_struct -{ - int n_obs; /* Number of observations. */ - int n_indeps; /* Number of independent variables. */ - int n_coeffs; /* The intercept is not considered a - coefficient here. */ - - /* - Pointers to the variables. - */ - const struct variable *depvar; - const struct variable **indep_vars; - - gsl_vector *residuals; - struct pspp_coeff **coeff; - double intercept; - int method; /* Method to use to estimate parameters. */ - /* - Means and standard deviations of the variables. - If these pointers are null when pspp_linreg() is - called, pspp_linreg() will compute their values. - - Entry i of indep_means is the mean of independent - variable i, whose observations are stored in the ith - column of the design matrix. - */ - double depvar_mean; - double depvar_std; - gsl_vector *indep_means; - gsl_vector *indep_std; - - /* - Sums of squares. - */ - double ssm; /* Sums of squares for the overall model. */ - gsl_vector *ss_indeps; /* Sums of squares from each - independent variable. */ - double sst; /* Sum of squares total. */ - double sse; /* Sum of squares error. */ - double mse; /* Mean squared error. This is just sse / - dfe, but since it is the best unbiased - estimate of the population variance, it - has its own entry here. */ - gsl_vector *ssx; /* Centered sums of squares for independent - variables, i.e. \sum (x[i] - mean(x))^2. */ - double ssy; /* Centered sums of squares for dependent - variable. - */ - /* - Covariance matrix of the parameter estimates. - */ - gsl_matrix *cov; - /* - Degrees of freedom. - */ - double dft; - double dfe; - double dfm; - - /* - 'Hat' or Hessian matrix, i.e. (X'X)^{-1}, where X is our - design matrix. - */ - gsl_matrix *hat; - - double (*predict) (const struct variable **, const union value **, - const void *, int); - double (*residual) (const struct variable **, - const union value **, - const union value *, const void *, int); - /* - Returns pointers to the variables used in the model. - */ - int (*get_vars) (const void *, const struct variable **); - struct variable *resid; - struct variable *pred; - -}; - -typedef struct pspp_linreg_cache_struct pspp_linreg_cache; +struct linreg *linreg_alloc (const struct variable *, const struct variable **, + double, size_t, bool); +void linreg_unref (struct linreg *); +void linreg_ref (struct linreg *); +int linreg_n_indeps (const struct linreg *c); /* - Allocate a pspp_linreg_cache and return a pointer - to it. n is the number of cases, p is the number of - independent variables. - */ -pspp_linreg_cache *pspp_linreg_cache_alloc (const struct variable *, const struct variable **, - size_t, size_t); - -bool pspp_linreg_cache_free (void *); + Fit the linear model via least squares. +*/ +void linreg_fit (const gsl_matrix *, struct linreg *); -/* - Fit the linear model via least squares. All pointers passed to pspp_linreg - are assumed to be allocated to the correct size and initialized to the - values as indicated by opts. - */ -int -pspp_linreg (const gsl_vector *, const struct design_matrix *, - const pspp_linreg_opts *, pspp_linreg_cache *); - -double -pspp_linreg_predict (const struct variable **, const union value **, - const void *, int); -double -pspp_linreg_residual (const struct variable **, const union value **, - const union value *, const void *, int); -/* - All variables used in the model. - */ -int pspp_linreg_get_vars (const void *, const struct variable **); +double linreg_predict (const struct linreg *, const double *, size_t); +double linreg_residual (const struct linreg *, double, const double *, size_t); +const struct variable ** linreg_get_vars (const struct linreg *); -struct pspp_coeff *pspp_linreg_get_coeff (const pspp_linreg_cache - *, - const struct variable - *, - const union value *); -/* - Return or set the standard deviation of the independent variable. - */ -double pspp_linreg_get_indep_variable_sd (pspp_linreg_cache *, const struct variable *); -void pspp_linreg_set_indep_variable_sd (pspp_linreg_cache *, const struct variable *, double); /* Mean of the independent variable. */ -double pspp_linreg_get_indep_variable_mean (pspp_linreg_cache *, const struct variable *); -void pspp_linreg_set_indep_variable_mean (pspp_linreg_cache *, const struct variable *, double); +double linreg_get_indep_variable_mean (const struct linreg *, size_t); +void linreg_set_indep_variable_mean (struct linreg *, size_t, double); + +double linreg_mse (const struct linreg *); + +double linreg_intercept (const struct linreg *); + +const gsl_matrix * linreg_cov (const struct linreg *); +double linreg_coeff (const struct linreg *, size_t); +const struct variable * linreg_indep_var (const struct linreg *, size_t); +const struct variable * linreg_dep_var (const struct linreg *); +size_t linreg_n_coeffs (const struct linreg *); +double linreg_n_obs (const struct linreg *); +double linreg_sse (const struct linreg *); +double linreg_ssreg (const struct linreg *); +double linreg_dfmodel (const struct linreg *); +double linreg_dferror (const struct linreg *); +double linreg_dftotal (const struct linreg *); +double linreg_sst (const struct linreg *); +void linreg_set_depvar_mean (struct linreg *, double); +double linreg_get_depvar_mean (const struct linreg *); -/* - Regression using only the covariance matrix. - */ -void pspp_linreg_with_cov (const struct covariance_matrix *, pspp_linreg_cache *); -double pspp_linreg_mse (const pspp_linreg_cache *); #endif