/* PSPP - a program for statistical analysis.
- Copyright (C) 2005 Free Software Foundation, Inc. Written by Jason H. Stover.
+ Copyright (C) 2005, 2011 Free Software Foundation, Inc. Written by Jason H. Stover.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#ifndef LINREG_H
#define LINREG_H
-#include <stdbool.h>
-#include <gsl/gsl_math.h>
-#include <gsl/gsl_vector.h>
-#include <gsl/gsl_matrix.h>
-#include <src/math/coefficient.h>
-
-enum
-{
- PSPP_LINREG_CONDITIONAL_INVERSE,
- PSPP_LINREG_QR,
- PSPP_LINREG_SWEEP,
-};
-
-
-
-/*
- Options describing what special values should be computed.
- */
-struct pspp_linreg_opts_struct
-{
- int get_depvar_mean_std;
- int *get_indep_mean_std; /* Array of booleans
- dictating which
- independent variables need
- their means and standard
- deviations computed within
- pspp_linreg. This array
- MUST be of length
- n_indeps. If element i is
- 1, pspp_linreg will
- compute the mean and
- variance of indpendent
- variable i. If element i
- is 0, it will not compute
- the mean and standard
- deviation, and assume the
- values are stored.
- cache->indep_mean[i] is
- the mean and
- cache->indep_std[i] is the
- sample standard deviation. */
-};
-typedef struct pspp_linreg_opts_struct pspp_linreg_opts;
+#include <gsl/gsl_matrix.h>
+#include <stdbool.h>
/*
Find the least-squares estimate of b for the linear model:
Springer. 1998. ISBN 0-387-98542-5.
*/
+struct variable; /* Forward declaration: the prototypes in this header only
+   use pointers to struct variable, so its definition is not needed here. */
-struct pspp_linreg_cache_struct
-{
- int n_obs; /* Number of observations. */
- int n_indeps; /* Number of independent variables. */
- int n_coeffs; /* The intercept is not considered a
- coefficient here. */
-
- /*
- Pointer to the dependent variable.
- */
- const struct variable *depvar;
-
- gsl_vector *residuals;
- struct pspp_coeff **coeff;
- double intercept;
- int method; /* Method to use to estimate parameters. */
- /*
- Means and standard deviations of the variables.
- If these pointers are null when pspp_linreg() is
- called, pspp_linreg() will compute their values.
-
- Entry i of indep_means is the mean of independent
- variable i, whose observations are stored in the ith
- column of the design matrix.
- */
- double depvar_mean;
- double depvar_std;
- gsl_vector *indep_means;
- gsl_vector *indep_std;
-
- /*
- Sums of squares.
- */
- double ssm; /* Sums of squares for the overall model. */
- gsl_vector *ss_indeps; /* Sums of squares from each
- independent variable. */
- double sst; /* Sum of squares total. */
- double sse; /* Sum of squares error. */
- double mse; /* Mean squared error. This is just sse /
- dfe, but since it is the best unbiased
- estimate of the population variance, it
- has its own entry here. */
- gsl_vector *ssx; /* Centered sums of squares for independent
- variables, i.e. \sum (x[i] - mean(x))^2. */
- double ssy; /* Centered sums of squares for dependent
- variable.
- */
- /*
- Covariance matrix of the parameter estimates.
- */
- gsl_matrix *cov;
- /*
- Degrees of freedom.
- */
- double dft;
- double dfe;
- double dfm;
-
- /*
- 'Hat' or Hessian matrix, i.e. (X'X)^{-1}, where X is our
- design matrix.
- */
- gsl_matrix *hat;
-
- double (*predict) (const struct variable **, const union value **,
- const void *, int);
- double (*residual) (const struct variable **,
- const union value **,
- const union value *, const void *, int);
- /*
- Returns pointers to the variables used in the model.
- */
- int (*get_vars) (const void *, const struct variable **);
- struct variable *resid;
- struct variable *pred;
-
-};
-
-typedef struct pspp_linreg_cache_struct pspp_linreg_cache;
+struct linreg *linreg_alloc (const struct variable *, const struct variable **,
+ double, size_t, bool); /* Returns a pointer to a struct linreg.
+   NOTE(review): the parameters are unnamed in this prototype; presumably
+   they are the dependent variable, the array of independent variables,
+   the number of observations, the number of independent variables and an
+   option flag, in that order -- confirm against the definition. */
+void linreg_unref (struct linreg *); /* NOTE(review): presumably drops one
+   reference and destroys the object once none remain -- confirm against
+   the definition. */
+void linreg_ref (struct linreg *); /* NOTE(review): presumably takes an
+   additional reference that must later be released with linreg_unref --
+   confirm against the definition. */
+int linreg_n_indeps (const struct linreg *c); /* Takes C through a pointer to
+   const.  NOTE(review): presumably the number of independent variables in
+   the model; note the int return type, whereas linreg_n_coeffs returns
+   size_t. */
/*
- Allocate a pspp_linreg_cache and return a pointer
- to it. n is the number of cases, p is the number of
- independent variables.
- */
-pspp_linreg_cache *pspp_linreg_cache_alloc (size_t n, size_t p);
-
-bool pspp_linreg_cache_free (void *);
+ Fit the linear model via least squares.
+*/
+void linreg_fit (const gsl_matrix *, struct linreg *); /* The matrix is passed
+   read-only, the struct linreg is passed non-const, and nothing is returned.
+   NOTE(review): presumably the fit is stored in the struct linreg and the
+   matrix holds the data (or its moments) to fit -- confirm against the
+   definition. */
-/*
- Fit the linear model via least squares. All pointers passed to pspp_linreg
- are assumed to be allocated to the correct size and initialized to the
- values as indicated by opts.
- */
-int
-pspp_linreg (const gsl_vector *, const struct design_matrix *,
- const pspp_linreg_opts *, pspp_linreg_cache *);
-
-double
-pspp_linreg_predict (const struct variable **, const union value **,
- const void *, int);
-double
-pspp_linreg_residual (const struct variable **, const union value **,
- const union value *, const void *, int);
-/*
- All variables used in the model.
- */
-int pspp_linreg_get_vars (const void *, const struct variable **);
+double linreg_predict (const struct linreg *, const double *, size_t); /* NOTE(review):
+   presumably the array holds the independent-variable values for one case
+   and the size_t is its length -- confirm against the definition. */
+double linreg_residual (const struct linreg *, double, const double *, size_t); /* NOTE(review):
+   presumably the double is the observed value of the dependent variable
+   and the remaining arguments have the same meaning as for linreg_predict
+   -- confirm against the definition. */
+const struct variable ** linreg_get_vars (const struct linreg *); /* Returns an
+   array of pointers to const struct variable.  NOTE(review): presumably the
+   variables used by the model; this header does not say who owns the
+   returned array -- confirm before freeing or retaining it. */
-struct pspp_coeff *pspp_linreg_get_coeff (const pspp_linreg_cache
- *,
- const struct variable
- *,
- const union value *);
-/*
- Return or set the standard deviation of the independent variable.
- */
-double pspp_linreg_get_indep_variable_sd (pspp_linreg_cache *, const struct variable *);
-void pspp_linreg_set_indep_variable_sd (pspp_linreg_cache *, const struct variable *, double);
/*
Mean of the independent variable.
*/
-double pspp_linreg_get_indep_variable_mean (pspp_linreg_cache *, const struct variable *);
-void pspp_linreg_set_indep_variable_mean (pspp_linreg_cache *, const struct variable *, double);
+double linreg_get_indep_variable_mean (const struct linreg *, size_t); /* NOTE(review):
+   the size_t presumably selects the independent variable by index --
+   confirm against the definition. */
+void linreg_set_indep_variable_mean (struct linreg *, size_t, double); /* Takes the
+   struct linreg non-const.  NOTE(review): presumably stores the double as
+   the mean of the independent variable selected by the size_t index --
+   confirm against the definition. */
+
+double linreg_mse (const struct linreg *); /* NOTE(review): presumably the
+   mean squared error of the fitted model -- confirm against the definition. */
+
+double linreg_intercept (const struct linreg *); /* NOTE(review): presumably
+   the fitted intercept term -- confirm against the definition. */
+
+const gsl_matrix * linreg_cov (const struct linreg *); /* Returns a pointer to
+   a const gsl_matrix.  NOTE(review): presumably the covariance matrix of
+   the parameter estimates -- confirm against the definition. */
+double linreg_coeff (const struct linreg *, size_t); /* NOTE(review): the
+   size_t presumably selects a coefficient by index. */
+const struct variable * linreg_indep_var (const struct linreg *, size_t); /* NOTE(review):
+   the size_t presumably selects an independent variable by index. */
+const struct variable * linreg_dep_var (const struct linreg *); /* NOTE(review):
+   presumably the dependent variable of the model. */
+size_t linreg_n_coeffs (const struct linreg *); /* Returns a size_t count,
+   unlike linreg_n_indeps, which returns int. */
+double linreg_n_obs (const struct linreg *); /* Returned as a double, not an
+   integer type.  NOTE(review): presumably the number of observations. */
+double linreg_sse (const struct linreg *); /* NOTE(review): presumably the
+   error (residual) sum of squares. */
+double linreg_ssreg (const struct linreg *); /* NOTE(review): presumably the
+   regression (model) sum of squares. */
+double linreg_dfmodel (const struct linreg *); /* NOTE(review): presumably the
+   model degrees of freedom. */
+double linreg_dferror (const struct linreg *); /* NOTE(review): presumably the
+   error degrees of freedom. */
+double linreg_dftotal (const struct linreg *); /* NOTE(review): presumably the
+   total degrees of freedom. */
+double linreg_sst (const struct linreg *); /* NOTE(review): presumably the
+   total sum of squares. */
+void linreg_set_depvar_mean (struct linreg *, double); /* Takes the struct
+   linreg non-const.  NOTE(review): presumably stores the double as the mean
+   of the dependent variable -- confirm against the definition. */
+double linreg_get_depvar_mean (const struct linreg *); /* NOTE(review):
+   presumably returns the mean of the dependent variable. */
+
#endif