--- /dev/null
+/* lib/linreg/coefficient.c
+
+ Copyright (C) 2005 Free Software Foundation, Inc.
+ Written by Jason H Stover.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or (at
+ your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02111-1307, USA.
+ */
+
+/*
+ Accessor functions for matching coefficients and variables.
+ */
+#include <assert.h>
+#include "pspp_linreg.h"
+#include <gl/xalloc.h>
+
+
+struct varinfo
+{
+ const struct variable *v; /* Variable associated with this
+ coefficient. Note this variable may not
+ be unique. In other words, a
+ coefficient structure may have other
+ v_info's, each with its own variable.
+ */
+ const union value *val; /* Value of the variable v which this
+ varinfo refers to. This member is relevant
+ only to categorical variables.
+ */
+};
+
+void pspp_linreg_coeff_free (struct pspp_linreg_coeff *c)
+{
+ free (c->v_info);
+ free (c);
+}
+
+/*
+ Initialize the variable and value pointers inside the
+ coefficient structures for the linear model.
+ */
+void
+pspp_linreg_coeff_init (pspp_linreg_cache *c, struct design_matrix *X)
+{
+ size_t i;
+ size_t j;
+ int n_vals = 1;
+ struct pspp_linreg_coeff *coeff;
+
+ c->coeff = xnmalloc (X->m->size2 + 1, sizeof (*c->coeff));
+ for (i = 0; i < X->m->size2; i++)
+ {
+ j = i + 1; /* The first coefficient is the intercept. */
+ coeff = c->coeff + j;
+ coeff->n_vars = n_vals; /* Currently, no procedures allow interactions.
+ This will have to change when procedures that
+ allow interaction terms are written.
+ */
+ coeff->v_info = xnmalloc (coeff->n_vars, sizeof (*coeff->v_info));
+ assert (coeff->v_info != NULL);
+ coeff->v_info->v = (const struct variable *) design_matrix_col_to_var (X, i);
+
+ if (coeff->v_info->v->type == ALPHA)
+ {
+ size_t k;
+ k = design_matrix_var_to_column (X, coeff->v_info->v);
+ assert (k <= i);
+ k = i - k;
+ coeff->v_info->val = cat_subscript_to_value (k, (struct variable *) coeff->v_info->v);
+ }
+ }
+}
+void
+pspp_linreg_coeff_set_estimate (struct pspp_linreg_coeff *c,
+ double estimate)
+{
+ c->estimate = estimate;
+}
+void
+pspp_linreg_coeff_set_std_err (struct pspp_linreg_coeff *c,
+ double std_err)
+{
+ c->std_err = std_err;
+}
+/*
+ How many variables are associated with this coefficient?
+ */
+int
+pspp_linreg_coeff_get_n_vars (struct pspp_linreg_coeff *c)
+{
+ return c->n_vars;
+}
+/*
+ Which variable does this coefficient match?
+ */
+const struct variable *
+pspp_linreg_coeff_get_var (struct pspp_linreg_coeff *c, int i)
+{
+ assert (i < c->n_vars);
+ return (c->v_info + i)->v;
+}
+/*
+ Which value is associated with this coefficient/variable comination?
+*/
+const union value *
+pspp_linreg_coeff_get_value (struct pspp_linreg_coeff *c,
+ const struct variable *v)
+{
+ int i = 0;
+ const struct variable *candidate;
+
+ while (i < c->n_vars)
+ {
+ candidate = pspp_linreg_coeff_get_var (c, i);
+ if (v->index == candidate->index)
+ {
+ return (c->v_info + i)->val;
+ }
+ i++;
+ }
+ return NULL;
+}
#include <gsl/gsl_multifit.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_cblas.h>
+#include <src/design-matrix.h>
#include <src/var.h>
+#define PSPP_LINREG_VAL_NOT_FOUND -1
enum
{
PSPP_LINREG_SWEEP,
struct pspp_linreg_coeff
{
double estimate; /* Estimated coefficient. */
- const struct variable *v; /* The variable associated with this coefficient.
- The calling function should supply the variable
- when it creates the design matrix. The estimation
- procedure ignores the struct variable *. It is here so
- the caller can match parameters with relevant
- variables.
- */
+ double std_err; /* Standard error of the estimate. */
+ struct varinfo *v_info; /* Information pertaining to the
+ variable(s) associated with this
+ coefficient. The calling function
+ should initialize this value with the
+ functions in coefficient.c. The
+ estimation procedure ignores this
+ member. It is here so the caller can
+ match parameters with relevant variables
+ and values. If the coefficient is
+ associated with an interaction, then
+ v_info contains information for multiple
+ variables.
+ */
+ int n_vars; /* Number of variables associated with this coefficient.
+ Coefficients corresponding to interaction terms will
+ have more than one variable.
+ */
};
struct pspp_linreg_cache_struct
{
int pspp_linreg (const gsl_vector * Y, const gsl_matrix * X,
const pspp_linreg_opts * opts, pspp_linreg_cache * cache);
+void pspp_linreg_coeff_init (pspp_linreg_cache *, struct design_matrix *);
+
+void pspp_linreg_coeff_free (struct pspp_linreg_coeff *);
+
+void pspp_linreg_coeff_set_estimate (struct pspp_linreg_coeff *, double);
+
+void pspp_linreg_coeff_set_std_err (struct pspp_linreg_coeff *, double);
+
+int pspp_linreg_coeff_get_n_vars (struct pspp_linreg_coeff *);
+
+const struct variable *pspp_linreg_coeff_get_var (struct pspp_linreg_coeff *, int);
+
+const union value *pspp_linreg_coeff_get_value (struct pspp_linreg_coeff *, const struct variable *);
#endif
int pspp_reg_rc = CMD_SUCCESS;
static void run_regression (const struct casefile *, void *);
-static union value *pspp_reg_coefficient_to_value (const pspp_linreg_cache *,
- const struct
- pspp_linreg_coeff *,
- size_t);
-
-/*
- Which value of a categorical variable corresponds to the
- coefficient? This routine provides the answer IF the
- categorical variable is encoded in the usual way: The
- first value maps to the zero vector, the second value to
- the vector (0, 1, 0, ...), the third value (0, 0, 1, 0, ...),
- etc.
- */
-static union value *
-pspp_reg_coefficient_to_value (const pspp_linreg_cache * cache,
- const struct pspp_linreg_coeff *coeff,
- size_t j)
-{
- size_t which = 0;
- int k = 1; /* First coefficient is the intercept. */
- int found = 0;
- const union value *result;
- struct pspp_linreg_coeff c;
-
- assert (cache != NULL);
- assert (coeff != NULL);
- assert (coeff->v->type == ALPHA);
- while (!found && k < cache->n_coeffs)
- {
- c = cache->coeff[k];
- /*
- compare_var_names returns 0 if the variable
- names match.
- */
- if (!compare_var_names (coeff->v, c.v, NULL))
- {
- if (j == k)
- {
- found = 1;
- }
- else
- {
- which++;
- }
- }
- k++;
- }
- result = cat_subscript_to_value ((const size_t) which, coeff->v);
- return result;
-}
/*
STATISTICS subcommand output functions.
double beta;
const char *label;
char *tmp;
+ const struct variable *v;
const union value *val;
const char *val_s;
struct tab_table *t;
for (j = 1; j <= c->n_indeps; j++)
{
i = indep_vars[j];
- label = var_to_string (c->coeff[j].v);
+ v = pspp_linreg_coeff_get_var (c->coeff + j, 0);
+ label = var_to_string (v);
/* Do not overwrite the variable's name. */
strncpy (tmp, label, MAX_STRING);
- if (c->coeff[j].v->type == ALPHA)
+ if (v->type == ALPHA)
{
/*
Append the value associated with this coefficient.
This makes sense only if we us the usual binary encoding
for that value.
*/
- val = pspp_reg_coefficient_to_value (c, &(c->coeff[j]), j);
- val_s = value_to_string (val, c->coeff[j].v);
+
+ val = pspp_linreg_coeff_get_value (c->coeff + j, v);
+ val_s = value_to_string (val, v);
strncat (tmp, val_s, MAX_STRING);
}
statistics_keyword_output (reg_stats_selection, keywords[selection], c);
}
static int
-reg_inserted (struct variable *v, struct variable **varlist, int n_vars)
+reg_inserted (const struct variable *v, struct variable **varlist, int n_vars)
{
int i;
size_t j;
int n_vars = 0;
struct variable **varlist;
- struct pspp_linreg_coeff coeff;
+ struct pspp_linreg_coeff *coeff;
+ const struct variable *v;
union value *val;
fprintf (fp, "%s", reg_export_categorical_encode_1);
varlist = xnmalloc (c->n_indeps, sizeof (*varlist));
for (i = 1; i < c->n_indeps; i++) /* c->coeff[0] is the intercept. */
{
- coeff = c->coeff[i];
- if (coeff.v->type == ALPHA)
+ coeff = c->coeff + i;
+ v = pspp_linreg_coeff_get_var (coeff, 0);
+ if (v->type == ALPHA)
{
- if (!reg_inserted (coeff.v, varlist, n_vars))
+ if (!reg_inserted (v, varlist, n_vars))
{
fprintf (fp, "struct pspp_reg_categorical_variable %s;\n\t",
- coeff.v->name);
- varlist[n_vars] = coeff.v;
+ v->name);
+ varlist[n_vars] = (struct variable *) v;
n_vars++;
}
}
for (i = 0; i < n_vars; i++)
{
- coeff = c->coeff[i];
+ coeff = c->coeff + i;
fprintf (fp, "%s.name = \"%s\";\n\t", varlist[i]->name,
varlist[i]->name);
fprintf (fp, "%s.n_vals = %d;\n\t", varlist[i]->name,
reg_print_depvars (FILE * fp, pspp_linreg_cache * c)
{
int i;
- struct pspp_linreg_coeff coeff;
+ struct pspp_linreg_coeff *coeff;
+ const struct variable *v;
fprintf (fp, "char *model_depvars[%d] = {", c->n_indeps);
for (i = 1; i < c->n_indeps; i++)
{
- coeff = c->coeff[i];
- fprintf (fp, "\"%s\",\n\t\t", coeff.v->name);
+ coeff = c->coeff + i;
+ v = pspp_linreg_coeff_get_var (coeff, 0);
+ fprintf (fp, "\"%s\",\n\t\t", v->name);
}
- coeff = c->coeff[i];
- fprintf (fp, "\"%s\"};\n\t", coeff.v->name);
+ coeff = c->coeff + i;
+ v = pspp_linreg_coeff_get_var (coeff, 0);
+ fprintf (fp, "\"%s\"};\n\t", v->name);
}
static void
reg_print_getvar (FILE * fp, pspp_linreg_cache * c)
static void
subcommand_export (int export, pspp_linreg_cache * c)
{
- FILE *fp;
size_t i;
size_t j;
int n_quantiles = 100;
if (export)
{
+ FILE *fp;
assert (c != NULL);
assert (model_file != NULL);
assert (fp != NULL);
*/
static size_t
mark_missing_cases (const struct casefile *cf, struct variable *v,
- double *is_missing_case, double n_data)
+ int *is_missing_case, double n_data)
{
struct casereader *r;
struct ccase c;
size_t row;
- union value *val;
+ const union value *val;
for (r = casefile_get_reader (cf);
casereader_read (r, &c); case_destroy (&c))
return n_data;
}
+
static void
run_regression (const struct casefile *cf, void *cmd_ UNUSED)
{
and store pointers to the variables that correspond to the
coefficients.
*/
- lcache->coeff = xnmalloc (X->m->size2 + 1, sizeof (*lcache->coeff));
- for (i = 0; i < X->m->size2; i++)
- {
- j = i + 1; /* The first coeff is the intercept. */
- lcache->coeff[j].v =
- (const struct variable *) design_matrix_col_to_var (X, i);
- assert (lcache->coeff[j].v != NULL);
- }
+ pspp_linreg_coeff_init (lcache, X);
/*
Find the least-squares estimates and other statistics.