/* PSPP - a program for statistical analysis.
- Copyright (C) 2005, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2009, 2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <data/case.h>
#include <data/casegrouper.h>
#include <data/casereader.h>
-#include <data/category.h>
#include <data/dictionary.h>
#include <data/missing-values.h>
#include <data/procedure.h>
#include <math/covariance.h>
#include <math/linreg.h>
#include <math/moments.h>
-#include <output/table.h>
+#include <output/tab.h>
#include "xalloc.h"
/*
STATISTICS subcommand output functions.
*/
-static void reg_stats_r (linreg *);
-static void reg_stats_coeff (linreg *);
-static void reg_stats_anova (linreg *);
-static void reg_stats_outs (linreg *);
-static void reg_stats_zpp (linreg *);
-static void reg_stats_label (linreg *);
-static void reg_stats_sha (linreg *);
-static void reg_stats_ci (linreg *);
-static void reg_stats_f (linreg *);
-static void reg_stats_bcov (linreg *);
-static void reg_stats_ses (linreg *);
-static void reg_stats_xtx (linreg *);
-static void reg_stats_collin (linreg *);
-static void reg_stats_tol (linreg *);
-static void reg_stats_selection (linreg *);
-static void statistics_keyword_output (void (*)(linreg *),
- int, linreg *);
+static void reg_stats_r (linreg *, void *);
+static void reg_stats_coeff (linreg *, void *);
+static void reg_stats_anova (linreg *, void *);
+static void reg_stats_outs (linreg *, void *);
+static void reg_stats_zpp (linreg *, void *);
+static void reg_stats_label (linreg *, void *);
+static void reg_stats_sha (linreg *, void *);
+static void reg_stats_ci (linreg *, void *);
+static void reg_stats_f (linreg *, void *);
+static void reg_stats_bcov (linreg *, void *);
+static void reg_stats_ses (linreg *, void *);
+static void reg_stats_xtx (linreg *, void *);
+static void reg_stats_collin (linreg *, void *);
+static void reg_stats_tol (linreg *, void *);
+static void reg_stats_selection (linreg *, void *);
+static void statistics_keyword_output (void (*)(linreg *, void *),
+ int, linreg *, void *);
static void
-reg_stats_r (linreg * c)
+reg_stats_r (linreg *c, void *aux UNUSED)
{
struct tab_table *t;
int n_rows = 2;
rsq = linreg_ssreg (c) / linreg_sst (c);
adjrsq = 1.0 - (1.0 - rsq) * (linreg_n_obs (c) - 1.0) / (linreg_n_obs (c) - linreg_n_coeffs (c));
std_error = sqrt (linreg_mse (c));
- t = tab_create (n_cols, n_rows, 0);
- tab_dim (t, tab_natural_dimensions, NULL);
+ t = tab_create (n_cols, n_rows);
tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
tab_hline (t, TAL_2, 0, n_cols - 1, 1);
tab_vline (t, TAL_2, 2, 0, n_rows - 1);
Table showing estimated regression coefficients.
*/
static void
-reg_stats_coeff (linreg * c)
+reg_stats_coeff (linreg * c, void *aux_)
{
size_t j;
int n_cols = 7;
const struct variable *v;
struct tab_table *t;
+ gsl_matrix *cov = aux_;
assert (c != NULL);
n_rows = linreg_n_coeffs (c) + 3;
- t = tab_create (n_cols, n_rows, 0);
+ t = tab_create (n_cols, n_rows);
tab_headers (t, 2, 0, 1, 0);
- tab_dim (t, tab_natural_dimensions, NULL);
tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
tab_hline (t, TAL_2, 0, n_cols - 1, 1);
tab_vline (t, TAL_2, 2, 0, n_rows - 1);
tab_double (t, 4, 1, 0, 0.0, NULL);
t_stat = linreg_intercept (c) / std_err;
tab_double (t, 5, 1, 0, t_stat, NULL);
- pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), 1.0);
+ pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), (double) (linreg_n_obs (c) - linreg_n_coeffs (c)));
tab_double (t, 6, 1, 0, pval, NULL);
for (j = 0; j < linreg_n_coeffs (c); j++)
{
Standardized coefficient, i.e., regression coefficient
if all variables had unit variance.
*/
- beta = sqrt (gsl_matrix_get (linreg_cov (c), j, j));
- beta *= linreg_coeff (c, j) / c->depvar_std;
+ beta = sqrt (gsl_matrix_get (cov, j, j));
+ beta *= linreg_coeff (c, j) /
+ sqrt (gsl_matrix_get (cov, cov->size1 - 1, cov->size2 - 1));
tab_double (t, 4, this_row, 0, beta, NULL);
/*
Display the ANOVA table.
*/
static void
-reg_stats_anova (linreg * c)
+reg_stats_anova (linreg * c, void *aux UNUSED)
{
int n_cols = 7;
int n_rows = 4;
struct tab_table *t;
assert (c != NULL);
- t = tab_create (n_cols, n_rows, 0);
+ t = tab_create (n_cols, n_rows);
tab_headers (t, 2, 0, 1, 0);
- tab_dim (t, tab_natural_dimensions, NULL);
tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
}
static void
-reg_stats_outs (linreg * c)
+reg_stats_outs (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_zpp (linreg * c)
+reg_stats_zpp (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_label (linreg * c)
+reg_stats_label (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_sha (linreg * c)
+reg_stats_sha (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_ci (linreg * c)
+reg_stats_ci (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_f (linreg * c)
+reg_stats_f (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_bcov (linreg * c)
+reg_stats_bcov (linreg * c, void *aux UNUSED)
{
int n_cols;
int n_rows;
assert (c != NULL);
n_cols = c->n_indeps + 1 + 2;
n_rows = 2 * (c->n_indeps + 1);
- t = tab_create (n_cols, n_rows, 0);
+ t = tab_create (n_cols, n_rows);
tab_headers (t, 2, 0, 1, 0);
- tab_dim (t, tab_natural_dimensions, NULL);
tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
tab_hline (t, TAL_2, 0, n_cols - 1, 1);
tab_vline (t, TAL_2, 2, 0, n_rows - 1);
tab_submit (t);
}
static void
-reg_stats_ses (linreg * c)
+reg_stats_ses (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_xtx (linreg * c)
+reg_stats_xtx (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_collin (linreg * c)
+reg_stats_collin (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_tol (linreg * c)
+reg_stats_tol (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-reg_stats_selection (linreg * c)
+reg_stats_selection (linreg * c, void *aux UNUSED)
{
assert (c != NULL);
}
static void
-statistics_keyword_output (void (*function) (linreg *),
- int keyword, linreg * c)
+statistics_keyword_output (void (*function) (linreg *, void *),
+ int keyword, linreg * c, void *aux)
{
if (keyword)
{
- (*function) (c);
+ (*function) (c, aux);
}
}
static void
-subcommand_statistics (int *keywords, linreg * c)
+subcommand_statistics (int *keywords, linreg * c, void *aux)
{
/*
The order here must match the order in which the STATISTICS
keywords[r] = 1;
}
}
- statistics_keyword_output (reg_stats_r, keywords[r], c);
- statistics_keyword_output (reg_stats_anova, keywords[anova], c);
- statistics_keyword_output (reg_stats_coeff, keywords[coeff], c);
- statistics_keyword_output (reg_stats_outs, keywords[outs], c);
- statistics_keyword_output (reg_stats_zpp, keywords[zpp], c);
- statistics_keyword_output (reg_stats_label, keywords[label], c);
- statistics_keyword_output (reg_stats_sha, keywords[sha], c);
- statistics_keyword_output (reg_stats_ci, keywords[ci], c);
- statistics_keyword_output (reg_stats_f, keywords[f], c);
- statistics_keyword_output (reg_stats_bcov, keywords[bcov], c);
- statistics_keyword_output (reg_stats_ses, keywords[ses], c);
- statistics_keyword_output (reg_stats_xtx, keywords[xtx], c);
- statistics_keyword_output (reg_stats_collin, keywords[collin], c);
- statistics_keyword_output (reg_stats_tol, keywords[tol], c);
- statistics_keyword_output (reg_stats_selection, keywords[selection], c);
+ statistics_keyword_output (reg_stats_r, keywords[r], c, aux);
+ statistics_keyword_output (reg_stats_anova, keywords[anova], c, aux);
+ statistics_keyword_output (reg_stats_coeff, keywords[coeff], c, aux);
+ statistics_keyword_output (reg_stats_outs, keywords[outs], c, aux);
+ statistics_keyword_output (reg_stats_zpp, keywords[zpp], c, aux);
+ statistics_keyword_output (reg_stats_label, keywords[label], c, aux);
+ statistics_keyword_output (reg_stats_sha, keywords[sha], c, aux);
+ statistics_keyword_output (reg_stats_ci, keywords[ci], c, aux);
+ statistics_keyword_output (reg_stats_f, keywords[f], c, aux);
+ statistics_keyword_output (reg_stats_bcov, keywords[bcov], c, aux);
+ statistics_keyword_output (reg_stats_ses, keywords[ses], c, aux);
+ statistics_keyword_output (reg_stats_xtx, keywords[xtx], c, aux);
+ statistics_keyword_output (reg_stats_collin, keywords[collin], c, aux);
+ statistics_keyword_output (reg_stats_tol, keywords[tol], c, aux);
+ statistics_keyword_output (reg_stats_selection, keywords[selection], c, aux);
}
/*
{
const struct dictionary *dict = dataset_dict (ds);
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if ((lex_token (lexer) != T_ID
- || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
+ || dict_lookup_var (dict, lex_tokcstr (lexer)) == NULL)
&& lex_token (lexer) != T_ALL)
return 2;
const gsl_matrix *ssizes;
const gsl_matrix *cm;
const gsl_matrix *mean_matrix;
+ const gsl_matrix *ssize_matrix;
double result = 0.0;
- cm = covariance_calculate (all_cov);
+ cm = covariance_calculate_unnormalized (all_cov);
rows = xnmalloc (cov->size1 - 1, sizeof (*rows));
for (i = 0; i < n_all_vars; i++)
}
}
mean_matrix = covariance_moments (all_cov, MOMENT_MEAN);
+ ssize_matrix = covariance_moments (all_cov, MOMENT_NONE);
for (i = 0; i < cov->size1 - 1; i++)
{
- means[i] = gsl_matrix_get (mean_matrix, rows[i], 0);
+ means[i] = gsl_matrix_get (mean_matrix, rows[i], 0)
+ / gsl_matrix_get (ssize_matrix, rows[i], 0);
for (j = 0; j < cov->size2 - 1; j++)
{
gsl_matrix_set (cov, i, j, gsl_matrix_get (cm, rows[i], rows[j]));
gsl_matrix_set (cov, j, i, gsl_matrix_get (cm, rows[j], rows[i]));
}
}
- means[cov->size1 - 1] = gsl_matrix_get (mean_matrix, dep_subscript, 0);
+ means[cov->size1 - 1] = gsl_matrix_get (mean_matrix, dep_subscript, 0)
+ / gsl_matrix_get (ssize_matrix, dep_subscript, 0);
ssizes = covariance_moments (all_cov, MOMENT_NONE);
result = gsl_matrix_get (ssizes, dep_subscript, rows[0]);
for (i = 0; i < cov->size1 - 1; i++)
gsl_matrix_set (cov, cov->size1 - 1, cov->size1 - 1,
gsl_matrix_get (cm, dep_subscript, dep_subscript));
free (rows);
+ gsl_matrix_free (cm);
return result;
}
+/* Returns the number of slots needed to hold every variable in
+   v_variables plus every dependent variable of CMD: the two counts
+   are summed, then one is subtracted for each (dependent, v_variables)
+   pair that refers to the same variable.
+   NOTE(review): the result equals the number of distinct variables
+   only if v_variables holds no duplicates -- confirm with the parser. */
+static size_t
+get_n_all_vars (struct cmd_regression *cmd)
+{
+  size_t total = n_variables + cmd->n_dependent;
+  size_t dep;
+
+  for (dep = 0; dep < cmd->n_dependent; dep++)
+    {
+      size_t k;
+
+      for (k = 0; k < n_variables; k++)
+        if (cmd->v_dependent[dep] == v_variables[k])
+          total--;
+    }
+  return total;
+}
+/* Fills VARS with the variables the covariance matrix must cover:
+   first the N_VARIABLES entries of v_variables, in order, then every
+   dependent variable of CMD that does not already appear among them.
+
+   The extra dependent variables are appended contiguously.  Indexing
+   them by their position in CMD->v_dependent would leave holes, and
+   write past the end of VARS, whenever an earlier dependent variable
+   is also present in v_variables.
+
+   VARS must have room for get_n_all_vars (CMD) elements. */
+static void
+fill_all_vars (const struct variable **vars, struct cmd_regression *cmd)
+{
+  size_t n = n_variables;       /* Next free slot in VARS. */
+  size_t i;
+  size_t j;
+
+  for (i = 0; i < n_variables; i++)
+    {
+      vars[i] = v_variables[i];
+    }
+  for (i = 0; i < cmd->n_dependent; i++)
+    {
+      bool absent = true;
+
+      for (j = 0; j < n_variables; j++)
+        {
+          if (cmd->v_dependent[i] == v_variables[j])
+            {
+              absent = false;
+              break;
+            }
+        }
+      if (absent)
+        {
+          vars[n++] = cmd->v_dependent[i];
+        }
+    }
+}
static bool
run_regression (struct casereader *input, struct cmd_regression *cmd,
struct dataset *ds, linreg **models)
struct ccase *c;
struct covariance *cov;
const struct variable **vars;
+ const struct variable **all_vars;
const struct variable *dep_var;
struct casereader *reader;
const struct dictionary *dict;
- gsl_matrix *this_cm;
+ size_t n_all_vars;
assert (models != NULL);
{
dict_get_vars (dict, &v_variables, &n_variables, 0);
}
+ n_all_vars = get_n_all_vars (cmd);
+ all_vars = xnmalloc (n_all_vars, sizeof (*all_vars));
+ fill_all_vars (all_vars, cmd);
vars = xnmalloc (n_variables, sizeof (*vars));
- means = xnmalloc (n_variables, sizeof (*means));
- cov = covariance_1pass_create (n_variables, v_variables,
+ means = xnmalloc (n_all_vars, sizeof (*means));
+ cov = covariance_1pass_create (n_all_vars, all_vars,
dict_get_weight (dict), MV_ANY);
reader = casereader_clone (input);
for (k = 0; k < cmd->n_dependent; k++)
{
+ gsl_matrix *this_cm;
dep_var = cmd->v_dependent[k];
n_indep = identify_indep_vars (vars, dep_var);
this_cm = gsl_matrix_alloc (n_indep + 1, n_indep + 1);
n_data = fill_covariance (this_cm, cov, vars, n_indep,
- dep_var, v_variables, n_variables, means);
+ dep_var, all_vars, n_all_vars, means);
models[k] = linreg_alloc (dep_var, (const struct variable **) vars,
n_data, n_indep);
models[k]->depvar = dep_var;
{
linreg_set_indep_variable_mean (models[k], i, means[i]);
}
+ linreg_set_depvar_mean (models[k], means[i]);
/*
For large data sets, use QR decomposition.
*/
if (!taint_has_tainted_successor (casereader_get_taint (input)))
{
- subcommand_statistics (cmd->a_statistics, models[k]);
+ subcommand_statistics (cmd->a_statistics, models[k], this_cm);
}
}
else
linreg_free (models[k]);
models[k] = NULL;
}
+ gsl_matrix_free (this_cm);
}
casereader_destroy (reader);
free (vars);
+ free (all_vars);
free (means);
casereader_destroy (input);
covariance_destroy (cov);