X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fregression.c;h=b4448dd9e58ead03cb61cf3d5e5b3c97201b1edf;hb=60c545e6e958d868db3399a8989d37d8f9e0c131;hp=f7ec679a1c5c4180aa244e5e0f2e8b344654e582;hpb=6e097c89af440da90b43ce90864394c4d0c843d5;p=pspp diff --git a/src/language/stats/regression.c b/src/language/stats/regression.c index f7ec679a1c..b4448dd9e5 100644 --- a/src/language/stats/regression.c +++ b/src/language/stats/regression.c @@ -1,5 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Free Software Foundation, Inc. + Copyright (C) 2005, 2009, 2010, 2011, 2012, 2013, 2014, + 2016, 2017, 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,6 +20,7 @@ #include #include +#include #include #include @@ -42,23 +44,23 @@ #include "libpspp/message.h" #include "libpspp/taint.h" -#include "output/tab.h" +#include "output/pivot-table.h" + +#include "gl/intprops.h" +#include "gl/minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid -#include - -#define REG_LARGE_DATA 1000 - #define STATS_R 1 #define STATS_COEFF 2 #define STATS_ANOVA 4 #define STATS_OUTS 8 #define STATS_CI 16 #define STATS_BCOV 32 +#define STATS_TOL 64 #define STATS_DEFAULT (STATS_R | STATS_COEFF | STATS_ANOVA | STATS_OUTS) @@ -79,6 +81,8 @@ struct regression bool resid; bool pred; + + bool origin; }; struct regression_workspace @@ -135,7 +139,7 @@ create_aux_var (struct dataset *ds, const char *prefix) return var; } -/* Auxilliary data for transformation when /SAVE is entered */ +/* Auxiliary data for transformation when /SAVE is entered */ struct save_trans_data { int n_dep_vars; @@ -155,7 +159,7 @@ save_trans_free (void *aux) return true; } -static int +static enum trns_result save_trans_func (void *aux, struct ccase **c, casenumber x UNUSED) { struct save_trans_data *save_trans_data = aux; @@ -171,14 +175,14 @@ save_trans_func (void *aux, struct ccase **c, casenumber x UNUSED) { if (ws->pred_idx != -1) { - double pred = case_data_idx (in, ws->extras * k + ws->pred_idx)->f; - case_data_rw (*c, ws->predvars[k])->f = pred; + double pred = case_num_idx (in, ws->extras * k + ws->pred_idx); + *case_num_rw (*c, ws->predvars[k]) = pred; } if (ws->res_idx != -1) { - double resid = case_data_idx (in, ws->extras * k + ws->res_idx)->f; - case_data_rw (*c, ws->residvars[k])->f = resid; + double resid = case_num_idx (in, ws->extras * k + ws->res_idx); + *case_num_rw (*c, ws->residvars[k]) = resid; } } case_unref (in); @@ -187,7 +191,6 @@ save_trans_func (void *aux, struct ccase **c, casenumber x UNUSED) return TRNS_CONTINUE; } - int cmd_regression (struct lexer *lexer, struct dataset *ds) { @@ -204,27 +207,38 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) regression.resid = false; regression.ds = ds; + regression.origin = false; - /* Accept an optional, completely pointless "/VARIABLES=" */ - lex_match (lexer, T_SLASH); - if (lex_match_id (lexer, "VARIABLES")) - { - if (!lex_force_match (lexer, T_EQUALS)) - goto error; - } - - if (!parse_variables_const (lexer, dict, - ®ression.vars, ®ression.n_vars, - PV_NO_DUPLICATE | PV_NUMERIC)) - goto error; - - + bool variables_seen = false; + bool method_seen = false; + bool dependent_seen = false; while (lex_token (lexer) != T_ENDCMD) { lex_match (lexer, T_SLASH); - if (lex_match_id (lexer, "DEPENDENT")) + if (lex_match_id (lexer, "VARIABLES")) + { + if (method_seen) + { + msg (SE, _("VARIABLES may not appear after %s"), "METHOD"); + goto error; + } + if (dependent_seen) + { + msg (SE, _("VARIABLES may not appear after %s"), "DEPENDENT"); + goto error; + } + variables_seen = true; + lex_match (lexer, T_EQUALS); + + if (!parse_variables_const (lexer, dict, + ®ression.vars, ®ression.n_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + goto error; + } + else if (lex_match_id (lexer, "DEPENDENT")) { + dependent_seen = true; lex_match (lexer, T_EQUALS); free (regression.dep_vars); @@ -236,14 +250,31 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) PV_NO_DUPLICATE | PV_NUMERIC)) goto error; } + else if (lex_match_id (lexer, "ORIGIN")) + { + regression.origin = true; + } + else if (lex_match_id (lexer, "NOORIGIN")) + { + regression.origin = false; + } else if (lex_match_id (lexer, "METHOD")) { + method_seen = true; lex_match (lexer, T_EQUALS); if (!lex_force_match_id (lexer, "ENTER")) { goto error; } + + if (! variables_seen) + { + if (!parse_variables_const (lexer, dict, + ®ression.vars, ®ression.n_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + goto error; + } } else if (lex_match_id (lexer, "STATISTICS")) { @@ -277,6 +308,10 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) { statistics |= STATS_BCOV; } + else if (lex_match_id (lexer, "TOL")) + { + statistics |= STATS_TOL; + } else if (lex_match_id (lexer, "CI")) { statistics |= STATS_CI; @@ -414,7 +449,12 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) memcpy (save_trans_data->ws, &workspace, sizeof (workspace)); save_trans_data->n_dep_vars = regression.n_dep_vars; - add_transformation (ds, save_trans_func, save_trans_free, save_trans_data); + static const struct trns_class trns_class = { + .name = "REGRESSION", + .execute = save_trans_func, + .destroy = save_trans_free, + }; + add_transformation (ds, &trns_class, save_trans_data); } @@ -481,6 +521,24 @@ fill_all_vars (const struct variable **vars, const struct regression *cmd) } } + +/* Fill the array VARS, with all the predictor variables from CMD, except + variable X */ +static void +fill_predictor_x (const struct variable **vars, const struct variable *x, const struct regression *cmd) +{ + size_t i; + size_t n = 0; + + for (i = 0; i < cmd->n_vars; i++) + { + if (cmd->vars[i] == x) + continue; + + vars[n++] = cmd->vars[i]; + } +} + /* Is variable k the dependent variable? */ @@ -521,7 +579,6 @@ identify_indep_vars (const struct regression *cmd, return n_indep_vars; } - static double fill_covariance (gsl_matrix * cov, struct covariance *all_cov, const struct variable **vars, @@ -531,7 +588,7 @@ fill_covariance (gsl_matrix * cov, struct covariance *all_cov, { size_t i; size_t j; - size_t dep_subscript; + size_t dep_subscript = SIZE_MAX; size_t *rows; const gsl_matrix *ssizes; const gsl_matrix *mean_matrix; @@ -559,6 +616,8 @@ fill_covariance (gsl_matrix * cov, struct covariance *all_cov, dep_subscript = i; } } + assert (dep_subscript != SIZE_MAX); + mean_matrix = covariance_moments (all_cov, MOMENT_MEAN); ssize_matrix = covariance_moments (all_cov, MOMENT_NONE); for (i = 0; i < cov->size1 - 1; i++) @@ -594,61 +653,78 @@ fill_covariance (gsl_matrix * cov, struct covariance *all_cov, +struct model_container +{ + struct linreg **models; +}; + /* STATISTICS subcommand output functions. */ -static void reg_stats_r (const linreg *, const struct variable *); -static void reg_stats_coeff (const linreg *, const gsl_matrix *, const struct variable *, const struct regression *); -static void reg_stats_anova (const linreg *, const struct variable *); -static void reg_stats_bcov (const linreg *, const struct variable *); - - -static void -subcommand_statistics (const struct regression *cmd, const linreg * c, const gsl_matrix * cm, - const struct variable *var) -{ - if (cmd->stats & STATS_R) - reg_stats_r (c, var); - - if (cmd->stats & STATS_ANOVA) - reg_stats_anova (c, var); - - if (cmd->stats & STATS_COEFF) - reg_stats_coeff (c, cm, var, cmd); - - if (cmd->stats & STATS_BCOV) - reg_stats_bcov (c, var); -} - - -static void -run_regression (const struct regression *cmd, - struct regression_workspace *ws, - struct casereader *input) +static void reg_stats_r (const struct linreg *, const struct variable *); +static void reg_stats_coeff (const struct regression *, const struct linreg *, + const struct model_container *, const gsl_matrix *, + const struct variable *); +static void reg_stats_anova (const struct linreg *, const struct variable *); +static void reg_stats_bcov (const struct linreg *, const struct variable *); + + +static struct linreg ** +run_regression_get_models (const struct regression *cmd, + struct casereader *input, + bool output) { size_t i; - linreg **models; + struct model_container *model_container = XCALLOC (cmd->n_vars, struct model_container); - int k; struct ccase *c; struct covariance *cov; struct casereader *reader; + + if (cmd->stats & STATS_TOL) + { + for (i = 0; i < cmd->n_vars; i++) + { + struct regression subreg; + subreg.origin = cmd->origin; + subreg.ds = cmd->ds; + subreg.n_vars = cmd->n_vars - 1; + subreg.n_dep_vars = 1; + subreg.vars = xmalloc (sizeof (*subreg.vars) * cmd->n_vars - 1); + subreg.dep_vars = xmalloc (sizeof (*subreg.dep_vars)); + fill_predictor_x (subreg.vars, cmd->vars[i], cmd); + subreg.dep_vars[0] = cmd->vars[i]; + subreg.stats = STATS_R; + subreg.ci = 0; + subreg.resid = false; + subreg.pred = false; + + model_container[i].models = + run_regression_get_models (&subreg, input, false); + free (subreg.vars); + free (subreg.dep_vars); + } + } + size_t n_all_vars = get_n_all_vars (cmd); const struct variable **all_vars = xnmalloc (n_all_vars, sizeof (*all_vars)); - double *means = xnmalloc (n_all_vars, sizeof (*means)); - + /* In the (rather pointless) case where the dependent variable is + the independent variable, n_all_vars == 1. + However this would result in a buffer overflow so we must + over-allocate the space required in this malloc call. + See bug #58599 */ + double *means = xnmalloc (n_all_vars <= 1 ? 2 : n_all_vars, + sizeof (*means)); fill_all_vars (all_vars, cmd); cov = covariance_1pass_create (n_all_vars, all_vars, dict_get_weight (dataset_dict (cmd->ds)), - MV_ANY); + MV_ANY, cmd->origin == false); reader = casereader_clone (input); reader = casereader_create_filter_missing (reader, all_vars, n_all_vars, MV_ANY, NULL, NULL); - - - { +{ struct casereader *r = casereader_clone (reader); for (; (c = casereader_read (r)) != NULL; case_unref (c)) @@ -658,65 +734,94 @@ run_regression (const struct regression *cmd, casereader_destroy (r); } - models = xcalloc (cmd->n_dep_vars, sizeof (*models)); - for (k = 0; k < cmd->n_dep_vars; k++) + struct linreg **models = XCALLOC (cmd->n_dep_vars, struct linreg*); + + for (int k = 0; k < cmd->n_dep_vars; k++) { const struct variable **vars = xnmalloc (cmd->n_vars, sizeof (*vars)); const struct variable *dep_var = cmd->dep_vars[k]; int n_indep = identify_indep_vars (cmd, vars, dep_var); - gsl_matrix *this_cm = gsl_matrix_alloc (n_indep + 1, n_indep + 1); - double n_data = fill_covariance (this_cm, cov, vars, n_indep, + gsl_matrix *cov_matrix = gsl_matrix_alloc (n_indep + 1, n_indep + 1); + double n_data = fill_covariance (cov_matrix, cov, vars, n_indep, dep_var, all_vars, n_all_vars, means); - models[k] = linreg_alloc (dep_var, vars, n_data, n_indep); - models[k]->depvar = dep_var; + models[k] = linreg_alloc (dep_var, vars, n_data, n_indep, cmd->origin); for (i = 0; i < n_indep; i++) { linreg_set_indep_variable_mean (models[k], i, means[i]); } linreg_set_depvar_mean (models[k], means[i]); - /* - For large data sets, use QR decomposition. - */ - if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA) - { - models[k]->method = LINREG_QR; - } - if (n_data > 0) { - /* - Find the least-squares estimates and other statistics. - */ - linreg_fit (this_cm, models[k]); + linreg_fit (cov_matrix, models[k]); - if (!taint_has_tainted_successor (casereader_get_taint (input))) + if (output && !taint_has_tainted_successor (casereader_get_taint (input))) { - subcommand_statistics (cmd, models[k], this_cm, dep_var); - } + /* + Find the least-squares estimates and other statistics. + */ + if (cmd->stats & STATS_R) + reg_stats_r (models[k], dep_var); + + if (cmd->stats & STATS_ANOVA) + reg_stats_anova (models[k], dep_var); + + if (cmd->stats & STATS_COEFF) + reg_stats_coeff (cmd, models[k], + model_container, + cov_matrix, dep_var); + + if (cmd->stats & STATS_BCOV) + reg_stats_bcov (models[k], dep_var); + } } else { msg (SE, _("No valid data found. This command was skipped.")); } - gsl_matrix_free (this_cm); free (vars); + gsl_matrix_free (cov_matrix); } + casereader_destroy (reader); + + for (int i = 0; i < cmd->n_vars; i++) + { + if (model_container[i].models) + { + linreg_unref (model_container[i].models[0]); + } + free (model_container[i].models); + } + free (model_container); + + free (all_vars); + free (means); + covariance_destroy (cov); + return models; +} + +static void +run_regression (const struct regression *cmd, + struct regression_workspace *ws, + struct casereader *input) +{ + struct linreg **models = run_regression_get_models (cmd, input, true); if (ws->extras > 0) { - struct casereader *r = casereader_clone (reader); + struct ccase *c; + struct casereader *r = casereader_clone (input); for (; (c = casereader_read (r)) != NULL; case_unref (c)) { struct ccase *outc = case_create (casewriter_get_proto (ws->writer)); - for (k = 0; k < cmd->n_dep_vars; k++) + for (int k = 0; k < cmd->n_dep_vars; k++) { const struct variable **vars = xnmalloc (cmd->n_vars, sizeof (*vars)); const struct variable *dep_var = cmd->dep_vars[k]; int n_indep = identify_indep_vars (cmd, vars, dep_var); double *vals = xnmalloc (n_indep, sizeof (*vals)); - for (i = 0; i < n_indep; i++) + for (int i = 0; i < n_indep; i++) { const union value *tmp = case_data (c, vars[i]); vals[i] = tmp->f; @@ -725,14 +830,14 @@ run_regression (const struct regression *cmd, if (cmd->pred) { double pred = linreg_predict (models[k], vals, n_indep); - case_data_rw_idx (outc, k * ws->extras + ws->pred_idx)->f = pred; + *case_num_rw_idx (outc, k * ws->extras + ws->pred_idx) = pred; } if (cmd->resid) { - double obs = case_data (c, models[k]->depvar)->f; + double obs = case_num (c, linreg_dep_var (models[k])); double res = linreg_residual (models[k], obs, vals, n_indep); - case_data_rw_idx (outc, k * ws->extras + ws->res_idx)->f = res; + *case_num_rw_idx (outc, k * ws->extras + ws->res_idx) = res; } free (vals); free (vars); @@ -742,279 +847,257 @@ run_regression (const struct regression *cmd, casereader_destroy (r); } - casereader_destroy (reader); - - for (k = 0; k < cmd->n_dep_vars; k++) + for (int k = 0; k < cmd->n_dep_vars; k++) { linreg_unref (models[k]); } - free (models); - free (all_vars); - free (means); + free (models); casereader_destroy (input); - covariance_destroy (cov); } static void -reg_stats_r (const linreg * c, const struct variable *var) +reg_stats_r (const struct linreg * c, const struct variable *var) { - struct tab_table *t; - int n_rows = 2; - int n_cols = 5; - double rsq; - double adjrsq; - double std_error; - - assert (c != NULL); - rsq = linreg_ssreg (c) / linreg_sst (c); - adjrsq = rsq - - (1.0 - rsq) * linreg_n_coeffs (c) / (linreg_n_obs (c) - - linreg_n_coeffs (c) - 1); - std_error = sqrt (linreg_mse (c)); - t = tab_create (n_cols, n_rows); - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (t, TAL_2, 0, n_cols - 1, 1); - tab_vline (t, TAL_2, 2, 0, n_rows - 1); - tab_vline (t, TAL_0, 1, 0, 0); - - tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("R")); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("R Square")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Adjusted R Square")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error of the Estimate")); - tab_double (t, 1, 1, TAB_RIGHT, sqrt (rsq), NULL, RC_OTHER); - tab_double (t, 2, 1, TAB_RIGHT, rsq, NULL, RC_OTHER); - tab_double (t, 3, 1, TAB_RIGHT, adjrsq, NULL, RC_OTHER); - tab_double (t, 4, 1, TAB_RIGHT, std_error, NULL, RC_OTHER); - tab_title (t, _("Model Summary (%s)"), var_to_string (var)); - tab_submit (t); + struct pivot_table *table = pivot_table_create__ ( + pivot_value_new_text_format (N_("Model Summary (%s)"), + var_to_string (var)), + "Model Summary"); + + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("R"), N_("R Square"), N_("Adjusted R Square"), + N_("Std. Error of the Estimate")); + + double rsq = linreg_ssreg (c) / linreg_sst (c); + double adjrsq = (rsq - + (1.0 - rsq) * linreg_n_coeffs (c) + / (linreg_n_obs (c) - linreg_n_coeffs (c) - 1)); + double std_error = sqrt (linreg_mse (c)); + + double entries[] = { + sqrt (rsq), rsq, adjrsq, std_error + }; + for (size_t i = 0; i < sizeof entries / sizeof *entries; i++) + pivot_table_put1 (table, i, pivot_value_new_number (entries[i])); + + pivot_table_submit (table); } /* Table showing estimated regression coefficients. */ static void -reg_stats_coeff (const linreg * c, const gsl_matrix *cov, const struct variable *var, const struct regression *cmd) +reg_stats_coeff (const struct regression *cmd, const struct linreg *c, + const struct model_container *mc, const gsl_matrix *cov, + const struct variable *var) { - size_t j; - int n_cols = 7; - const int heading_rows = 2; - int n_rows; - int this_row; - double t_stat; - double pval; - double std_err; - double beta; - const char *label; - - const struct variable *v; - struct tab_table *t; - - const double df = linreg_n_obs (c) - linreg_n_coeffs (c) - 1; - double q = (1 - cmd->ci) / 2.0; /* 2-tailed test */ - double tval = gsl_cdf_tdist_Qinv (q, df); + struct pivot_table *table = pivot_table_create__ ( + pivot_value_new_text_format (N_("Coefficients (%s)"), var_to_string (var)), + "Coefficients"); + + struct pivot_dimension *statistics = pivot_dimension_create ( + table, PIVOT_AXIS_COLUMN, N_("Statistics")); + pivot_category_create_group (statistics->root, + N_("Unstandardized Coefficients"), + N_("B"), N_("Std. Error")); + pivot_category_create_group (statistics->root, + N_("Standardized Coefficients"), N_("Beta")); + pivot_category_create_leaves (statistics->root, N_("t"), + N_("Sig."), PIVOT_RC_SIGNIFICANCE); + if (cmd->stats & STATS_CI) + { + struct pivot_category *interval = pivot_category_create_group__ ( + statistics->root, pivot_value_new_text_format ( + N_("%g%% Confidence Interval for B"), + cmd->ci * 100.0)); + pivot_category_create_leaves (interval, N_("Lower Bound"), + N_("Upper Bound")); + } - assert (c != NULL); - n_rows = linreg_n_coeffs (c) + heading_rows + 1; + if (cmd->stats & STATS_TOL) + pivot_category_create_group (statistics->root, + N_("Collinearity Statistics"), + N_("Tolerance"), N_("VIF")); - if (cmd->stats & STATS_CI) - n_cols += 2; - - t = tab_create (n_cols, n_rows); - tab_headers (t, 2, 0, 1, 0); - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows); - tab_vline (t, TAL_2, 2, 0, n_rows - 1); - tab_vline (t, TAL_0, 1, 0, 0); - - - tab_hline (t, TAL_1, 2, 4, 1); - tab_joint_text (t, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE, _("Unstandardized Coefficients")); - tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("B")); - tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Std. Error")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Standardized Coefficients")); - tab_text (t, 4, 1, TAB_CENTER | TAT_TITLE, _("Beta")); - tab_text (t, 5, 1, TAB_CENTER | TAT_TITLE, _("t")); - tab_text (t, 6, 1, TAB_CENTER | TAT_TITLE, _("Sig.")); - tab_text (t, 1, heading_rows, TAB_LEFT | TAT_TITLE, _("(Constant)")); - tab_double (t, 2, heading_rows, 0, linreg_intercept (c), NULL, RC_OTHER); - std_err = sqrt (gsl_matrix_get (linreg_cov (c), 0, 0)); - if (cmd->stats & STATS_CI) + struct pivot_dimension *variables = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Variables")); + + double df = linreg_n_obs (c) - linreg_n_coeffs (c) - 1; + double q = (1 - cmd->ci) / 2.0; /* 2-tailed test */ + double tval = gsl_cdf_tdist_Qinv (q, df); + + if (!cmd->origin) { - double lower = linreg_intercept (c) - tval * std_err ; - double upper = linreg_intercept (c) + tval * std_err ; - tab_double (t, 7, heading_rows, 0, lower, NULL, RC_OTHER); - tab_double (t, 8, heading_rows, 0, upper, NULL, RC_OTHER); - - tab_joint_text_format (t, 7, 0, 8, 0, TAB_CENTER | TAT_TITLE, _("%g%% Confidence Interval for B"), cmd->ci * 100); - tab_hline (t, TAL_1, 7, 8, 1); - tab_text (t, 7, 1, TAB_CENTER | TAT_TITLE, _("Lower Bound")); - tab_text (t, 8, 1, TAB_CENTER | TAT_TITLE, _("Upper Bound")); + int var_idx = pivot_category_create_leaf ( + variables->root, pivot_value_new_text (N_("(Constant)"))); + + double std_err = sqrt (gsl_matrix_get (linreg_cov (c), 0, 0)); + double t_stat = linreg_intercept (c) / std_err; + double base_entries[] = { + linreg_intercept (c), + std_err, + 0.0, + t_stat, + 2.0 * gsl_cdf_tdist_Q (fabs (t_stat), + linreg_n_obs (c) - linreg_n_coeffs (c)), + }; + + size_t col = 0; + for (size_t i = 0; i < sizeof base_entries / sizeof *base_entries; i++) + pivot_table_put2 (table, col++, var_idx, + pivot_value_new_number (base_entries[i])); + + if (cmd->stats & STATS_CI) + { + double interval_entries[] = { + linreg_intercept (c) - tval * std_err, + linreg_intercept (c) + tval * std_err, + }; + + for (size_t i = 0; i < sizeof interval_entries / sizeof *interval_entries; i++) + pivot_table_put2 (table, col++, var_idx, + pivot_value_new_number (interval_entries[i])); + } } - tab_double (t, 3, heading_rows, 0, std_err, NULL, RC_OTHER); - tab_double (t, 4, heading_rows, 0, 0.0, NULL, RC_OTHER); - t_stat = linreg_intercept (c) / std_err; - tab_double (t, 5, heading_rows, 0, t_stat, NULL, RC_OTHER); - pval = - 2 * gsl_cdf_tdist_Q (fabs (t_stat), - (double) (linreg_n_obs (c) - linreg_n_coeffs (c))); - tab_double (t, 6, heading_rows, 0, pval, NULL, RC_PVALUE); - - for (j = 0; j < linreg_n_coeffs (c); j++) - { - struct string tstr; - ds_init_empty (&tstr); - this_row = j + heading_rows + 1; - - v = linreg_indep_var (c, j); - label = var_to_string (v); - /* Do not overwrite the variable's name. */ - ds_put_cstr (&tstr, label); - tab_text (t, 1, this_row, TAB_LEFT, ds_cstr (&tstr)); - /* - Regression coefficients. - */ - tab_double (t, 2, this_row, 0, linreg_coeff (c, j), NULL, RC_OTHER); - /* - Standard error of the coefficients. - */ - std_err = sqrt (gsl_matrix_get (linreg_cov (c), j + 1, j + 1)); - tab_double (t, 3, this_row, 0, std_err, NULL, RC_OTHER); - /* - Standardized coefficient, i.e., regression coefficient - if all variables had unit variance. - */ - beta = sqrt (gsl_matrix_get (cov, j, j)); - beta *= linreg_coeff (c, j) / - sqrt (gsl_matrix_get (cov, cov->size1 - 1, cov->size2 - 1)); - tab_double (t, 4, this_row, 0, beta, NULL, RC_OTHER); - /* - Test statistic for H0: coefficient is 0. - */ - t_stat = linreg_coeff (c, j) / std_err; - tab_double (t, 5, this_row, 0, t_stat, NULL, RC_OTHER); - /* - P values for the test statistic above. - */ - pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), df); - tab_double (t, 6, this_row, 0, pval, NULL, RC_PVALUE); - ds_destroy (&tstr); + for (size_t j = 0; j < linreg_n_coeffs (c); j++) + { + const struct variable *v = linreg_indep_var (c, j); + int var_idx = pivot_category_create_leaf ( + variables->root, pivot_value_new_variable (v)); + + double std_err = sqrt (gsl_matrix_get (linreg_cov (c), j + 1, j + 1)); + double t_stat = linreg_coeff (c, j) / std_err; + double base_entries[] = { + linreg_coeff (c, j), + sqrt (gsl_matrix_get (linreg_cov (c), j + 1, j + 1)), + (sqrt (gsl_matrix_get (cov, j, j)) * linreg_coeff (c, j) / + sqrt (gsl_matrix_get (cov, cov->size1 - 1, cov->size2 - 1))), + t_stat, + 2 * gsl_cdf_tdist_Q (fabs (t_stat), df) + }; + + size_t col = 0; + for (size_t i = 0; i < sizeof base_entries / sizeof *base_entries; i++) + pivot_table_put2 (table, col++, var_idx, + pivot_value_new_number (base_entries[i])); if (cmd->stats & STATS_CI) { - double lower = linreg_coeff (c, j) - tval * std_err ; - double upper = linreg_coeff (c, j) + tval * std_err ; + double interval_entries[] = { + linreg_coeff (c, j) - tval * std_err, + linreg_coeff (c, j) + tval * std_err, + }; - tab_double (t, 7, this_row, 0, lower, NULL, RC_OTHER); - tab_double (t, 8, this_row, 0, upper, NULL, RC_OTHER); + + for (size_t i = 0; i < sizeof interval_entries / sizeof *interval_entries; i++) + pivot_table_put2 (table, col++, var_idx, + pivot_value_new_number (interval_entries[i])); + } + + if (cmd->stats & STATS_TOL) + { + { + struct linreg *m = mc[j].models[0]; + double rsq = linreg_ssreg (m) / linreg_sst (m); + pivot_table_put2 (table, col++, var_idx, pivot_value_new_number (1.0 - rsq)); + pivot_table_put2 (table, col++, var_idx, pivot_value_new_number (1.0 / (1.0 - rsq))); + } } } - tab_title (t, _("Coefficients (%s)"), var_to_string (var)); - tab_submit (t); + + pivot_table_submit (table); } /* Display the ANOVA table. */ static void -reg_stats_anova (const linreg * c, const struct variable *var) +reg_stats_anova (const struct linreg * c, const struct variable *var) { - int n_cols = 7; - int n_rows = 4; - const double msm = linreg_ssreg (c) / linreg_dfmodel (c); - const double mse = linreg_mse (c); - const double F = msm / mse; - const double pval = gsl_cdf_fdist_Q (F, c->dfm, c->dfe); - - struct tab_table *t; - - assert (c != NULL); - t = tab_create (n_cols, n_rows); - tab_headers (t, 2, 0, 1, 0); - - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - - tab_hline (t, TAL_2, 0, n_cols - 1, 1); - tab_vline (t, TAL_2, 2, 0, n_rows - 1); - tab_vline (t, TAL_0, 1, 0, 0); + struct pivot_table *table = pivot_table_create__ ( + pivot_value_new_text_format (N_("ANOVA (%s)"), var_to_string (var)), + "ANOVA"); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square")); - tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("F")); - tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Sig.")); + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("Sum of Squares"), PIVOT_RC_OTHER, + N_("df"), PIVOT_RC_INTEGER, + N_("Mean Square"), PIVOT_RC_OTHER, + N_("F"), PIVOT_RC_OTHER, + N_("Sig."), PIVOT_RC_SIGNIFICANCE); - tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("Regression")); - tab_text (t, 1, 2, TAB_LEFT | TAT_TITLE, _("Residual")); - tab_text (t, 1, 3, TAB_LEFT | TAT_TITLE, _("Total")); + pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Source"), + N_("Regression"), N_("Residual"), N_("Total")); - /* Sums of Squares */ - tab_double (t, 2, 1, 0, linreg_ssreg (c), NULL, RC_OTHER); - tab_double (t, 2, 3, 0, linreg_sst (c), NULL, RC_OTHER); - tab_double (t, 2, 2, 0, linreg_sse (c), NULL, RC_OTHER); + double msm = linreg_ssreg (c) / linreg_dfmodel (c); + double mse = linreg_mse (c); + double F = msm / mse; + struct entry + { + int stat_idx; + int source_idx; + double x; + } + entries[] = { + /* Sums of Squares. */ + { 0, 0, linreg_ssreg (c) }, + { 0, 1, linreg_sse (c) }, + { 0, 2, linreg_sst (c) }, + /* Degrees of freedom. */ + { 1, 0, linreg_dfmodel (c) }, + { 1, 1, linreg_dferror (c) }, + { 1, 2, linreg_dftotal (c) }, + /* Mean Squares. */ + { 2, 0, msm }, + { 2, 1, mse }, + /* F */ + { 3, 0, F }, + /* Significance. */ + { 4, 0, gsl_cdf_fdist_Q (F, linreg_dfmodel (c), linreg_dferror (c)) }, + }; + for (size_t i = 0; i < sizeof entries / sizeof *entries; i++) + { + const struct entry *e = &entries[i]; + pivot_table_put2 (table, e->stat_idx, e->source_idx, + pivot_value_new_number (e->x)); + } - /* Degrees of freedom */ - tab_text_format (t, 3, 1, TAB_RIGHT, "%.*g", DBL_DIG + 1, c->dfm); - tab_text_format (t, 3, 2, TAB_RIGHT, "%.*g", DBL_DIG + 1, c->dfe); - tab_text_format (t, 3, 3, TAB_RIGHT, "%.*g", DBL_DIG + 1, c->dft); - - /* Mean Squares */ - tab_double (t, 4, 1, TAB_RIGHT, msm, NULL, RC_OTHER); - tab_double (t, 4, 2, TAB_RIGHT, mse, NULL, RC_OTHER); - - tab_double (t, 5, 1, 0, F, NULL, RC_OTHER); - - tab_double (t, 6, 1, 0, pval, NULL, RC_PVALUE); - - tab_title (t, _("ANOVA (%s)"), var_to_string (var)); - tab_submit (t); + pivot_table_submit (table); } static void -reg_stats_bcov (const linreg * c, const struct variable *var) +reg_stats_bcov (const struct linreg * c, const struct variable *var) { - int n_cols; - int n_rows; - int i; - int k; - int row; - int col; - const char *label; - struct tab_table *t; - - assert (c != NULL); - n_cols = c->n_indeps + 1 + 2; - n_rows = 2 * (c->n_indeps + 1); - t = tab_create (n_cols, n_rows); - tab_headers (t, 2, 0, 1, 0); - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (t, TAL_2, 0, n_cols - 1, 1); - tab_vline (t, TAL_2, 2, 0, n_rows - 1); - tab_vline (t, TAL_0, 1, 0, 0); - tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Model")); - tab_text (t, 1, 1, TAB_CENTER | TAT_TITLE, _("Covariances")); - for (i = 0; i < linreg_n_coeffs (c); i++) + struct pivot_table *table = pivot_table_create__ ( + pivot_value_new_text_format (N_("Coefficient Correlations (%s)"), + var_to_string (var)), + "Coefficient Correlations"); + + for (size_t i = 0; i < 2; i++) { - const struct variable *v = linreg_indep_var (c, i); - label = var_to_string (v); - tab_text (t, 2, i, TAB_CENTER, label); - tab_text (t, i + 2, 0, TAB_CENTER, label); - for (k = 1; k < linreg_n_coeffs (c); k++) - { - col = (i <= k) ? k : i; - row = (i <= k) ? i : k; - tab_double (t, k + 2, i, TAB_CENTER, - gsl_matrix_get (c->cov, row, col), NULL, RC_OTHER); - } + struct pivot_dimension *models = pivot_dimension_create ( + table, i ? PIVOT_AXIS_ROW : PIVOT_AXIS_COLUMN, N_("Models")); + for (size_t j = 0; j < linreg_n_coeffs (c); j++) + pivot_category_create_leaf ( + models->root, pivot_value_new_variable ( + linreg_indep_var (c, j))); } - tab_title (t, _("Coefficient Correlations (%s)"), var_to_string (var)); - tab_submit (t); -} + pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Statistics"), + N_("Covariances")); + + for (size_t i = 0; i < linreg_n_coeffs (c); i++) + for (size_t k = 0; k < linreg_n_coeffs (c); k++) + { + double cov = gsl_matrix_get (linreg_cov (c), MIN (i, k), MAX (i, k)); + pivot_table_put3 (table, k, i, 0, pivot_value_new_number (cov)); + } + + pivot_table_submit (table); +}