X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fregression.c;h=b4448dd9e58ead03cb61cf3d5e5b3c97201b1edf;hb=60c545e6e958d868db3399a8989d37d8f9e0c131;hp=e32abb3eb57485313a3a13a6aa94b82ae71f76da;hpb=09a2c6d005e5a94b68653e36d27309e249798173;p=pspp diff --git a/src/language/stats/regression.c b/src/language/stats/regression.c index e32abb3eb5..b4448dd9e5 100644 --- a/src/language/stats/regression.c +++ b/src/language/stats/regression.c @@ -1,5 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. + Copyright (C) 2005, 2009, 2010, 2011, 2012, 2013, 2014, + 2016, 2017, 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,12 +17,15 @@ #include +#include #include +#include #include #include #include +#include #include "language/command.h" #include "language/lexer/lexer.h" @@ -40,16 +44,27 @@ #include "libpspp/message.h" #include "libpspp/taint.h" -#include "output/tab.h" +#include "output/pivot-table.h" + +#include "gl/intprops.h" +#include "gl/minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid -#include +#define STATS_R 1 +#define STATS_COEFF 2 +#define STATS_ANOVA 4 +#define STATS_OUTS 8 +#define STATS_CI 16 +#define STATS_BCOV 32 +#define STATS_TOL 64 + +#define STATS_DEFAULT (STATS_R | STATS_COEFF | STATS_ANOVA | STATS_OUTS) + -#define REG_LARGE_DATA 1000 struct regression { @@ -61,118 +76,41 @@ struct regression const struct variable **dep_vars; size_t n_dep_vars; - bool r; - bool coeff; - bool anova; - bool bcov; - + unsigned int stats; + double ci; bool resid; bool pred; - linreg **models; + bool origin; }; +struct regression_workspace +{ + /* The new variables which will be introduced by /SAVE */ + const struct variable **predvars; + const struct variable **residvars; -static 
void run_regression (const struct regression *cmd, struct casereader *input); - + /* A reader/writer pair to temporarily hold the + values of the new variables */ + struct casewriter *writer; + struct casereader *reader; + /* Indeces of the new values in the reader/writer (-1 if not applicable) */ + int res_idx; + int pred_idx; -/* - Transformations for saving predicted values - and residuals, etc. -*/ -struct reg_trns -{ - int n_trns; /* Number of transformations. */ - int trns_id; /* Which trns is this one? */ - linreg *c; /* Linear model for this trns. */ + /* 0, 1 or 2 depending on what new variables are to be created */ + int extras; }; -/* - Gets the predicted values. -*/ -static int -regression_trns_pred_proc (void *t_, struct ccase **c, - casenumber case_idx UNUSED) -{ - size_t i; - size_t n_vals; - struct reg_trns *trns = t_; - linreg *model; - union value *output = NULL; - const union value *tmp; - double *vals; - const struct variable **vars = NULL; - - assert (trns != NULL); - model = trns->c; - assert (model != NULL); - assert (model->depvar != NULL); - assert (model->pred != NULL); - - vars = linreg_get_vars (model); - n_vals = linreg_n_coeffs (model); - vals = xnmalloc (n_vals, sizeof (*vals)); - *c = case_unshare (*c); - - output = case_data_rw (*c, model->pred); - - for (i = 0; i < n_vals; i++) - { - tmp = case_data (*c, vars[i]); - vals[i] = tmp->f; - } - output->f = linreg_predict (model, vals, n_vals); - free (vals); - return TRNS_CONTINUE; -} - -/* - Gets the residuals. 
-*/ -static int -regression_trns_resid_proc (void *t_, struct ccase **c, - casenumber case_idx UNUSED) -{ - size_t i; - size_t n_vals; - struct reg_trns *trns = t_; - linreg *model; - union value *output = NULL; - const union value *tmp; - double *vals = NULL; - double obs; - const struct variable **vars = NULL; - - assert (trns != NULL); - model = trns->c; - assert (model != NULL); - assert (model->depvar != NULL); - assert (model->resid != NULL); - - vars = linreg_get_vars (model); - n_vals = linreg_n_coeffs (model); - - vals = xnmalloc (n_vals, sizeof (*vals)); - *c = case_unshare (*c); - output = case_data_rw (*c, model->resid); - assert (output != NULL); - - for (i = 0; i < n_vals; i++) - { - tmp = case_data (*c, vars[i]); - vals[i] = tmp->f; - } - tmp = case_data (*c, model->depvar); - obs = tmp->f; - output->f = linreg_residual (model, obs, vals, n_vals); - free (vals); - - return TRNS_CONTINUE; -} +static void run_regression (const struct regression *cmd, + struct regression_workspace *ws, + struct casereader *input); +/* Return a string based on PREFIX which may be used as the name + of a new variable in DICT */ static char * reg_get_name (const struct dictionary *dict, const char *prefix) { @@ -181,7 +119,7 @@ reg_get_name (const struct dictionary *dict, const char *prefix) /* XXX handle too-long prefixes */ name = xmalloc (strlen (prefix) + INT_BUFSIZE_BOUND (i) + 1); - for (i = 1; ; i++) + for (i = 1;; i++) { sprintf (name, "%s%d", prefix, i); if (dict_lookup_var (dict, name) == NULL) @@ -189,161 +127,203 @@ reg_get_name (const struct dictionary *dict, const char *prefix) } } -/* - Free the transformation. Free its linear model if this - transformation is the last one. 
-*/ -static bool -regression_trns_free (void *t_) -{ - struct reg_trns *t = t_; - - if (t->trns_id == t->n_trns) - { - linreg_unref (t->c); - } - free (t); - - return true; -} -static void -reg_save_var (struct dataset *ds, const char *prefix, trns_proc_func * f, - linreg * c, struct variable **v, int n_trns) +static const struct variable * +create_aux_var (struct dataset *ds, const char *prefix) { + struct variable *var; struct dictionary *dict = dataset_dict (ds); - static int trns_index = 1; - char *name; - struct variable *new_var; - struct reg_trns *t = NULL; - - t = xmalloc (sizeof (*t)); - t->trns_id = trns_index; - t->n_trns = n_trns; - t->c = c; - - name = reg_get_name (dict, prefix); - new_var = dict_create_var_assert (dict, name, 0); + char *name = reg_get_name (dict, prefix); + var = dict_create_var_assert (dict, name, 0); free (name); - - *v = new_var; - add_transformation (ds, f, regression_trns_free, t); - trns_index++; + return var; } -static void -subcommand_save (const struct regression *cmd) +/* Auxiliary data for transformation when /SAVE is entered */ +struct save_trans_data +{ + int n_dep_vars; + struct regression_workspace *ws; +}; + +static bool +save_trans_free (void *aux) { - linreg **lc; - int n_trns = 0; + struct save_trans_data *save_trans_data = aux; + free (save_trans_data->ws->predvars); + free (save_trans_data->ws->residvars); - if ( cmd->resid ) n_trns++; - if ( cmd->pred ) n_trns++; + casereader_destroy (save_trans_data->ws->reader); + free (save_trans_data->ws); + free (save_trans_data); + return true; +} - n_trns *= cmd->n_dep_vars; +static enum trns_result +save_trans_func (void *aux, struct ccase **c, casenumber x UNUSED) +{ + struct save_trans_data *save_trans_data = aux; + struct regression_workspace *ws = save_trans_data->ws; + struct ccase *in = casereader_read (ws->reader); - for (lc = cmd->models; lc < cmd->models + cmd->n_dep_vars; lc++) + if (in) { - if (*lc != NULL) + int k; + *c = case_unshare (*c); + + for (k = 0; k 
< save_trans_data->n_dep_vars; ++k) { - if ((*lc)->depvar != NULL) + if (ws->pred_idx != -1) { - (*lc)->refcnt++; - if (cmd->resid) - { - reg_save_var (cmd->ds, "RES", regression_trns_resid_proc, *lc, - &(*lc)->resid, n_trns); - } - if (cmd->pred) - { - reg_save_var (cmd->ds, "PRED", regression_trns_pred_proc, *lc, - &(*lc)->pred, n_trns); - } + double pred = case_num_idx (in, ws->extras * k + ws->pred_idx); + *case_num_rw (*c, ws->predvars[k]) = pred; + } + + if (ws->res_idx != -1) + { + double resid = case_num_idx (in, ws->extras * k + ws->res_idx); + *case_num_rw (*c, ws->residvars[k]) = resid; } } + case_unref (in); } + + return TRNS_CONTINUE; } int cmd_regression (struct lexer *lexer, struct dataset *ds) { - int k; + struct regression_workspace workspace; struct regression regression; const struct dictionary *dict = dataset_dict (ds); bool save; memset (&regression, 0, sizeof (struct regression)); - regression.anova = true; - regression.coeff = true; - regression.r = true; - + regression.ci = 0.95; + regression.stats = STATS_DEFAULT; regression.pred = false; regression.resid = false; regression.ds = ds; + regression.origin = false; - /* Accept an optional, completely pointless "/VARIABLES=" */ - lex_match (lexer, T_SLASH); - if (lex_match_id (lexer, "VARIABLES")) - { - if (! lex_force_match (lexer, T_EQUALS) ) - goto error; - } - - if (!parse_variables_const (lexer, dict, - &regression.vars, &regression.n_vars, - PV_NO_DUPLICATE | PV_NUMERIC)) - goto error; - - + bool variables_seen = false; + bool method_seen = false; + bool dependent_seen = false; while (lex_token (lexer) != T_ENDCMD) { lex_match (lexer, T_SLASH); - if (lex_match_id (lexer, "DEPENDENT")) + if (lex_match_id (lexer, "VARIABLES")) { - if (! 
lex_force_match (lexer, T_EQUALS) ) - goto error; + if (method_seen) + { + msg (SE, _("VARIABLES may not appear after %s"), "METHOD"); + goto error; + } + if (dependent_seen) + { + msg (SE, _("VARIABLES may not appear after %s"), "DEPENDENT"); + goto error; + } + variables_seen = true; + lex_match (lexer, T_EQUALS); + + if (!parse_variables_const (lexer, dict, + &regression.vars, &regression.n_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + goto error; + } + else if (lex_match_id (lexer, "DEPENDENT")) + { + dependent_seen = true; + lex_match (lexer, T_EQUALS); + + free (regression.dep_vars); + regression.n_dep_vars = 0; if (!parse_variables_const (lexer, dict, - &regression.dep_vars, &regression.n_dep_vars, + &regression.dep_vars, + &regression.n_dep_vars, PV_NO_DUPLICATE | PV_NUMERIC)) goto error; } + else if (lex_match_id (lexer, "ORIGIN")) + { + regression.origin = true; + } + else if (lex_match_id (lexer, "NOORIGIN")) + { + regression.origin = false; + } else if (lex_match_id (lexer, "METHOD")) - { - lex_match (lexer, T_EQUALS); + { + method_seen = true; + lex_match (lexer, T_EQUALS); if (!lex_force_match_id (lexer, "ENTER")) { goto error; } + + if (! 
variables_seen) + { + if (!parse_variables_const (lexer, dict, + &regression.vars, &regression.n_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + goto error; + } } else if (lex_match_id (lexer, "STATISTICS")) - { - lex_match (lexer, T_EQUALS); + { + unsigned long statistics = 0; + lex_match (lexer, T_EQUALS); - while (lex_token (lexer) != T_ENDCMD - && lex_token (lexer) != T_SLASH) - { + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { if (lex_match (lexer, T_ALL)) { + statistics = ~0; } else if (lex_match_id (lexer, "DEFAULTS")) { + statistics |= STATS_DEFAULT; } else if (lex_match_id (lexer, "R")) { + statistics |= STATS_R; } else if (lex_match_id (lexer, "COEFF")) { + statistics |= STATS_COEFF; } else if (lex_match_id (lexer, "ANOVA")) { + statistics |= STATS_ANOVA; } else if (lex_match_id (lexer, "BCOV")) { + statistics |= STATS_BCOV; + } + else if (lex_match_id (lexer, "TOL")) + { + statistics |= STATS_TOL; + } + else if (lex_match_id (lexer, "CI")) + { + statistics |= STATS_CI; + + if (lex_match (lexer, T_LPAREN) && + lex_force_num (lexer)) + { + regression.ci = lex_number (lexer) / 100.0; + lex_get (lexer); + if (! 
lex_force_match (lexer, T_RPAREN)) + goto error; + } } else { @@ -351,14 +331,18 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) goto error; } } + + if (statistics) + regression.stats = statistics; + } else if (lex_match_id (lexer, "SAVE")) - { - lex_match (lexer, T_EQUALS); + { + lex_match (lexer, T_EQUALS); - while (lex_token (lexer) != T_ENDCMD - && lex_token (lexer) != T_SLASH) - { + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { if (lex_match_id (lexer, "PRED")) { regression.pred = true; @@ -386,56 +370,106 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) dict_get_vars (dict, &regression.vars, &regression.n_vars, 0); } - - regression.models = xcalloc (regression.n_dep_vars, sizeof *regression.models); - save = regression.pred || regression.resid; + workspace.extras = 0; + workspace.res_idx = -1; + workspace.pred_idx = -1; + workspace.writer = NULL; + workspace.reader = NULL; + workspace.residvars = NULL; + workspace.predvars = NULL; if (save) { + int i; + struct caseproto *proto = caseproto_create (); + + if (regression.resid) + { + workspace.res_idx = workspace.extras ++; + workspace.residvars = xcalloc (regression.n_dep_vars, sizeof (*workspace.residvars)); + + for (i = 0; i < regression.n_dep_vars; ++i) + { + workspace.residvars[i] = create_aux_var (ds, "RES"); + proto = caseproto_add_width (proto, 0); + } + } + + if (regression.pred) + { + workspace.pred_idx = workspace.extras ++; + workspace.predvars = xcalloc (regression.n_dep_vars, sizeof (*workspace.predvars)); + + for (i = 0; i < regression.n_dep_vars; ++i) + { + workspace.predvars[i] = create_aux_var (ds, "PRED"); + proto = caseproto_add_width (proto, 0); + } + } + if (proc_make_temporary_transformations_permanent (ds)) msg (SW, _("REGRESSION with SAVE ignores TEMPORARY. " "Temporary transformations will be made permanent.")); + + if (dict_get_filter (dict)) + msg (SW, _("REGRESSION with SAVE ignores FILTER. 
" + "All cases will be processed.")); + + workspace.writer = autopaging_writer_create (proto); + caseproto_unref (proto); } + { struct casegrouper *grouper; struct casereader *group; bool ok; - - grouper = casegrouper_create_splits (proc_open_filtering (ds, !save), - dict); + + grouper = casegrouper_create_splits (proc_open_filtering (ds, !save), dict); + + while (casegrouper_get_next_group (grouper, &group)) - run_regression (&regression, group); + { + run_regression (&regression, + &workspace, + group); + + } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; } - if (save) + if (workspace.writer) { - subcommand_save (&regression); + struct save_trans_data *save_trans_data = xmalloc (sizeof *save_trans_data); + struct casereader *r = casewriter_make_reader (workspace.writer); + workspace.writer = NULL; + workspace.reader = r; + save_trans_data->ws = xmalloc (sizeof (workspace)); + memcpy (save_trans_data->ws, &workspace, sizeof (workspace)); + save_trans_data->n_dep_vars = regression.n_dep_vars; + + static const struct trns_class trns_class = { + .name = "REGRESSION", + .execute = save_trans_func, + .destroy = save_trans_free, + }; + add_transformation (ds, &trns_class, save_trans_data); } - - for (k = 0; k < regression.n_dep_vars; k++) - linreg_unref (regression.models[k]); - free (regression.models); + free (regression.vars); free (regression.dep_vars); return CMD_SUCCESS; - - error: - if (regression.models) - { - for (k = 0; k < regression.n_dep_vars; k++) - linreg_unref (regression.models[k]); - free (regression.models); - } + +error: + free (regression.vars); free (regression.dep_vars); return CMD_FAILURE; } - +/* Return the size of the union of dependent and independent variables */ static size_t get_n_all_vars (const struct regression *cmd) { @@ -447,42 +481,61 @@ get_n_all_vars (const struct regression *cmd) for (i = 0; i < cmd->n_dep_vars; i++) { for (j = 0; j < cmd->n_vars; j++) - { - if (cmd->vars[j] == cmd->dep_vars[i]) - { - result--; - } - } + { 
+ if (cmd->vars[j] == cmd->dep_vars[i]) + { + result--; + } + } } return result; } +/* Fill VARS with the union of dependent and independent variables */ static void fill_all_vars (const struct variable **vars, const struct regression *cmd) { + size_t x = 0; size_t i; - size_t j; - bool absent; - for (i = 0; i < cmd->n_vars; i++) { vars[i] = cmd->vars[i]; } + for (i = 0; i < cmd->n_dep_vars; i++) { - absent = true; + size_t j; + bool absent = true; for (j = 0; j < cmd->n_vars; j++) - { - if (cmd->dep_vars[i] == cmd->vars[j]) - { - absent = false; - break; - } - } + { + if (cmd->dep_vars[i] == cmd->vars[j]) + { + absent = false; + break; + } + } if (absent) - { - vars[i + cmd->n_vars] = cmd->dep_vars[i]; - } + { + vars[cmd->n_vars + x++] = cmd->dep_vars[i]; + } + } +} + + +/* Fill the array VARS, with all the predictor variables from CMD, except + variable X */ +static void +fill_predictor_x (const struct variable **vars, const struct variable *x, const struct regression *cmd) +{ + size_t i; + size_t n = 0; + + for (i = 0; i < cmd->n_vars; i++) + { + if (cmd->vars[i] == x) + continue; + + vars[n++] = cmd->vars[i]; } } @@ -499,9 +552,9 @@ is_depvar (const struct regression *cmd, size_t k, const struct variable *v) /* Identify the explanatory variables in v_variables. Returns the number of independent variables. */ static int -identify_indep_vars (const struct regression *cmd, +identify_indep_vars (const struct regression *cmd, const struct variable **indep_vars, - const struct variable *depvar) + const struct variable *depvar) { int n_indep_vars = 0; int i; @@ -512,68 +565,70 @@ identify_indep_vars (const struct regression *cmd, if ((n_indep_vars < 1) && is_depvar (cmd, 0, depvar)) { /* - There is only one independent variable, and it is the same - as the dependent variable. Print a warning and continue. - */ + There is only one independent variable, and it is the same + as the dependent variable. Print a warning and continue. 
+ */ msg (SW, - gettext ("The dependent variable is equal to the independent variable." - "The least squares line is therefore Y=X." - "Standard errors and related statistics may be meaningless.")); + gettext + ("The dependent variable is equal to the independent variable. " + "The least squares line is therefore Y=X. " + "Standard errors and related statistics may be meaningless.")); n_indep_vars = 1; indep_vars[0] = cmd->vars[0]; } return n_indep_vars; } - static double -fill_covariance (gsl_matrix *cov, struct covariance *all_cov, - const struct variable **vars, - size_t n_vars, const struct variable *dep_var, - const struct variable **all_vars, size_t n_all_vars, - double *means) +fill_covariance (gsl_matrix * cov, struct covariance *all_cov, + const struct variable **vars, + size_t n_vars, const struct variable *dep_var, + const struct variable **all_vars, size_t n_all_vars, + double *means) { size_t i; size_t j; - size_t dep_subscript; + size_t dep_subscript = SIZE_MAX; size_t *rows; const gsl_matrix *ssizes; const gsl_matrix *mean_matrix; const gsl_matrix *ssize_matrix; double result = 0.0; - - gsl_matrix *cm = covariance_calculate_unnormalized (all_cov); - if ( cm == NULL) + const gsl_matrix *cm = covariance_calculate_unnormalized (all_cov); + + if (cm == NULL) return 0; rows = xnmalloc (cov->size1 - 1, sizeof (*rows)); - + for (i = 0; i < n_all_vars; i++) { for (j = 0; j < n_vars; j++) - { - if (vars[j] == all_vars[i]) - { - rows[j] = i; - } - } + { + if (vars[j] == all_vars[i]) + { + rows[j] = i; + } + } if (all_vars[i] == dep_var) - { - dep_subscript = i; - } + { + dep_subscript = i; + } } + assert (dep_subscript != SIZE_MAX); + mean_matrix = covariance_moments (all_cov, MOMENT_MEAN); ssize_matrix = covariance_moments (all_cov, MOMENT_NONE); for (i = 0; i < cov->size1 - 1; i++) { means[i] = gsl_matrix_get (mean_matrix, rows[i], 0) - / gsl_matrix_get (ssize_matrix, rows[i], 0); + / gsl_matrix_get (ssize_matrix, rows[i], 0); for (j = 0; j < cov->size2 - 1; 
j++) - { - gsl_matrix_set (cov, i, j, gsl_matrix_get (cm, rows[i], rows[j])); - gsl_matrix_set (cov, j, i, gsl_matrix_get (cm, rows[j], rows[i])); - } + { + gsl_matrix_set (cov, i, j, gsl_matrix_get (cm, rows[i], rows[j])); + gsl_matrix_set (cov, j, i, gsl_matrix_get (cm, rows[j], rows[i])); + } } means[cov->size1 - 1] = gsl_matrix_get (mean_matrix, dep_subscript, 0) / gsl_matrix_get (ssize_matrix, dep_subscript, 0); @@ -581,372 +636,468 @@ fill_covariance (gsl_matrix *cov, struct covariance *all_cov, result = gsl_matrix_get (ssizes, dep_subscript, rows[0]); for (i = 0; i < cov->size1 - 1; i++) { - gsl_matrix_set (cov, i, cov->size1 - 1, - gsl_matrix_get (cm, rows[i], dep_subscript)); - gsl_matrix_set (cov, cov->size1 - 1, i, - gsl_matrix_get (cm, rows[i], dep_subscript)); + gsl_matrix_set (cov, i, cov->size1 - 1, + gsl_matrix_get (cm, rows[i], dep_subscript)); + gsl_matrix_set (cov, cov->size1 - 1, i, + gsl_matrix_get (cm, rows[i], dep_subscript)); if (result > gsl_matrix_get (ssizes, rows[i], dep_subscript)) - { - result = gsl_matrix_get (ssizes, rows[i], dep_subscript); - } + { + result = gsl_matrix_get (ssizes, rows[i], dep_subscript); + } } - gsl_matrix_set (cov, cov->size1 - 1, cov->size1 - 1, - gsl_matrix_get (cm, dep_subscript, dep_subscript)); + gsl_matrix_set (cov, cov->size1 - 1, cov->size1 - 1, + gsl_matrix_get (cm, dep_subscript, dep_subscript)); free (rows); - gsl_matrix_free (cm); return result; } + + +struct model_container +{ + struct linreg **models; +}; /* STATISTICS subcommand output functions. 
*/ -static void reg_stats_r (linreg *, void *, const struct variable *); -static void reg_stats_coeff (linreg *, void *, const struct variable *); -static void reg_stats_anova (linreg *, void *, const struct variable *); -static void reg_stats_bcov (linreg *, void *, const struct variable *); - -static void statistics_keyword_output (void (*)(linreg *, void *, const struct variable *), - bool, linreg *, void *, const struct variable *); - - - -static void -subcommand_statistics (const struct regression *cmd , linreg * c, void *aux, - const struct variable *var) -{ - statistics_keyword_output (reg_stats_r, cmd->r, c, aux, var); - statistics_keyword_output (reg_stats_anova, cmd->anova, c, aux, var); - statistics_keyword_output (reg_stats_coeff, cmd->coeff, c, aux, var); - statistics_keyword_output (reg_stats_bcov, cmd->bcov, c, aux, var); -} - - -static void -run_regression (const struct regression *cmd, struct casereader *input) +static void reg_stats_r (const struct linreg *, const struct variable *); +static void reg_stats_coeff (const struct regression *, const struct linreg *, + const struct model_container *, const gsl_matrix *, + const struct variable *); +static void reg_stats_anova (const struct linreg *, const struct variable *); +static void reg_stats_bcov (const struct linreg *, const struct variable *); + + +static struct linreg ** +run_regression_get_models (const struct regression *cmd, + struct casereader *input, + bool output) { size_t i; - int n_indep = 0; - int k; - double *means; + struct model_container *model_container = XCALLOC (cmd->n_vars, struct model_container); + struct ccase *c; struct covariance *cov; - const struct variable **vars; - const struct variable **all_vars; struct casereader *reader; - size_t n_all_vars; - linreg **models = cmd->models; + if (cmd->stats & STATS_TOL) + { + for (i = 0; i < cmd->n_vars; i++) + { + struct regression subreg; + subreg.origin = cmd->origin; + subreg.ds = cmd->ds; + subreg.n_vars = cmd->n_vars - 1; + 
subreg.n_dep_vars = 1; + subreg.vars = xmalloc (sizeof (*subreg.vars) * cmd->n_vars - 1); + subreg.dep_vars = xmalloc (sizeof (*subreg.dep_vars)); + fill_predictor_x (subreg.vars, cmd->vars[i], cmd); + subreg.dep_vars[0] = cmd->vars[i]; + subreg.stats = STATS_R; + subreg.ci = 0; + subreg.resid = false; + subreg.pred = false; + + model_container[i].models = + run_regression_get_models (&subreg, input, false); + free (subreg.vars); + free (subreg.dep_vars); + } + } + + size_t n_all_vars = get_n_all_vars (cmd); + const struct variable **all_vars = xnmalloc (n_all_vars, sizeof (*all_vars)); - n_all_vars = get_n_all_vars (cmd); - all_vars = xnmalloc (n_all_vars, sizeof (*all_vars)); + /* In the (rather pointless) case where the dependent variable is + the independent variable, n_all_vars == 1. + However this would result in a buffer overflow so we must + over-allocate the space required in this malloc call. + See bug #58599 */ + double *means = xnmalloc (n_all_vars <= 1 ? 2 : n_all_vars, + sizeof (*means)); fill_all_vars (all_vars, cmd); - vars = xnmalloc (cmd->n_vars, sizeof (*vars)); - means = xnmalloc (n_all_vars, sizeof (*means)); cov = covariance_1pass_create (n_all_vars, all_vars, - dict_get_weight (dataset_dict (cmd->ds)), MV_ANY); + dict_get_weight (dataset_dict (cmd->ds)), + MV_ANY, cmd->origin == false); reader = casereader_clone (input); reader = casereader_create_filter_missing (reader, all_vars, n_all_vars, - MV_ANY, NULL, NULL); + MV_ANY, NULL, NULL); +{ + struct casereader *r = casereader_clone (reader); + for (; (c = casereader_read (r)) != NULL; case_unref (c)) + { + covariance_accumulate (cov, c); + } + casereader_destroy (r); + } - for (; (c = casereader_read (reader)) != NULL; case_unref (c)) - { - covariance_accumulate (cov, c); - } + struct linreg **models = XCALLOC (cmd->n_dep_vars, struct linreg*); - for (k = 0; k < cmd->n_dep_vars; k++) + for (int k = 0; k < cmd->n_dep_vars; k++) { - double n_data; + const struct variable **vars = xnmalloc 
(cmd->n_vars, sizeof (*vars)); const struct variable *dep_var = cmd->dep_vars[k]; - gsl_matrix *this_cm; - - n_indep = identify_indep_vars (cmd, vars, dep_var); - - this_cm = gsl_matrix_alloc (n_indep + 1, n_indep + 1); - n_data = fill_covariance (this_cm, cov, vars, n_indep, - dep_var, all_vars, n_all_vars, means); - models[k] = linreg_alloc (dep_var, (const struct variable **) vars, - n_data, n_indep); - models[k]->depvar = dep_var; + int n_indep = identify_indep_vars (cmd, vars, dep_var); + gsl_matrix *cov_matrix = gsl_matrix_alloc (n_indep + 1, n_indep + 1); + double n_data = fill_covariance (cov_matrix, cov, vars, n_indep, + dep_var, all_vars, n_all_vars, means); + models[k] = linreg_alloc (dep_var, vars, n_data, n_indep, cmd->origin); for (i = 0; i < n_indep; i++) - { - linreg_set_indep_variable_mean (models[k], i, means[i]); - } + { + linreg_set_indep_variable_mean (models[k], i, means[i]); + } linreg_set_depvar_mean (models[k], means[i]); - /* - For large data sets, use QR decomposition. - */ - if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA) - { - models[k]->method = LINREG_QR; - } - if (n_data > 0) - { - /* - Find the least-squares estimates and other statistics. - */ - linreg_fit (this_cm, models[k]); - - if (!taint_has_tainted_successor (casereader_get_taint (input))) - { - subcommand_statistics (cmd, models[k], this_cm, dep_var); + { + linreg_fit (cov_matrix, models[k]); + + if (output && !taint_has_tainted_successor (casereader_get_taint (input))) + { + /* + Find the least-squares estimates and other statistics. + */ + if (cmd->stats & STATS_R) + reg_stats_r (models[k], dep_var); + + if (cmd->stats & STATS_ANOVA) + reg_stats_anova (models[k], dep_var); + + if (cmd->stats & STATS_COEFF) + reg_stats_coeff (cmd, models[k], + model_container, + cov_matrix, dep_var); + + if (cmd->stats & STATS_BCOV) + reg_stats_bcov (models[k], dep_var); } - } + } else + { + msg (SE, _("No valid data found. 
This command was skipped.")); + } + free (vars); + gsl_matrix_free (cov_matrix); + } + + casereader_destroy (reader); + + for (int i = 0; i < cmd->n_vars; i++) + { + if (model_container[i].models) { - msg (SE, - _("No valid data found. This command was skipped.")); - linreg_unref (models[k]); - models[k] = NULL; + linreg_unref (model_container[i].models[0]); } - gsl_matrix_free (this_cm); + free (model_container[i].models); } - - casereader_destroy (reader); - free (vars); + free (model_container); + free (all_vars); free (means); - casereader_destroy (input); covariance_destroy (cov); + return models; } +static void +run_regression (const struct regression *cmd, + struct regression_workspace *ws, + struct casereader *input) +{ + struct linreg **models = run_regression_get_models (cmd, input, true); + + if (ws->extras > 0) + { + struct ccase *c; + struct casereader *r = casereader_clone (input); + + for (; (c = casereader_read (r)) != NULL; case_unref (c)) + { + struct ccase *outc = case_create (casewriter_get_proto (ws->writer)); + for (int k = 0; k < cmd->n_dep_vars; k++) + { + const struct variable **vars = xnmalloc (cmd->n_vars, sizeof (*vars)); + const struct variable *dep_var = cmd->dep_vars[k]; + int n_indep = identify_indep_vars (cmd, vars, dep_var); + double *vals = xnmalloc (n_indep, sizeof (*vals)); + for (int i = 0; i < n_indep; i++) + { + const union value *tmp = case_data (c, vars[i]); + vals[i] = tmp->f; + } + + if (cmd->pred) + { + double pred = linreg_predict (models[k], vals, n_indep); + *case_num_rw_idx (outc, k * ws->extras + ws->pred_idx) = pred; + } + + if (cmd->resid) + { + double obs = case_num (c, linreg_dep_var (models[k])); + double res = linreg_residual (models[k], obs, vals, n_indep); + *case_num_rw_idx (outc, k * ws->extras + ws->res_idx) = res; + } + free (vals); + free (vars); + } + casewriter_write (ws->writer, outc); + } + casereader_destroy (r); + } + + for (int k = 0; k < cmd->n_dep_vars; k++) + { + linreg_unref (models[k]); + } 
+ + free (models); + casereader_destroy (input); +} static void -reg_stats_r (linreg *c, void *aux UNUSED, const struct variable *var) +reg_stats_r (const struct linreg * c, const struct variable *var) { - struct tab_table *t; - int n_rows = 2; - int n_cols = 5; - double rsq; - double adjrsq; - double std_error; - - assert (c != NULL); - rsq = linreg_ssreg (c) / linreg_sst (c); - adjrsq = rsq - - (1.0 - rsq) * linreg_n_coeffs (c) / (linreg_n_obs (c) - linreg_n_coeffs (c) - 1); - std_error = sqrt (linreg_mse (c)); - t = tab_create (n_cols, n_rows); - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (t, TAL_2, 0, n_cols - 1, 1); - tab_vline (t, TAL_2, 2, 0, n_rows - 1); - tab_vline (t, TAL_0, 1, 0, 0); - - tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("R")); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("R Square")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Adjusted R Square")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error of the Estimate")); - tab_double (t, 1, 1, TAB_RIGHT, sqrt (rsq), NULL); - tab_double (t, 2, 1, TAB_RIGHT, rsq, NULL); - tab_double (t, 3, 1, TAB_RIGHT, adjrsq, NULL); - tab_double (t, 4, 1, TAB_RIGHT, std_error, NULL); - tab_title (t, _("Model Summary (%s)"), var_to_string (var)); - tab_submit (t); + struct pivot_table *table = pivot_table_create__ ( + pivot_value_new_text_format (N_("Model Summary (%s)"), + var_to_string (var)), + "Model Summary"); + + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("R"), N_("R Square"), N_("Adjusted R Square"), + N_("Std. 
Error of the Estimate")); + + double rsq = linreg_ssreg (c) / linreg_sst (c); + double adjrsq = (rsq - + (1.0 - rsq) * linreg_n_coeffs (c) + / (linreg_n_obs (c) - linreg_n_coeffs (c) - 1)); + double std_error = sqrt (linreg_mse (c)); + + double entries[] = { + sqrt (rsq), rsq, adjrsq, std_error + }; + for (size_t i = 0; i < sizeof entries / sizeof *entries; i++) + pivot_table_put1 (table, i, pivot_value_new_number (entries[i])); + + pivot_table_submit (table); } /* Table showing estimated regression coefficients. */ static void -reg_stats_coeff (linreg * c, void *aux_, const struct variable *var) +reg_stats_coeff (const struct regression *cmd, const struct linreg *c, + const struct model_container *mc, const gsl_matrix *cov, + const struct variable *var) { - size_t j; - int n_cols = 7; - int n_rows; - int this_row; - double t_stat; - double pval; - double std_err; - double beta; - const char *label; - - const struct variable *v; - struct tab_table *t; - gsl_matrix *cov = aux_; - - assert (c != NULL); - n_rows = linreg_n_coeffs (c) + 3; - - t = tab_create (n_cols, n_rows); - tab_headers (t, 2, 0, 1, 0); - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (t, TAL_2, 0, n_cols - 1, 1); - tab_vline (t, TAL_2, 2, 0, n_rows - 1); - tab_vline (t, TAL_0, 1, 0, 0); - - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("B")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. 
Error")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Beta")); - tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("t")); - tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance")); - tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("(Constant)")); - tab_double (t, 2, 1, 0, linreg_intercept (c), NULL); - std_err = sqrt (gsl_matrix_get (linreg_cov (c), 0, 0)); - tab_double (t, 3, 1, 0, std_err, NULL); - tab_double (t, 4, 1, 0, 0.0, NULL); - t_stat = linreg_intercept (c) / std_err; - tab_double (t, 5, 1, 0, t_stat, NULL); - pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), (double) (linreg_n_obs (c) - linreg_n_coeffs (c))); - tab_double (t, 6, 1, 0, pval, NULL); - for (j = 0; j < linreg_n_coeffs (c); j++) + struct pivot_table *table = pivot_table_create__ ( + pivot_value_new_text_format (N_("Coefficients (%s)"), var_to_string (var)), + "Coefficients"); + + struct pivot_dimension *statistics = pivot_dimension_create ( + table, PIVOT_AXIS_COLUMN, N_("Statistics")); + pivot_category_create_group (statistics->root, + N_("Unstandardized Coefficients"), + N_("B"), N_("Std. Error")); + pivot_category_create_group (statistics->root, + N_("Standardized Coefficients"), N_("Beta")); + pivot_category_create_leaves (statistics->root, N_("t"), + N_("Sig."), PIVOT_RC_SIGNIFICANCE); + if (cmd->stats & STATS_CI) { - struct string tstr; - ds_init_empty (&tstr); - this_row = j + 2; - - v = linreg_indep_var (c, j); - label = var_to_string (v); - /* Do not overwrite the variable's name. */ - ds_put_cstr (&tstr, label); - tab_text (t, 1, this_row, TAB_CENTER, ds_cstr (&tstr)); - /* - Regression coefficients. - */ - tab_double (t, 2, this_row, 0, linreg_coeff (c, j), NULL); - /* - Standard error of the coefficients. - */ - std_err = sqrt (gsl_matrix_get (linreg_cov (c), j + 1, j + 1)); - tab_double (t, 3, this_row, 0, std_err, NULL); - /* - Standardized coefficient, i.e., regression coefficient - if all variables had unit variance. 
- */ - beta = sqrt (gsl_matrix_get (cov, j, j)); - beta *= linreg_coeff (c, j) / - sqrt (gsl_matrix_get (cov, cov->size1 - 1, cov->size2 - 1)); - tab_double (t, 4, this_row, 0, beta, NULL); - - /* - Test statistic for H0: coefficient is 0. - */ - t_stat = linreg_coeff (c, j) / std_err; - tab_double (t, 5, this_row, 0, t_stat, NULL); - /* - P values for the test statistic above. - */ - pval = - 2 * gsl_cdf_tdist_Q (fabs (t_stat), - (double) (linreg_n_obs (c) - linreg_n_coeffs (c) - 1)); - tab_double (t, 6, this_row, 0, pval, NULL); - ds_destroy (&tstr); + struct pivot_category *interval = pivot_category_create_group__ ( + statistics->root, pivot_value_new_text_format ( + N_("%g%% Confidence Interval for B"), + cmd->ci * 100.0)); + pivot_category_create_leaves (interval, N_("Lower Bound"), + N_("Upper Bound")); } - tab_title (t, _("Coefficients (%s)"), var_to_string (var)); - tab_submit (t); -} - -/* - Display the ANOVA table. -*/ -static void -reg_stats_anova (linreg * c, void *aux UNUSED, const struct variable *var) -{ - int n_cols = 7; - int n_rows = 4; - const double msm = linreg_ssreg (c) / linreg_dfmodel (c); - const double mse = linreg_mse (c); - const double F = msm / mse; - const double pval = gsl_cdf_fdist_Q (F, c->dfm, c->dfe); - struct tab_table *t; + if (cmd->stats & STATS_TOL) + pivot_category_create_group (statistics->root, + N_("Collinearity Statistics"), + N_("Tolerance"), N_("VIF")); - assert (c != NULL); - t = tab_create (n_cols, n_rows); - tab_headers (t, 2, 0, 1, 0); - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); + struct pivot_dimension *variables = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Variables")); - tab_hline (t, TAL_2, 0, n_cols - 1, 1); - tab_vline (t, TAL_2, 2, 0, n_rows - 1); - tab_vline (t, TAL_0, 1, 0, 0); + double df = linreg_n_obs (c) - linreg_n_coeffs (c) - 1; + double q = (1 - cmd->ci) / 2.0; /* 2-tailed test */ + double tval = gsl_cdf_tdist_Qinv (q, df); - tab_text (t, 2, 0, TAB_CENTER | 
TAT_TITLE, _("Sum of Squares")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square")); - tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("F")); - tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance")); + if (!cmd->origin) + { + int var_idx = pivot_category_create_leaf ( + variables->root, pivot_value_new_text (N_("(Constant)"))); + + double std_err = sqrt (gsl_matrix_get (linreg_cov (c), 0, 0)); + double t_stat = linreg_intercept (c) / std_err; + double base_entries[] = { + linreg_intercept (c), + std_err, + 0.0, + t_stat, + 2.0 * gsl_cdf_tdist_Q (fabs (t_stat), + linreg_n_obs (c) - linreg_n_coeffs (c)), + }; + + size_t col = 0; + for (size_t i = 0; i < sizeof base_entries / sizeof *base_entries; i++) + pivot_table_put2 (table, col++, var_idx, + pivot_value_new_number (base_entries[i])); + + if (cmd->stats & STATS_CI) + { + double interval_entries[] = { + linreg_intercept (c) - tval * std_err, + linreg_intercept (c) + tval * std_err, + }; + + for (size_t i = 0; i < sizeof interval_entries / sizeof *interval_entries; i++) + pivot_table_put2 (table, col++, var_idx, + pivot_value_new_number (interval_entries[i])); + } + } - tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("Regression")); - tab_text (t, 1, 2, TAB_LEFT | TAT_TITLE, _("Residual")); - tab_text (t, 1, 3, TAB_LEFT | TAT_TITLE, _("Total")); + for (size_t j = 0; j < linreg_n_coeffs (c); j++) + { + const struct variable *v = linreg_indep_var (c, j); + int var_idx = pivot_category_create_leaf ( + variables->root, pivot_value_new_variable (v)); + + double std_err = sqrt (gsl_matrix_get (linreg_cov (c), j + 1, j + 1)); + double t_stat = linreg_coeff (c, j) / std_err; + double base_entries[] = { + linreg_coeff (c, j), + sqrt (gsl_matrix_get (linreg_cov (c), j + 1, j + 1)), + (sqrt (gsl_matrix_get (cov, j, j)) * linreg_coeff (c, j) / + sqrt (gsl_matrix_get (cov, cov->size1 - 1, cov->size2 - 1))), + t_stat, + 2 * gsl_cdf_tdist_Q (fabs (t_stat), df) + }; 
+ + size_t col = 0; + for (size_t i = 0; i < sizeof base_entries / sizeof *base_entries; i++) + pivot_table_put2 (table, col++, var_idx, + pivot_value_new_number (base_entries[i])); + + if (cmd->stats & STATS_CI) + { + double interval_entries[] = { + linreg_coeff (c, j) - tval * std_err, + linreg_coeff (c, j) + tval * std_err, + }; - /* Sums of Squares */ - tab_double (t, 2, 1, 0, linreg_ssreg (c), NULL); - tab_double (t, 2, 3, 0, linreg_sst (c), NULL); - tab_double (t, 2, 2, 0, linreg_sse (c), NULL); + for (size_t i = 0; i < sizeof interval_entries / sizeof *interval_entries; i++) + pivot_table_put2 (table, col++, var_idx, + pivot_value_new_number (interval_entries[i])); + } - /* Degrees of freedom */ - tab_text_format (t, 3, 1, TAB_RIGHT, "%g", c->dfm); - tab_text_format (t, 3, 2, TAB_RIGHT, "%g", c->dfe); - tab_text_format (t, 3, 3, TAB_RIGHT, "%g", c->dft); + if (cmd->stats & STATS_TOL) + { + { + struct linreg *m = mc[j].models[0]; + double rsq = linreg_ssreg (m) / linreg_sst (m); + pivot_table_put2 (table, col++, var_idx, pivot_value_new_number (1.0 - rsq)); + pivot_table_put2 (table, col++, var_idx, pivot_value_new_number (1.0 / (1.0 - rsq))); + } + } + } - /* Mean Squares */ - tab_double (t, 4, 1, TAB_RIGHT, msm, NULL); - tab_double (t, 4, 2, TAB_RIGHT, mse, NULL); + pivot_table_submit (table); +} - tab_double (t, 5, 1, 0, F, NULL); +/* + Display the ANOVA table. 
+*/ +static void +reg_stats_anova (const struct linreg * c, const struct variable *var) +{ + struct pivot_table *table = pivot_table_create__ ( + pivot_value_new_text_format (N_("ANOVA (%s)"), var_to_string (var)), + "ANOVA"); - tab_double (t, 6, 1, 0, pval, NULL); + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("Sum of Squares"), PIVOT_RC_OTHER, + N_("df"), PIVOT_RC_INTEGER, + N_("Mean Square"), PIVOT_RC_OTHER, + N_("F"), PIVOT_RC_OTHER, + N_("Sig."), PIVOT_RC_SIGNIFICANCE); - tab_title (t, _("ANOVA (%s)"), var_to_string (var)); - tab_submit (t); -} + pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Source"), + N_("Regression"), N_("Residual"), N_("Total")); + double msm = linreg_ssreg (c) / linreg_dfmodel (c); + double mse = linreg_mse (c); + double F = msm / mse; -static void -reg_stats_bcov (linreg * c, void *aux UNUSED, const struct variable *var) -{ - int n_cols; - int n_rows; - int i; - int k; - int row; - int col; - const char *label; - struct tab_table *t; - - assert (c != NULL); - n_cols = c->n_indeps + 1 + 2; - n_rows = 2 * (c->n_indeps + 1); - t = tab_create (n_cols, n_rows); - tab_headers (t, 2, 0, 1, 0); - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (t, TAL_2, 0, n_cols - 1, 1); - tab_vline (t, TAL_2, 2, 0, n_rows - 1); - tab_vline (t, TAL_0, 1, 0, 0); - tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Model")); - tab_text (t, 1, 1, TAB_CENTER | TAT_TITLE, _("Covariances")); - for (i = 0; i < linreg_n_coeffs (c); i++) + struct entry { - const struct variable *v = linreg_indep_var (c, i); - label = var_to_string (v); - tab_text (t, 2, i, TAB_CENTER, label); - tab_text (t, i + 2, 0, TAB_CENTER, label); - for (k = 1; k < linreg_n_coeffs (c); k++) - { - col = (i <= k) ? k : i; - row = (i <= k) ? i : k; - tab_double (t, k + 2, i, TAB_CENTER, - gsl_matrix_get (c->cov, row, col), NULL); - } + int stat_idx; + int source_idx; + double x; + } + entries[] = { + /* Sums of Squares. 
*/ + { 0, 0, linreg_ssreg (c) }, + { 0, 1, linreg_sse (c) }, + { 0, 2, linreg_sst (c) }, + /* Degrees of freedom. */ + { 1, 0, linreg_dfmodel (c) }, + { 1, 1, linreg_dferror (c) }, + { 1, 2, linreg_dftotal (c) }, + /* Mean Squares. */ + { 2, 0, msm }, + { 2, 1, mse }, + /* F */ + { 3, 0, F }, + /* Significance. */ + { 4, 0, gsl_cdf_fdist_Q (F, linreg_dfmodel (c), linreg_dferror (c)) }, + }; + for (size_t i = 0; i < sizeof entries / sizeof *entries; i++) + { + const struct entry *e = &entries[i]; + pivot_table_put2 (table, e->stat_idx, e->source_idx, + pivot_value_new_number (e->x)); } - tab_title (t, _("Coefficient Correlations (%s)"), var_to_string (var)); - tab_submit (t); + + pivot_table_submit (table); } + static void -statistics_keyword_output (void (*function) (linreg *, void *, const struct variable *var), - bool keyword, linreg * c, void *aux, const struct variable *var) +reg_stats_bcov (const struct linreg * c, const struct variable *var) { - if (keyword) + struct pivot_table *table = pivot_table_create__ ( + pivot_value_new_text_format (N_("Coefficient Correlations (%s)"), + var_to_string (var)), + "Coefficient Correlations"); + + for (size_t i = 0; i < 2; i++) { - (*function) (c, aux, var); + struct pivot_dimension *models = pivot_dimension_create ( + table, i ? PIVOT_AXIS_ROW : PIVOT_AXIS_COLUMN, N_("Models")); + for (size_t j = 0; j < linreg_n_coeffs (c); j++) + pivot_category_create_leaf ( + models->root, pivot_value_new_variable ( + linreg_indep_var (c, j))); } + + pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Statistics"), + N_("Covariances")); + + for (size_t i = 0; i < linreg_n_coeffs (c); i++) + for (size_t k = 0; k < linreg_n_coeffs (c); k++) + { + double cov = gsl_matrix_get (linreg_cov (c), MIN (i, k), MAX (i, k)); + pivot_table_put3 (table, k, i, 0, pivot_value_new_number (cov)); + } + + pivot_table_submit (table); }