X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fregression.c;h=b4448dd9e58ead03cb61cf3d5e5b3c97201b1edf;hb=60c545e6e958d868db3399a8989d37d8f9e0c131;hp=3e0871df9eb176f819ccfad6ae2917f33aaa2f6b;hpb=d983263edf596ae855af70d0580b7406f8be4268;p=pspp diff --git a/src/language/stats/regression.c b/src/language/stats/regression.c index 3e0871df9e..b4448dd9e5 100644 --- a/src/language/stats/regression.c +++ b/src/language/stats/regression.c @@ -1,6 +1,6 @@ /* PSPP - a program for statistical analysis. Copyright (C) 2005, 2009, 2010, 2011, 2012, 2013, 2014, - 2016, 2017 Free Software Foundation, Inc. + 2016, 2017, 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,7 +60,7 @@ #define STATS_OUTS 8 #define STATS_CI 16 #define STATS_BCOV 32 -#define STATS_COLLIN 64 +#define STATS_TOL 64 #define STATS_DEFAULT (STATS_R | STATS_COEFF | STATS_ANOVA | STATS_OUTS) @@ -159,7 +159,7 @@ save_trans_free (void *aux) return true; } -static int +static enum trns_result save_trans_func (void *aux, struct ccase **c, casenumber x UNUSED) { struct save_trans_data *save_trans_data = aux; @@ -175,14 +175,14 @@ save_trans_func (void *aux, struct ccase **c, casenumber x UNUSED) { if (ws->pred_idx != -1) { - double pred = case_data_idx (in, ws->extras * k + ws->pred_idx)->f; - case_data_rw (*c, ws->predvars[k])->f = pred; + double pred = case_num_idx (in, ws->extras * k + ws->pred_idx); + *case_num_rw (*c, ws->predvars[k]) = pred; } if (ws->res_idx != -1) { - double resid = case_data_idx (in, ws->extras * k + ws->res_idx)->f; - case_data_rw (*c, ws->residvars[k])->f = resid; + double resid = case_num_idx (in, ws->extras * k + ws->res_idx); + *case_num_rw (*c, ws->residvars[k]) = resid; } } case_unref (in); @@ -191,7 +191,6 @@ save_trans_func (void *aux, struct ccase **c, casenumber x UNUSED) return TRNS_CONTINUE; } - int cmd_regression (struct lexer *lexer, struct dataset *ds) { @@ -309,9 +308,9 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) { statistics |= STATS_BCOV; } - else if (lex_match_id (lexer, "COLLIN")) + else if (lex_match_id (lexer, "TOL")) { - statistics |= STATS_COLLIN; + statistics |= STATS_TOL; } else if (lex_match_id (lexer, "CI")) { @@ -450,7 +449,12 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) memcpy (save_trans_data->ws, &workspace, sizeof (workspace)); save_trans_data->n_dep_vars = regression.n_dep_vars; - add_transformation (ds, save_trans_func, save_trans_free, save_trans_data); + static const struct trns_class trns_class = { + .name = "REGRESSION", + .execute = save_trans_func, + .destroy = save_trans_free, + }; + add_transformation (ds, &trns_class, save_trans_data); } @@ -649,32 +653,35 @@ fill_covariance (gsl_matrix * cov, struct covariance *all_cov, +struct model_container +{ + struct linreg **models; +}; + /* STATISTICS subcommand output functions. */ static void reg_stats_r (const struct linreg *, const struct variable *); -static void reg_stats_coeff (const struct regression *, const struct regression_workspace *, - const struct linreg *, const struct linreg *, - const gsl_matrix *, const struct variable *); +static void reg_stats_coeff (const struct regression *, const struct linreg *, + const struct model_container *, const gsl_matrix *, + const struct variable *); static void reg_stats_anova (const struct linreg *, const struct variable *); static void reg_stats_bcov (const struct linreg *, const struct variable *); static struct linreg ** run_regression_get_models (const struct regression *cmd, - struct regression_workspace *ws, struct casereader *input, bool output) { size_t i; - struct linreg **models = NULL; - struct linreg **models_x = NULL; + struct model_container *model_container = XCALLOC (cmd->n_vars, struct model_container); struct ccase *c; struct covariance *cov; struct casereader *reader; - if (cmd->stats & STATS_COLLIN) + if (cmd->stats & STATS_TOL) { for (i = 0; i < cmd->n_vars; i++) { @@ -692,24 +699,23 @@ run_regression_get_models (const struct regression *cmd, subreg.resid = false; subreg.pred = false; - struct regression_workspace subws; - subws.extras = 0; - subws.res_idx = -1; - subws.pred_idx = -1; - subws.writer = NULL; - subws.reader = NULL; - subws.residvars = NULL; - subws.predvars = NULL; - - models_x = run_regression_get_models (&subreg, &subws, input, false); + model_container[i].models = + run_regression_get_models (&subreg, input, false); + free (subreg.vars); + free (subreg.dep_vars); } } size_t n_all_vars = get_n_all_vars (cmd); const struct variable **all_vars = xnmalloc (n_all_vars, sizeof (*all_vars)); - double *means = xnmalloc (n_all_vars, sizeof (*means)); - + /* In the (rather pointless) case where the dependent variable is + the independent variable, n_all_vars == 1. + However this would result in a buffer overflow so we must + over-allocate the space required in this malloc call. + See bug #58599 */ + double *means = xnmalloc (n_all_vars <= 1 ? 2 : n_all_vars, + sizeof (*means)); fill_all_vars (all_vars, cmd); cov = covariance_1pass_create (n_all_vars, all_vars, dict_get_weight (dataset_dict (cmd->ds)), @@ -718,9 +724,7 @@ run_regression_get_models (const struct regression *cmd, reader = casereader_clone (input); reader = casereader_create_filter_missing (reader, all_vars, n_all_vars, MV_ANY, NULL, NULL); - - - { +{ struct casereader *r = casereader_clone (reader); for (; (c = casereader_read (r)) != NULL; case_unref (c)) @@ -730,7 +734,7 @@ run_regression_get_models (const struct regression *cmd, casereader_destroy (r); } - models = xcalloc (cmd->n_dep_vars, sizeof (*models)); + struct linreg **models = XCALLOC (cmd->n_dep_vars, struct linreg*); for (int k = 0; k < cmd->n_dep_vars; k++) { @@ -762,32 +766,33 @@ run_regression_get_models (const struct regression *cmd, reg_stats_anova (models[k], dep_var); if (cmd->stats & STATS_COEFF) - reg_stats_coeff (cmd, ws, models[k], - models_x ? models_x[k] : NULL, + reg_stats_coeff (cmd, models[k], + model_container, cov_matrix, dep_var); if (cmd->stats & STATS_BCOV) reg_stats_bcov (models[k], dep_var); - } + } } else { msg (SE, _("No valid data found. This command was skipped.")); } free (vars); + gsl_matrix_free (cov_matrix); } - casereader_destroy (reader); - - if (models_x) + for (int i = 0; i < cmd->n_vars; i++) { - for (int k = 0; k < cmd->n_dep_vars; k++) - linreg_unref (models_x[k]); - - free (models_x); + if (model_container[i].models) + { + linreg_unref (model_container[i].models[0]); + } + free (model_container[i].models); } + free (model_container); free (all_vars); free (means); @@ -800,7 +805,7 @@ run_regression (const struct regression *cmd, struct regression_workspace *ws, struct casereader *input) { - struct linreg **models = run_regression_get_models (cmd, ws, input, true); + struct linreg **models = run_regression_get_models (cmd, input, true); if (ws->extras > 0) { @@ -825,14 +830,14 @@ run_regression (const struct regression *cmd, if (cmd->pred) { double pred = linreg_predict (models[k], vals, n_indep); - case_data_rw_idx (outc, k * ws->extras + ws->pred_idx)->f = pred; + *case_num_rw_idx (outc, k * ws->extras + ws->pred_idx) = pred; } if (cmd->resid) { - double obs = case_data (c, linreg_dep_var (models[k]))->f; + double obs = case_num (c, linreg_dep_var (models[k])); double res = linreg_residual (models[k], obs, vals, n_indep); - case_data_rw_idx (outc, k * ws->extras + ws->res_idx)->f = res; + *case_num_rw_idx (outc, k * ws->extras + ws->res_idx) = res; } free (vals); free (vars); @@ -859,7 +864,8 @@ reg_stats_r (const struct linreg * c, const struct variable *var) { struct pivot_table *table = pivot_table_create__ ( pivot_value_new_text_format (N_("Model Summary (%s)"), - var_to_string (var))); + var_to_string (var)), + "Model Summary"); pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), N_("R"), N_("R Square"), N_("Adjusted R Square"), @@ -884,13 +890,13 @@ reg_stats_r (const struct linreg * c, const struct variable *var) Table showing estimated regression coefficients. */ static void -reg_stats_coeff (const struct regression *cmd, const struct regression_workspace *ws, - const struct linreg *c, const struct linreg *c_x, - const gsl_matrix *cov, const struct variable *var) +reg_stats_coeff (const struct regression *cmd, const struct linreg *c, + const struct model_container *mc, const gsl_matrix *cov, + const struct variable *var) { struct pivot_table *table = pivot_table_create__ ( - pivot_value_new_text_format (N_("Coefficients (%s)"), - var_to_string (var))); + pivot_value_new_text_format (N_("Coefficients (%s)"), var_to_string (var)), + "Coefficients"); struct pivot_dimension *statistics = pivot_dimension_create ( table, PIVOT_AXIS_COLUMN, N_("Statistics")); @@ -911,7 +917,7 @@ reg_stats_coeff (const struct regression *cmd, const struct regression_workspace N_("Upper Bound")); } - if (cmd->stats & STATS_COLLIN) + if (cmd->stats & STATS_TOL) pivot_category_create_group (statistics->root, N_("Collinearity Statistics"), N_("Tolerance"), N_("VIF")); @@ -993,19 +999,14 @@ reg_stats_coeff (const struct regression *cmd, const struct regression_workspace pivot_value_new_number (interval_entries[i])); } - if (cmd->stats & STATS_COLLIN) + if (cmd->stats & STATS_TOL) { - assert (c_x); - double rsq = linreg_ssreg (c_x) / linreg_sst (c_x); - - double collin_entries[] = { - 1.0 - rsq, - 1.0 / (1.0 - rsq), - }; - - for (size_t i = 0; i < sizeof collin_entries / sizeof *collin_entries; i++) - pivot_table_put2 (table, col++, var_idx, - pivot_value_new_number (collin_entries[i])); + { + struct linreg *m = mc[j].models[0]; + double rsq = linreg_ssreg (m) / linreg_sst (m); + pivot_table_put2 (table, col++, var_idx, pivot_value_new_number (1.0 - rsq)); + pivot_table_put2 (table, col++, var_idx, pivot_value_new_number (1.0 / (1.0 - rsq))); + } } } @@ -1019,7 +1020,8 @@ static void reg_stats_anova (const struct linreg * c, const struct variable *var) { struct pivot_table *table = pivot_table_create__ ( - pivot_value_new_text_format (N_("ANOVA (%s)"), var_to_string (var))); + pivot_value_new_text_format (N_("ANOVA (%s)"), var_to_string (var)), + "ANOVA"); pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), N_("Sum of Squares"), PIVOT_RC_OTHER, @@ -1074,7 +1076,8 @@ reg_stats_bcov (const struct linreg * c, const struct variable *var) { struct pivot_table *table = pivot_table_create__ ( pivot_value_new_text_format (N_("Coefficient Correlations (%s)"), - var_to_string (var))); + var_to_string (var)), + "Coefficient Correlations"); for (size_t i = 0; i < 2; i++) {