X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fregression.q;h=7b02e4b9dcfdaed3dc669efdbc5129071cc28d97;hb=c806348911641248369536c11e48ed59757865da;hp=d5008b5681178b5890584ca08b20ee04169a6e31;hpb=26791c51431aa1b848b6e3997d2402680513c448;p=pspp diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q index d5008b5681..7b02e4b9dc 100644 --- a/src/language/stats/regression.q +++ b/src/language/stats/regression.q @@ -92,9 +92,6 @@ struct moments_var const struct variable *v; }; -/* Linear regression models. */ -static pspp_linreg_cache **models = NULL; - /* Transformations for saving predicted values and residuals, etc. @@ -122,7 +119,7 @@ static size_t n_variables; static struct file_handle *model_file; static bool run_regression (struct casereader *, struct cmd_regression *, - struct dataset *); + struct dataset *, pspp_linreg_cache **); /* STATISTICS subcommand output functions. @@ -948,6 +945,7 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) { struct casegrouper *grouper; struct casereader *group; + pspp_linreg_cache **models; bool ok; size_t i; @@ -963,13 +961,15 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) /* Data pass. */ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); while (casegrouper_get_next_group (grouper, &group)) - run_regression (group, &cmd, ds); + run_regression (group, &cmd, ds, models); ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; subcommand_save (ds, cmd.sbc_save, models); free (v_variables); free (models); + free_regression (&cmd); + return ok ? CMD_SUCCESS : CMD_FAILURE; } @@ -1021,7 +1021,19 @@ identify_indep_vars (const struct variable **indep_vars, for (i = 0; i < n_variables; i++) if (!is_depvar (i, depvar)) indep_vars[n_indep_vars++] = v_variables[i]; - + if ((n_indep_vars < 2) && is_depvar (0, depvar)) + { + /* + There is only one independent variable, and it is the same + as the dependent variable. Print a warning and continue. + */ + msg (SE, + gettext ("The dependent variable is equal to the independent variable." + "The least squares line is therefore Y=X." + "Standard errors and related statistics may be meaningless.")); + n_indep_vars = 1; + indep_vars[0] = v_variables[0]; + } return n_indep_vars; } @@ -1036,6 +1048,9 @@ prepare_categories (struct casereader *input, struct ccase c; size_t i; + assert (vars != NULL); + assert (mom != NULL); + for (i = 0; i < n_vars; i++) if (var_is_alpha (vars[i])) cat_stored_values_create (vars[i]); @@ -1107,7 +1122,7 @@ compute_moments (pspp_linreg_cache * c, struct moments_var *mom, static bool run_regression (struct casereader *input, struct cmd_regression *cmd, - struct dataset *ds) + struct dataset *ds, pspp_linreg_cache **models) { size_t i; int n_indep = 0; @@ -1123,7 +1138,10 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, assert (models != NULL); if (!casereader_peek (input, 0, &c)) - return true; + { + casereader_destroy (input); + return true; + } output_split_file_values (ds, &c); case_destroy (&c); @@ -1163,7 +1181,6 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, dep_var = cmd->v_dependent[k]; n_indep = identify_indep_vars (indep_vars, dep_var); - reader = casereader_clone (input); reader = casereader_create_filter_missing (reader, indep_vars, n_indep, MV_ANY, NULL); @@ -1184,8 +1201,6 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, lopts.get_indep_mean_std[i] = 1; } models[k] = pspp_linreg_cache_alloc (X->m->size1, X->m->size2); - models[k]->indep_means = gsl_vector_alloc (X->m->size2); - models[k]->indep_std = gsl_vector_alloc (X->m->size2); models[k]->depvar = dep_var; /* For large data sets, use QR decomposition. @@ -1241,6 +1256,11 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, } casereader_destroy (reader); } + for (i = 0; i < n_variables; i++) + { + moments1_destroy ((mom + i)->m); + } + free (mom); free (indep_vars); free (lopts.get_indep_mean_std); casereader_destroy (input);