From cfb1b82ee049a562ea4c5b0784a45d5e324d35cb Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 29 Jun 2013 07:44:17 +0200 Subject: [PATCH] Partial fix for regression vs. splits issue Split the struct regression into information which is parsed from the command, and data which is calculated during execution of the command. Bug #39070 --- src/language/stats/regression.c | 166 +++++++++++++++++--------------- src/math/linreg.c | 6 +- src/math/linreg.h | 15 +-- 3 files changed, 98 insertions(+), 89 deletions(-) diff --git a/src/language/stats/regression.c b/src/language/stats/regression.c index a4e9619d95..a9d1a50b63 100644 --- a/src/language/stats/regression.c +++ b/src/language/stats/regression.c @@ -69,12 +69,15 @@ struct regression bool resid; bool pred; +}; +struct regression_workspace +{ linreg **models; }; - static void run_regression (const struct regression *cmd, + linreg **models, struct casereader *input); @@ -85,9 +88,8 @@ static void run_regression (const struct regression *cmd, */ struct reg_trns { - int n_trns; /* Number of transformations. */ - int trns_id; /* Which trns is this one? */ linreg *c; /* Linear model for this trns. */ + const struct variable *var; }; /* @@ -100,7 +102,7 @@ regression_trns_pred_proc (void *t_, struct ccase **c, size_t i; size_t n_vals; struct reg_trns *trns = t_; - linreg *model; + const linreg *model; union value *output = NULL; const union value *tmp; double *vals; @@ -110,14 +112,13 @@ regression_trns_pred_proc (void *t_, struct ccase **c, model = trns->c; assert (model != NULL); assert (model->depvar != NULL); - assert (model->pred != NULL); vars = linreg_get_vars (model); n_vals = linreg_n_coeffs (model); vals = xnmalloc (n_vals, sizeof (*vals)); *c = case_unshare (*c); - output = case_data_rw (*c, model->pred); + output = case_data_rw (*c, trns->var); for (i = 0; i < n_vals; i++) { @@ -139,7 +140,7 @@ regression_trns_resid_proc (void *t_, struct ccase **c, size_t i; size_t n_vals; struct reg_trns *trns = t_; - linreg *model; + const linreg *model; union value *output = NULL; const union value *tmp; double *vals = NULL; @@ -150,14 +151,13 @@ regression_trns_resid_proc (void *t_, struct ccase **c, model = trns->c; assert (model != NULL); assert (model->depvar != NULL); - assert (model->resid != NULL); vars = linreg_get_vars (model); n_vals = linreg_n_coeffs (model); vals = xnmalloc (n_vals, sizeof (*vals)); *c = case_unshare (*c); - output = case_data_rw (*c, model->resid); + output = case_data_rw (*c, trns->var); assert (output != NULL); for (i = 0; i < n_vals; i++) @@ -199,78 +199,85 @@ regression_trns_free (void *t_) { struct reg_trns *t = t_; - if (t->trns_id == t->n_trns) - { - linreg_unref (t->c); - } + linreg_unref (t->c); + free (t); return true; } -static void -reg_save_var (struct dataset *ds, const char *prefix, trns_proc_func * f, - linreg * c, struct variable **v, int n_trns) + +static const struct variable * +create_aux_var (struct dataset *ds, const char *prefix) { + struct variable *var; struct dictionary *dict = dataset_dict (ds); - static int trns_index = 1; - char *name; - struct variable *new_var; - struct reg_trns *t = NULL; + char *name = reg_get_name (dict, prefix); + var = dict_create_var_assert (dict, name, 0); + free (name); + return var; +} - t = xmalloc (sizeof (*t)); - t->trns_id = trns_index; - t->n_trns = n_trns; +static void +reg_save_var (struct dataset *ds, trns_proc_func * f, + const struct variable *var, + linreg *c) +{ + struct reg_trns *t = xmalloc (sizeof (*t)); t->c = c; + t->var = var; + linreg_ref (c); - name = reg_get_name (dict, prefix); - new_var = dict_create_var_assert (dict, name, 0); - free (name); - - *v = new_var; add_transformation (ds, f, regression_trns_free, t); - trns_index++; } static void -subcommand_save (const struct regression *cmd) +subcommand_save (const struct regression *cmd, + struct regression_workspace *workspace, + size_t n_m) { - linreg **lc; - int n_trns = 0; - - if (cmd->resid) - n_trns++; - if (cmd->pred) - n_trns++; - - n_trns *= cmd->n_dep_vars; - - for (lc = cmd->models; lc < cmd->models + cmd->n_dep_vars; lc++) + int i; + for (i = 0; i < cmd->n_dep_vars; ++i) { - if (*lc != NULL) - { - if ((*lc)->depvar != NULL) - { - (*lc)->refcnt++; - if (cmd->resid) - { - reg_save_var (cmd->ds, "RES", regression_trns_resid_proc, - *lc, &(*lc)->resid, n_trns); - } - if (cmd->pred) - { - reg_save_var (cmd->ds, "PRED", regression_trns_pred_proc, - *lc, &(*lc)->pred, n_trns); - } - } - } + int w; + const struct variable *resvar = NULL; + const struct variable *predvar = NULL; + + if (cmd->resid) + resvar = create_aux_var (cmd->ds, "RES"); + + if (cmd->pred) + predvar = create_aux_var (cmd->ds, "PRED"); + + for (w = 0 ; w < n_m; ++w) + { + linreg **models = workspace[w].models; + linreg *lc = models[i]; + if (lc == NULL) + continue; + + if (lc->depvar == NULL) + continue; + + if (cmd->resid) + { + reg_save_var (cmd->ds, regression_trns_resid_proc, resvar, lc); + } + + if (cmd->pred) + { + reg_save_var (cmd->ds, regression_trns_pred_proc, predvar, lc); + } + } } } int cmd_regression (struct lexer *lexer, struct dataset *ds) { - int k; + int w; + struct regression_workspace *workspace = NULL; + size_t n_workspaces = 0; struct regression regression; const struct dictionary *dict = dataset_dict (ds); bool save; @@ -391,9 +398,6 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) } - regression.models = - xcalloc (regression.n_dep_vars, sizeof *regression.models); - save = regression.pred || regression.resid; if (save) { @@ -410,31 +414,45 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) grouper = casegrouper_create_splits (proc_open_filtering (ds, !save), dict); while (casegrouper_get_next_group (grouper, &group)) - run_regression (®ression, group); + { + workspace = xrealloc (workspace, sizeof (workspace) * (n_workspaces + 1)); + workspace[n_workspaces].models = xcalloc (regression.n_dep_vars, sizeof (linreg *)); + run_regression (®ression, workspace[n_workspaces++].models, group); + } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; } if (save) { - subcommand_save (®ression); + subcommand_save (®ression, workspace, n_workspaces); } + for (w = 0 ; w < n_workspaces; ++w) + { + int i; + linreg **models = workspace[w].models; + for (i = 0; i < regression.n_dep_vars; ++i) + linreg_unref (models[i]); + free (models); + } + free (workspace); - for (k = 0; k < regression.n_dep_vars; k++) - linreg_unref (regression.models[k]); - free (regression.models); free (regression.vars); free (regression.dep_vars); return CMD_SUCCESS; error: - if (regression.models) + for (w = 0 ; w < n_workspaces; ++w) { - for (k = 0; k < regression.n_dep_vars; k++) - linreg_unref (regression.models[k]); - free (regression.models); + int i; + linreg **models = workspace[w].models; + for (i = 0; i < regression.n_dep_vars; ++i) + linreg_unref (models[i]); + free (models); } + free (workspace); + free (regression.vars); free (regression.dep_vars); return CMD_FAILURE; @@ -631,7 +649,7 @@ subcommand_statistics (const struct regression *cmd, linreg * c, void *aux, static void -run_regression (const struct regression *cmd, struct casereader *input) +run_regression (const struct regression *cmd, linreg **models, struct casereader *input) { size_t i; int n_indep = 0; @@ -644,8 +662,6 @@ run_regression (const struct regression *cmd, struct casereader *input) struct casereader *reader; size_t n_all_vars; - linreg **models = cmd->models; - n_all_vars = get_n_all_vars (cmd); all_vars = xnmalloc (n_all_vars, sizeof (*all_vars)); fill_all_vars (all_vars, cmd); @@ -706,8 +722,6 @@ run_regression (const struct regression *cmd, struct casereader *input) else { msg (SE, _("No valid data found. This command was skipped.")); - linreg_unref (models[k]); - models[k] = NULL; } gsl_matrix_free (this_cm); } diff --git a/src/math/linreg.c b/src/math/linreg.c index 92617aeadf..be43735785 100644 --- a/src/math/linreg.c +++ b/src/math/linreg.c @@ -100,13 +100,13 @@ linreg_alloc (const struct variable *depvar, const struct variable **indep_vars, Default settings. */ c->method = LINREG_SWEEP; - c->pred = NULL; - c->resid = NULL; c->refcnt = 1; + return c; } + void linreg_ref (linreg *c) { @@ -116,7 +116,7 @@ linreg_ref (linreg *c) void linreg_unref (linreg *c) { - if (c && --c->refcnt == 0) + if (--c->refcnt == 0) { gsl_vector_free (c->indep_means); gsl_vector_free (c->indep_std); diff --git a/src/math/linreg.h b/src/math/linreg.h index c4bec321b6..72fe447b5b 100644 --- a/src/math/linreg.h +++ b/src/math/linreg.h @@ -90,7 +90,6 @@ typedef struct pspp_linreg_opts_struct pspp_linreg_opts; struct linreg_struct { - int refcnt; double n_obs; /* Number of observations. */ int n_indeps; /* Number of independent variables. */ int n_coeffs; /* The intercept is not considered a @@ -139,9 +138,8 @@ struct linreg_struct double dfe; double dfm; - struct variable *pred; - struct variable *resid; int dependent_column; /* Column containing the dependent variable. Defaults to last column. */ + int refcnt; }; typedef struct linreg_struct linreg; @@ -152,20 +150,17 @@ linreg *linreg_alloc (const struct variable *, const struct variable **, double, size_t); void linreg_unref (linreg *); -void linreg_ref (linreg *c); +void linreg_ref (linreg *); /* Fit the linear model via least squares. All pointers passed to pspp_linreg are assumed to be allocated to the correct size and initialized to the values as indicated by opts. */ -void -linreg_fit (const gsl_matrix *, linreg *); +void linreg_fit (const gsl_matrix *, linreg *); -double -linreg_predict (const linreg *, const double *, size_t); -double -linreg_residual (const linreg *, double, const double *, size_t); +double linreg_predict (const linreg *, const double *, size_t); +double linreg_residual (const linreg *, double, const double *, size_t); const struct variable ** linreg_get_vars (const linreg *); /* -- 2.30.2