From: John Darrington Date: Sat, 29 Jun 2013 05:44:17 +0000 (+0200) Subject: Partial fix for regression vs. splits issue X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fbuilds%2F20130629010504%2Fpspp;p=pspp Partial fix for regression vs. splits issue Split the struct regression into information which is parsed from the command, and data which is calculated during execution of the command. Bug #39070 --- diff --git a/src/language/stats/regression.c b/src/language/stats/regression.c index a4e9619d95..a9d1a50b63 100644 --- a/src/language/stats/regression.c +++ b/src/language/stats/regression.c @@ -69,12 +69,15 @@ struct regression bool resid; bool pred; +}; +struct regression_workspace +{ linreg **models; }; - static void run_regression (const struct regression *cmd, + linreg **models, struct casereader *input); @@ -85,9 +88,8 @@ static void run_regression (const struct regression *cmd, */ struct reg_trns { - int n_trns; /* Number of transformations. */ - int trns_id; /* Which trns is this one? */ linreg *c; /* Linear model for this trns. */ + const struct variable *var; }; /* @@ -100,7 +102,7 @@ regression_trns_pred_proc (void *t_, struct ccase **c, size_t i; size_t n_vals; struct reg_trns *trns = t_; - linreg *model; + const linreg *model; union value *output = NULL; const union value *tmp; double *vals; @@ -110,14 +112,13 @@ regression_trns_pred_proc (void *t_, struct ccase **c, model = trns->c; assert (model != NULL); assert (model->depvar != NULL); - assert (model->pred != NULL); vars = linreg_get_vars (model); n_vals = linreg_n_coeffs (model); vals = xnmalloc (n_vals, sizeof (*vals)); *c = case_unshare (*c); - output = case_data_rw (*c, model->pred); + output = case_data_rw (*c, trns->var); for (i = 0; i < n_vals; i++) { @@ -139,7 +140,7 @@ regression_trns_resid_proc (void *t_, struct ccase **c, size_t i; size_t n_vals; struct reg_trns *trns = t_; - linreg *model; + const linreg *model; union value *output = NULL; const union value *tmp; double *vals = NULL; @@ -150,14 +151,13 @@ regression_trns_resid_proc (void *t_, struct ccase **c, model = trns->c; assert (model != NULL); assert (model->depvar != NULL); - assert (model->resid != NULL); vars = linreg_get_vars (model); n_vals = linreg_n_coeffs (model); vals = xnmalloc (n_vals, sizeof (*vals)); *c = case_unshare (*c); - output = case_data_rw (*c, model->resid); + output = case_data_rw (*c, trns->var); assert (output != NULL); for (i = 0; i < n_vals; i++) @@ -199,78 +199,85 @@ regression_trns_free (void *t_) { struct reg_trns *t = t_; - if (t->trns_id == t->n_trns) - { - linreg_unref (t->c); - } + linreg_unref (t->c); + free (t); return true; } -static void -reg_save_var (struct dataset *ds, const char *prefix, trns_proc_func * f, - linreg * c, struct variable **v, int n_trns) + +static const struct variable * +create_aux_var (struct dataset *ds, const char *prefix) { + struct variable *var; struct dictionary *dict = dataset_dict (ds); - static int trns_index = 1; - char *name; - struct variable *new_var; - struct reg_trns *t = NULL; + char *name = reg_get_name (dict, prefix); + var = dict_create_var_assert (dict, name, 0); + free (name); + return var; +} - t = xmalloc (sizeof (*t)); - t->trns_id = trns_index; - t->n_trns = n_trns; +static void +reg_save_var (struct dataset *ds, trns_proc_func * f, + const struct variable *var, + linreg *c) +{ + struct reg_trns *t = xmalloc (sizeof (*t)); t->c = c; + t->var = var; + linreg_ref (c); - name = reg_get_name (dict, prefix); - new_var = dict_create_var_assert (dict, name, 0); - free (name); - - *v = new_var; add_transformation (ds, f, regression_trns_free, t); - trns_index++; } static void -subcommand_save (const struct regression *cmd) +subcommand_save (const struct regression *cmd, + struct regression_workspace *workspace, + size_t n_m) { - linreg **lc; - int n_trns = 0; - - if (cmd->resid) - n_trns++; - if (cmd->pred) - n_trns++; - - n_trns *= cmd->n_dep_vars; - - for (lc = cmd->models; lc < cmd->models + cmd->n_dep_vars; lc++) + int i; + for (i = 0; i < cmd->n_dep_vars; ++i) { - if (*lc != NULL) - { - if ((*lc)->depvar != NULL) - { - (*lc)->refcnt++; - if (cmd->resid) - { - reg_save_var (cmd->ds, "RES", regression_trns_resid_proc, - *lc, &(*lc)->resid, n_trns); - } - if (cmd->pred) - { - reg_save_var (cmd->ds, "PRED", regression_trns_pred_proc, - *lc, &(*lc)->pred, n_trns); - } - } - } + int w; + const struct variable *resvar = NULL; + const struct variable *predvar = NULL; + + if (cmd->resid) + resvar = create_aux_var (cmd->ds, "RES"); + + if (cmd->pred) + predvar = create_aux_var (cmd->ds, "PRED"); + + for (w = 0 ; w < n_m; ++w) + { + linreg **models = workspace[w].models; + linreg *lc = models[i]; + if (lc == NULL) + continue; + + if (lc->depvar == NULL) + continue; + + if (cmd->resid) + { + reg_save_var (cmd->ds, regression_trns_resid_proc, resvar, lc); + } + + if (cmd->pred) + { + reg_save_var (cmd->ds, regression_trns_pred_proc, predvar, lc); + } + } } } int cmd_regression (struct lexer *lexer, struct dataset *ds) { - int k; + int w; + struct regression_workspace *workspace = NULL; + size_t n_workspaces = 0; struct regression regression; const struct dictionary *dict = dataset_dict (ds); bool save; @@ -391,9 +398,6 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) } - regression.models = - xcalloc (regression.n_dep_vars, sizeof *regression.models); - save = regression.pred || regression.resid; if (save) { @@ -410,31 +414,45 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) grouper = casegrouper_create_splits (proc_open_filtering (ds, !save), dict); while (casegrouper_get_next_group (grouper, &group)) - run_regression (®ression, group); + { + workspace = xrealloc (workspace, sizeof (workspace) * (n_workspaces + 1)); + workspace[n_workspaces].models = xcalloc (regression.n_dep_vars, sizeof (linreg *)); + run_regression (®ression, workspace[n_workspaces++].models, group); + } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; } if (save) { - subcommand_save (®ression); + subcommand_save (®ression, workspace, n_workspaces); } + for (w = 0 ; w < n_workspaces; ++w) + { + int i; + linreg **models = workspace[w].models; + for (i = 0; i < regression.n_dep_vars; ++i) + linreg_unref (models[i]); + free (models); + } + free (workspace); - for (k = 0; k < regression.n_dep_vars; k++) - linreg_unref (regression.models[k]); - free (regression.models); free (regression.vars); free (regression.dep_vars); return CMD_SUCCESS; error: - if (regression.models) + for (w = 0 ; w < n_workspaces; ++w) { - for (k = 0; k < regression.n_dep_vars; k++) - linreg_unref (regression.models[k]); - free (regression.models); + int i; + linreg **models = workspace[w].models; + for (i = 0; i < regression.n_dep_vars; ++i) + linreg_unref (models[i]); + free (models); } + free (workspace); + free (regression.vars); free (regression.dep_vars); return CMD_FAILURE; @@ -631,7 +649,7 @@ subcommand_statistics (const struct regression *cmd, linreg * c, void *aux, static void -run_regression (const struct regression *cmd, struct casereader *input) +run_regression (const struct regression *cmd, linreg **models, struct casereader *input) { size_t i; int n_indep = 0; @@ -644,8 +662,6 @@ run_regression (const struct regression *cmd, struct casereader *input) struct casereader *reader; size_t n_all_vars; - linreg **models = cmd->models; - n_all_vars = get_n_all_vars (cmd); all_vars = xnmalloc (n_all_vars, sizeof (*all_vars)); fill_all_vars (all_vars, cmd); @@ -706,8 +722,6 @@ run_regression (const struct regression *cmd, struct casereader *input) else { msg (SE, _("No valid data found. This command was skipped.")); - linreg_unref (models[k]); - models[k] = NULL; } gsl_matrix_free (this_cm); } diff --git a/src/math/linreg.c b/src/math/linreg.c index 92617aeadf..be43735785 100644 --- a/src/math/linreg.c +++ b/src/math/linreg.c @@ -100,13 +100,13 @@ linreg_alloc (const struct variable *depvar, const struct variable **indep_vars, Default settings. */ c->method = LINREG_SWEEP; - c->pred = NULL; - c->resid = NULL; c->refcnt = 1; + return c; } + void linreg_ref (linreg *c) { @@ -116,7 +116,7 @@ linreg_ref (linreg *c) void linreg_unref (linreg *c) { - if (c && --c->refcnt == 0) + if (--c->refcnt == 0) { gsl_vector_free (c->indep_means); gsl_vector_free (c->indep_std); diff --git a/src/math/linreg.h b/src/math/linreg.h index c4bec321b6..72fe447b5b 100644 --- a/src/math/linreg.h +++ b/src/math/linreg.h @@ -90,7 +90,6 @@ typedef struct pspp_linreg_opts_struct pspp_linreg_opts; struct linreg_struct { - int refcnt; double n_obs; /* Number of observations. */ int n_indeps; /* Number of independent variables. */ int n_coeffs; /* The intercept is not considered a @@ -139,9 +138,8 @@ struct linreg_struct double dfe; double dfm; - struct variable *pred; - struct variable *resid; int dependent_column; /* Column containing the dependent variable. Defaults to last column. */ + int refcnt; }; typedef struct linreg_struct linreg; @@ -152,20 +150,17 @@ linreg *linreg_alloc (const struct variable *, const struct variable **, double, size_t); void linreg_unref (linreg *); -void linreg_ref (linreg *c); +void linreg_ref (linreg *); /* Fit the linear model via least squares. All pointers passed to pspp_linreg are assumed to be allocated to the correct size and initialized to the values as indicated by opts. */ -void -linreg_fit (const gsl_matrix *, linreg *); +void linreg_fit (const gsl_matrix *, linreg *); -double -linreg_predict (const linreg *, const double *, size_t); -double -linreg_residual (const linreg *, double, const double *, size_t); +double linreg_predict (const linreg *, const double *, size_t); +double linreg_residual (const linreg *, double, const double *, size_t); const struct variable ** linreg_get_vars (const linreg *); /*