X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fregression.c;h=194915198b143ce2fe4b87a181d8f0ad57f50f3d;hb=a1c1a4ca100da5c40fe8637b1d460e61a4a8668e;hp=6cf94c6ad93d3254be30f0ab135c55cface149a6;hpb=3acf6da8d49333d9cc037dfe92a80c1615224a27;p=pspp diff --git a/src/language/stats/regression.c b/src/language/stats/regression.c index 6cf94c6ad9..194915198b 100644 --- a/src/language/stats/regression.c +++ b/src/language/stats/regression.c @@ -72,33 +72,32 @@ struct regression bool pred; }; -struct per_split_ws -{ - linreg **models; -}; - struct regression_workspace { - struct per_split_ws *psw; + /* The new variables which will be introduced by /SAVE */ + const struct variable **predvars; + const struct variable **residvars; + /* A reader/writer pair to temporarily hold the + values of the new variables */ struct casewriter *writer; struct casereader *reader; + /* Indeces of the new values in the reader/writer (-1 if not applicable) */ int res_idx; int pred_idx; - int extras; - const struct variable **predvars; - const struct variable **residvars; + /* 0, 1 or 2 depending on what new variables are to be created */ + int extras; }; static void run_regression (const struct regression *cmd, - struct per_split_ws *psw, struct regression_workspace *ws, struct casereader *input); - +/* Return a string based on PREFIX which may be used as the name + of a new variable in DICT */ static char * reg_get_name (const struct dictionary *dict, const char *prefix) { @@ -127,25 +126,39 @@ create_aux_var (struct dataset *ds, const char *prefix) return var; } -struct thing +/* Auxilliary data for transformation when /SAVE is entered */ +struct save_trans_data { int n_dep_vars; struct regression_workspace *ws; }; +static bool +save_trans_free (void *aux) +{ + struct save_trans_data *save_trans_data = aux; + free (save_trans_data->ws->predvars); + free (save_trans_data->ws->residvars); + + casereader_destroy (save_trans_data->ws->reader); + free (save_trans_data->ws); + free (save_trans_data); + return true; +} + static int -transX (void *aux, struct ccase **c, casenumber x UNUSED) +save_trans_func (void *aux, struct ccase **c, casenumber x UNUSED) { - struct thing *thing = aux; - struct regression_workspace *ws = thing->ws; - const struct ccase *in = casereader_read (ws->reader); + struct save_trans_data *save_trans_data = aux; + struct regression_workspace *ws = save_trans_data->ws; + struct ccase *in = casereader_read (ws->reader); if (in) { int k; *c = case_unshare (*c); - for (k = 0; k < thing->n_dep_vars; ++k) + for (k = 0; k < save_trans_data->n_dep_vars; ++k) { if (ws->pred_idx != -1) { @@ -159,6 +172,7 @@ transX (void *aux, struct ccase **c, casenumber x UNUSED) case_data_rw (*c, ws->residvars[k])->f = resid; } } + case_unref (in); } return TRNS_CONTINUE; @@ -168,12 +182,10 @@ transX (void *aux, struct ccase **c, casenumber x UNUSED) int cmd_regression (struct lexer *lexer, struct dataset *ds) { - int n_splits = 0; struct regression_workspace workspace; struct regression regression; const struct dictionary *dict = dataset_dict (ds); bool save; - workspace.psw = NULL; memset (®ression, 0, sizeof (struct regression)); @@ -209,6 +221,9 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) if (!lex_force_match (lexer, T_EQUALS)) goto error; + free (regression.dep_vars); + regression.n_dep_vars = 0; + if (!parse_variables_const (lexer, dict, ®ression.dep_vars, ®ression.n_dep_vars, @@ -332,10 +347,10 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) "Temporary transformations will be made permanent.")); workspace.writer = autopaging_writer_create (proto); + caseproto_unref (proto); } - n_splits = 0; { struct casegrouper *grouper; struct casereader *group; @@ -346,9 +361,7 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) while (casegrouper_get_next_group (grouper, &group)) { - workspace.psw = xrealloc (workspace.psw, ++n_splits * sizeof (*workspace.psw)); - - run_regression (®ression, &workspace.psw[n_splits - 1], + run_regression (®ression, &workspace, group); @@ -357,19 +370,17 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) ok = proc_commit (ds) && ok; } + if (workspace.writer) { - if (workspace.writer) - { - struct thing *thing = xmalloc (sizeof *thing); - struct casereader *r = casewriter_make_reader (workspace.writer); - workspace.writer = NULL; - workspace.reader = r; - thing->ws = xmalloc (sizeof (workspace)); - memcpy (thing->ws, &workspace, sizeof (workspace)); - thing->n_dep_vars = regression.n_dep_vars; + struct save_trans_data *save_trans_data = xmalloc (sizeof *save_trans_data); + struct casereader *r = casewriter_make_reader (workspace.writer); + workspace.writer = NULL; + workspace.reader = r; + save_trans_data->ws = xmalloc (sizeof (workspace)); + memcpy (save_trans_data->ws, &workspace, sizeof (workspace)); + save_trans_data->n_dep_vars = regression.n_dep_vars; - add_transformation (ds, transX, NULL, thing); - } + add_transformation (ds, save_trans_func, save_trans_free, save_trans_data); } @@ -384,7 +395,7 @@ error: return CMD_FAILURE; } - +/* Return the size of the union of dependent and independent variables */ static size_t get_n_all_vars (const struct regression *cmd) { @@ -406,20 +417,21 @@ get_n_all_vars (const struct regression *cmd) return result; } +/* Fill VARS with the union of dependent and independent variables */ static void fill_all_vars (const struct variable **vars, const struct regression *cmd) { + size_t x = 0; size_t i; - size_t j; - bool absent; - for (i = 0; i < cmd->n_vars; i++) { vars[i] = cmd->vars[i]; } + for (i = 0; i < cmd->n_dep_vars; i++) { - absent = true; + size_t j; + bool absent = true; for (j = 0; j < cmd->n_vars; j++) { if (cmd->dep_vars[i] == cmd->vars[j]) @@ -430,7 +442,7 @@ fill_all_vars (const struct variable **vars, const struct regression *cmd) } if (absent) { - vars[i + cmd->n_vars] = cmd->dep_vars[i]; + vars[cmd->n_vars + x++] = cmd->dep_vars[i]; } } } @@ -492,7 +504,7 @@ fill_covariance (gsl_matrix * cov, struct covariance *all_cov, const gsl_matrix *ssize_matrix; double result = 0.0; - gsl_matrix *cm = covariance_calculate_unnormalized (all_cov); + const gsl_matrix *cm = covariance_calculate_unnormalized (all_cov); if (cm == NULL) return 0; @@ -543,44 +555,45 @@ fill_covariance (gsl_matrix * cov, struct covariance *all_cov, gsl_matrix_set (cov, cov->size1 - 1, cov->size1 - 1, gsl_matrix_get (cm, dep_subscript, dep_subscript)); free (rows); - gsl_matrix_free (cm); return result; } + /* STATISTICS subcommand output functions. */ -static void reg_stats_r (linreg *, void *, const struct variable *); -static void reg_stats_coeff (linreg *, void *, const struct variable *); -static void reg_stats_anova (linreg *, void *, const struct variable *); -static void reg_stats_bcov (linreg *, void *, const struct variable *); - -static void -statistics_keyword_output (void (*) - (linreg *, void *, const struct variable *), bool, - linreg *, void *, const struct variable *); - +static void reg_stats_r (const linreg *, const struct variable *); +static void reg_stats_coeff (const linreg *, const gsl_matrix *, const struct variable *); +static void reg_stats_anova (const linreg *, const struct variable *); +static void reg_stats_bcov (const linreg *, const struct variable *); static void -subcommand_statistics (const struct regression *cmd, linreg * c, void *aux, +subcommand_statistics (const struct regression *cmd, const linreg * c, const gsl_matrix * cm, const struct variable *var) { - statistics_keyword_output (reg_stats_r, cmd->r, c, aux, var); - statistics_keyword_output (reg_stats_anova, cmd->anova, c, aux, var); - statistics_keyword_output (reg_stats_coeff, cmd->coeff, c, aux, var); - statistics_keyword_output (reg_stats_bcov, cmd->bcov, c, aux, var); + if (cmd->r) + reg_stats_r (c, var); + + if (cmd->anova) + reg_stats_anova (c, var); + + if (cmd->coeff) + reg_stats_coeff (c, cm, var); + + if (cmd->bcov) + reg_stats_bcov (c, var); } static void run_regression (const struct regression *cmd, - struct per_split_ws *psw, struct regression_workspace *ws, struct casereader *input) { size_t i; + linreg **models; int k; struct ccase *c; @@ -611,29 +624,28 @@ run_regression (const struct regression *cmd, casereader_destroy (r); } - psw->models = xcalloc (cmd->n_dep_vars, sizeof (*psw->models)); + models = xcalloc (cmd->n_dep_vars, sizeof (*models)); for (k = 0; k < cmd->n_dep_vars; k++) { - const struct variable **vars = xnmalloc (cmd->n_vars, sizeof (*vars)); const struct variable *dep_var = cmd->dep_vars[k]; int n_indep = identify_indep_vars (cmd, vars, dep_var); gsl_matrix *this_cm = gsl_matrix_alloc (n_indep + 1, n_indep + 1); double n_data = fill_covariance (this_cm, cov, vars, n_indep, dep_var, all_vars, n_all_vars, means); - psw->models[k] = linreg_alloc (dep_var, vars, n_data, n_indep); - psw->models[k]->depvar = dep_var; + models[k] = linreg_alloc (dep_var, vars, n_data, n_indep); + models[k]->depvar = dep_var; for (i = 0; i < n_indep; i++) { - linreg_set_indep_variable_mean (psw->models[k], i, means[i]); + linreg_set_indep_variable_mean (models[k], i, means[i]); } - linreg_set_depvar_mean (psw->models[k], means[i]); + linreg_set_depvar_mean (models[k], means[i]); /* For large data sets, use QR decomposition. */ if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA) { - psw->models[k]->method = LINREG_QR; + models[k]->method = LINREG_QR; } if (n_data > 0) @@ -641,11 +653,11 @@ run_regression (const struct regression *cmd, /* Find the least-squares estimates and other statistics. */ - linreg_fit (this_cm, psw->models[k]); + linreg_fit (this_cm, models[k]); if (!taint_has_tainted_successor (casereader_get_taint (input))) { - subcommand_statistics (cmd, psw->models[k], this_cm, dep_var); + subcommand_statistics (cmd, models[k], this_cm, dep_var); } } else @@ -678,16 +690,18 @@ run_regression (const struct regression *cmd, if (cmd->pred) { - double pred = linreg_predict (psw->models[k], vals, n_indep); + double pred = linreg_predict (models[k], vals, n_indep); case_data_rw_idx (outc, k * ws->extras + ws->pred_idx)->f = pred; } if (cmd->resid) { - double obs = case_data (c, psw->models[k]->depvar)->f; - double res = linreg_residual (psw->models[k], obs, vals, n_indep); + double obs = case_data (c, models[k]->depvar)->f; + double res = linreg_residual (models[k], obs, vals, n_indep); case_data_rw_idx (outc, k * ws->extras + ws->res_idx)->f = res; } + free (vals); + free (vars); } casewriter_write (ws->writer, outc); } @@ -696,19 +710,23 @@ run_regression (const struct regression *cmd, casereader_destroy (reader); + for (k = 0; k < cmd->n_dep_vars; k++) + { + linreg_unref (models[k]); + } + free (models); free (all_vars); free (means); casereader_destroy (input); covariance_destroy (cov); } - - + static void -reg_stats_r (linreg * c, void *aux UNUSED, const struct variable *var) +reg_stats_r (const linreg * c, const struct variable *var) { struct tab_table *t; int n_rows = 2; @@ -745,7 +763,7 @@ reg_stats_r (linreg * c, void *aux UNUSED, const struct variable *var) Table showing estimated regression coefficients. */ static void -reg_stats_coeff (linreg * c, void *aux_, const struct variable *var) +reg_stats_coeff (const linreg * c, const gsl_matrix *cov, const struct variable *var) { size_t j; int n_cols = 7; @@ -759,7 +777,6 @@ reg_stats_coeff (linreg * c, void *aux_, const struct variable *var) const struct variable *v; struct tab_table *t; - gsl_matrix *cov = aux_; assert (c != NULL); n_rows = linreg_n_coeffs (c) + 3; @@ -839,7 +856,7 @@ reg_stats_coeff (linreg * c, void *aux_, const struct variable *var) Display the ANOVA table. */ static void -reg_stats_anova (linreg * c, void *aux UNUSED, const struct variable *var) +reg_stats_anova (const linreg * c, const struct variable *var) { int n_cols = 7; int n_rows = 4; @@ -895,7 +912,7 @@ reg_stats_anova (linreg * c, void *aux UNUSED, const struct variable *var) static void -reg_stats_bcov (linreg * c, void *aux UNUSED, const struct variable *var) +reg_stats_bcov (const linreg * c, const struct variable *var) { int n_cols; int n_rows; @@ -935,14 +952,3 @@ reg_stats_bcov (linreg * c, void *aux UNUSED, const struct variable *var) tab_submit (t); } -static void -statistics_keyword_output (void (*function) - (linreg *, void *, const struct variable * var), - bool keyword, linreg * c, void *aux, - const struct variable *var) -{ - if (keyword) - { - (*function) (c, aux, var); - } -}