X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fregression.q;h=bed4f79dae561bfae4ac2a57e6205f1c33ac10e5;hb=6882ff1359de0c4812a62e1bfcdfbf8e68474de5;hp=fbf9eafeff207ef729031c548b5680e61110d7e8;hpb=60401d43dd6915c6eaa0fc6cf01fd361dcc323d1;p=pspp diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q index fbf9eafeff..bed4f79dae 100644 --- a/src/language/stats/regression.q +++ b/src/language/stats/regression.q @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -47,6 +46,8 @@ #include #include +#include "xalloc.h" + #include "gettext.h" #define _(msgid) gettext (msgid) @@ -92,9 +93,6 @@ struct moments_var const struct variable *v; }; -/* Linear regression models. */ -static pspp_linreg_cache **models = NULL; - /* Transformations for saving predicted values and residuals, etc. @@ -122,7 +120,7 @@ static size_t n_variables; static struct file_handle *model_file; static bool run_regression (struct casereader *, struct cmd_regression *, - struct dataset *); + struct dataset *, pspp_linreg_cache **); /* STATISTICS subcommand output functions. @@ -193,14 +191,12 @@ reg_stats_coeff (pspp_linreg_cache * c) double std_err; double beta; const char *label; - char *tmp; + const struct variable *v; const union value *val; - const char *val_s; struct tab_table *t; assert (c != NULL); - tmp = xnmalloc (MAX_STRING, sizeof (*tmp)); n_rows = c->n_coeffs + 2; t = tab_create (n_cols, n_rows, 0); @@ -229,10 +225,13 @@ reg_stats_coeff (pspp_linreg_cache * c) tab_float (t, 6, 1, 0, pval, 10, 2); for (j = 1; j <= c->n_indeps; j++) { + struct string tstr; + ds_init_empty (&tstr); + v = pspp_coeff_get_var (c->coeff[j], 0); label = var_to_string (v); /* Do not overwrite the variable's name. */ - strncpy (tmp, label, MAX_STRING); + ds_put_cstr (&tstr, label); if (var_is_alpha (v)) { /* @@ -242,11 +241,11 @@ reg_stats_coeff (pspp_linreg_cache * c) */ val = pspp_coeff_get_value (c->coeff[j], v); - val_s = var_get_value_name (v, val); - strncat (tmp, val_s, MAX_STRING); + + var_append_value_name (v, val, &tstr); } - tab_text (t, 1, j + 1, TAB_CENTER, tmp); + tab_text (t, 1, j + 1, TAB_CENTER, ds_cstr (&tstr)); /* Regression coefficients. */ @@ -277,10 +276,10 @@ reg_stats_coeff (pspp_linreg_cache * c) 2 * gsl_cdf_tdist_Q (fabs (t_stat), (double) (c->n_obs - c->n_coeffs)); tab_float (t, 6, j + 1, 0, pval, 10, 2); + ds_destroy (&tstr); } tab_title (t, _("Coefficients")); tab_submit (t); - free (tmp); } /* @@ -639,16 +638,16 @@ try_name (const struct dictionary *dict, const char *name) } static void -reg_get_name (const struct dictionary *dict, char name[LONG_NAME_LEN], - const char prefix[LONG_NAME_LEN]) +reg_get_name (const struct dictionary *dict, char name[VAR_NAME_LEN], + const char prefix[VAR_NAME_LEN]) { int i = 1; - snprintf (name, LONG_NAME_LEN, "%s%d", prefix, i); + snprintf (name, VAR_NAME_LEN, "%s%d", prefix, i); while (!try_name (dict, name)) { i++; - snprintf (name, LONG_NAME_LEN, "%s%d", prefix, i); + snprintf (name, VAR_NAME_LEN, "%s%d", prefix, i); } } @@ -658,7 +657,7 @@ reg_save_var (struct dataset *ds, const char *prefix, trns_proc_func * f, { struct dictionary *dict = dataset_dict (ds); static int trns_index = 1; - char name[LONG_NAME_LEN]; + char name[VAR_NAME_LEN]; struct variable *new_var; struct reg_trns *t = NULL; @@ -784,10 +783,15 @@ reg_print_categorical_encoding (FILE * fp, pspp_linreg_cache * c) for (j = 0; j < n_categories; j++) { + struct string vstr; const union value *val = cat_subscript_to_value (j, varlist[i]); + ds_init_empty (&vstr); + var_append_value_name (varlist[i], val, &vstr); fprintf (fp, "%s.values[%d] = \"%s\";\n\t", var_get_name (varlist[i]), j, - var_get_value_name (varlist[i], val)); + ds_cstr (&vstr)); + + ds_destroy (&vstr); } } fprintf (fp, "%s", reg_export_categorical_encode_2); @@ -932,6 +936,7 @@ regression_custom_export (struct lexer *lexer, struct dataset *ds UNUSED, model_file = NULL; else { + fh_unref (model_file); model_file = fh_parse (lexer, FH_REF_FILE); if (model_file == NULL) return 0; @@ -948,11 +953,16 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) { struct casegrouper *grouper; struct casereader *group; + pspp_linreg_cache **models; bool ok; size_t i; + model_file = NULL; if (!parse_regression (lexer, ds, &cmd, NULL)) - return CMD_FAILURE; + { + fh_unref (model_file); + return CMD_FAILURE; + } models = xnmalloc (cmd.n_dependent, sizeof *models); for (i = 0; i < cmd.n_dependent; i++) @@ -963,13 +973,16 @@ cmd_regression (struct lexer *lexer, struct dataset *ds) /* Data pass. */ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); while (casegrouper_get_next_group (grouper, &group)) - run_regression (group, &cmd, ds); + run_regression (group, &cmd, ds, models); ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; subcommand_save (ds, cmd.sbc_save, models); free (v_variables); free (models); + free_regression (&cmd); + fh_unref (model_file); + return ok ? CMD_SUCCESS : CMD_FAILURE; } @@ -1028,7 +1041,9 @@ identify_indep_vars (const struct variable **indep_vars, as the dependent variable. Print a warning and continue. */ msg (SE, - gettext ("The dependent variable is equal to the independent variable. The least squares line is therefore Y=X. Standard errors and related statistics may be meaningless.")); + gettext ("The dependent variable is equal to the independent variable." + "The least squares line is therefore Y=X." + "Standard errors and related statistics may be meaningless.")); n_indep_vars = 1; indep_vars[0] = v_variables[0]; } @@ -1120,7 +1135,7 @@ compute_moments (pspp_linreg_cache * c, struct moments_var *mom, static bool run_regression (struct casereader *input, struct cmd_regression *cmd, - struct dataset *ds) + struct dataset *ds, pspp_linreg_cache **models) { size_t i; int n_indep = 0; @@ -1136,14 +1151,16 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, assert (models != NULL); if (!casereader_peek (input, 0, &c)) - return true; + { + casereader_destroy (input); + return true; + } output_split_file_values (ds, &c); case_destroy (&c); if (!v_variables) { - dict_get_vars (dataset_dict (ds), &v_variables, &n_variables, - 1u << DC_SYSTEM); + dict_get_vars (dataset_dict (ds), &v_variables, &n_variables, 0); } for (i = 0; i < cmd->n_dependent; i++) @@ -1196,8 +1213,6 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, lopts.get_indep_mean_std[i] = 1; } models[k] = pspp_linreg_cache_alloc (X->m->size1, X->m->size2); - models[k]->indep_means = gsl_vector_alloc (X->m->size2); - models[k]->indep_std = gsl_vector_alloc (X->m->size2); models[k]->depvar = dep_var; /* For large data sets, use QR decomposition. @@ -1253,6 +1268,11 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, } casereader_destroy (reader); } + for (i = 0; i < n_variables; i++) + { + moments1_destroy ((mom + i)->m); + } + free (mom); free (indep_vars); free (lopts.get_indep_mean_std); casereader_destroy (input);