From: Jason H Stover Date: Wed, 17 Sep 2008 15:36:41 +0000 (-0400) Subject: coeff_init: Do not use coeff[0] as the intercept. X-Git-Tag: v0.7.1~50^2~58 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6c1c66790acdd2c12cf2cca4555f70f20a4d21d7;p=pspp-builds.git coeff_init: Do not use coeff[0] as the intercept. rearrange_covariance_matrix: Fix permutation of rows/columns. run_glm: Account for categorical values. cmd_glm: Drop double-allocation of model linreg.h: Declare pspp_linreg_with_cov(). --- diff --git a/src/language/stats/glm.q b/src/language/stats/glm.q index ddc2a78b..48c3b221 100644 --- a/src/language/stats/glm.q +++ b/src/language/stats/glm.q @@ -95,19 +95,16 @@ int cmd_glm (struct lexer *lexer, struct dataset *ds); static bool run_glm (struct casereader *, struct cmd_glm *, - const struct dataset *, pspp_linreg_cache *); + const struct dataset *); int cmd_glm (struct lexer *lexer, struct dataset *ds) { struct casegrouper *grouper; struct casereader *group; - pspp_linreg_cache *model = NULL; bool ok; - model = xmalloc (sizeof *model); - if (!parse_glm (lexer, ds, &cmd, NULL)) return CMD_FAILURE; @@ -115,12 +112,11 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); while (casegrouper_get_next_group (grouper, &group)) { - run_glm (group, &cmd, ds, model); + run_glm (group, &cmd, ds); } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; - free (model); free (v_dependent); return ok ? CMD_SUCCESS : CMD_FAILURE; } @@ -160,9 +156,7 @@ coeff_init (pspp_linreg_cache * c, struct design_matrix *cov) { c->coeff = xnmalloc (cov->m->size2, sizeof (*c->coeff)); c->n_coeffs = cov->m->size2 - 1; - c->coeff[0] = xmalloc (sizeof (*(c->coeff[0]))); /* The first coefficient is the intercept. */ - c->coeff[0]->v_info = NULL; /* Intercept has no associated variable. */ - pspp_coeff_init (c->coeff + 1, cov); + pspp_coeff_init (c->coeff, cov); } /* @@ -255,8 +249,9 @@ data_pass_one (struct casereader *input, static bool run_glm (struct casereader *input, struct cmd_glm *cmd, - const struct dataset *ds, pspp_linreg_cache * model) + const struct dataset *ds) { + pspp_linreg_cache *model = NULL; size_t i; size_t j; int n_indep = 0; @@ -272,8 +267,6 @@ run_glm (struct casereader *input, pspp_linreg_opts lopts; - assert (model != NULL); - if (!casereader_peek (input, 0, &c)) { casereader_destroy (input); @@ -288,8 +281,6 @@ run_glm (struct casereader *input, 1u << DC_SYSTEM); } - - lopts.get_depvar_mean_std = 1; lopts.get_indep_mean_std = xnmalloc (n_dependent, sizeof (int)); @@ -321,6 +312,10 @@ run_glm (struct casereader *input, if ((n_data > 0) && (n_indep > 0)) { + for (i = 0; i < n_all_vars; i++) + if (var_is_alpha (all_vars[i])) + cat_stored_values_create (all_vars[i]); + X = covariance_matrix_create (n_all_vars, (const struct variable **) all_vars); @@ -334,6 +329,8 @@ run_glm (struct casereader *input, { const struct variable *v = all_vars[i]; const union value *val_v = case_data (&c, v); + if (var_is_alpha (all_vars[i])) + cat_value_update (all_vars[i], val_v); for (j = i; j < n_all_vars; j++) { const struct variable *w = all_vars[j]; @@ -344,7 +341,7 @@ run_glm (struct casereader *input, } } } - model = pspp_linreg_cache_alloc (v_dependent, indep_vars, n_data, n_indep); + model = pspp_linreg_cache_alloc (v_dependent[0], indep_vars, n_data, n_indep); /* For large data sets, use QR decomposition. */ diff --git a/src/math/linreg.c b/src/math/linreg.c index a7fa1565..306ef313 100644 --- a/src/math/linreg.c +++ b/src/math/linreg.c @@ -674,7 +674,7 @@ rearrange_covariance_matrix (const struct design_matrix *cov, pspp_linreg_cache assert (c != NULL); assert (cov->m->size1 > 0); assert (cov->m->size2 == cov->m->size1); - permutation = xnmalloc (cov->m->size2, sizeof (*permutation)); + permutation = xnmalloc (1 + c->n_indeps, sizeof (*permutation)); model_vars = xnmalloc (1 + c->n_indeps, sizeof (*model_vars)); /* @@ -688,17 +688,18 @@ rearrange_covariance_matrix (const struct design_matrix *cov, pspp_linreg_cache result = covariance_matrix_create (1 + c->n_indeps, model_vars); for (j = 0; j < cov->m->size2; j++) { - for (k = 0; k < result->m->size2; k++) + k = 0; + while (k < result->m->size2) { if (design_matrix_col_to_var (cov, j) == design_matrix_col_to_var (result, k)) { permutation[k] = j; - break; } + k++; } } - for (j = 0; j < result->m->size2; j++) - for (i = 0; i < result->m->size1; i++) + for (i = 0; i < result->m->size1; i++) + for (j = 0; j < result->m->size2; j++) { gsl_matrix_set (result->m, i, j, gsl_matrix_get (cov->m, permutation[i], permutation[j])); } diff --git a/src/math/linreg.h b/src/math/linreg.h index 5a5c4c8e..a9577d64 100644 --- a/src/math/linreg.h +++ b/src/math/linreg.h @@ -216,4 +216,9 @@ void pspp_linreg_set_indep_variable_sd (pspp_linreg_cache *, const struct variab */ double pspp_linreg_get_indep_variable_mean (pspp_linreg_cache *, const struct variable *); void pspp_linreg_set_indep_variable_mean (pspp_linreg_cache *, const struct variable *, double); + +/* + Regression using only the covariance matrix. + */ +int pspp_linreg_with_cov (const struct design_matrix *, pspp_linreg_cache *); #endif