X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fregression.q;h=80f9c0293c0386ae51372ae3bf0ac29c666a989f;hb=871f4456a207925fdce3df3150af3f3b263b2776;hp=a5851ff34e4857b2ee086ee8a630ffb9de7091c9;hpb=9f650fc3d2946c216e6cd3c7922a8a63d0f97117;p=pspp-builds.git diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q index a5851ff3..80f9c029 100644 --- a/src/language/stats/regression.q +++ b/src/language/stats/regression.q @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "gettext.h" @@ -82,6 +83,15 @@ /* (functions) */ static struct cmd_regression cmd; +/* + Moments for each of the variables used. + */ +struct moments_var +{ + struct moments1 *m; + struct variable *v; +}; + /* Linear regression models. */ static pspp_linreg_cache **models = NULL; @@ -269,7 +279,7 @@ reg_stats_coeff (pspp_linreg_cache * c) /* P values for the test statistic above. */ - pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), 1.0); + pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), (double) (c->n_obs - c->n_coeffs)); tab_float (t, 6, j + 1, 0, pval, 10, 2); } tab_title (t, _("Coefficients")); @@ -765,8 +775,8 @@ reg_print_categorical_encoding (FILE * fp, pspp_linreg_cache * c) for (i = 0; i < n_vars; i++) { - size_t n_categories = cat_get_n_categories (varlist[i]); - size_t j; + int n_categories = cat_get_n_categories (varlist[i]); + int j; fprintf (fp, "%s.name = \"%s\";\n\t", var_get_name (varlist[i]), @@ -961,15 +971,17 @@ is_depvar (size_t k, const struct variable *v) /* Mark missing cases. Return the number of non-missing cases. + Compute the first two moments. */ static size_t mark_missing_cases (const struct casefile *cf, struct variable *v, - int *is_missing_case, double n_data) + int *is_missing_case, double n_data, struct moments_var *mom) { struct casereader *r; struct ccase c; size_t row; const union value *val; + double w = 1.0; for (r = casefile_get_reader (cf, NULL); casereader_read (r, &c); case_destroy (&c)) @@ -977,6 +989,10 @@ mark_missing_cases (const struct casefile *cf, struct variable *v, row = casereader_cnum (r) - 1; val = case_data (&c, v); + if (mom != NULL) + { + moments1_add (mom->m, val->f, w); + } cat_value_update (v, val); if (var_is_value_missing (v, val, MV_ANY)) { @@ -1049,7 +1065,7 @@ get_n_indep (const struct variable *v) static int prepare_data (int n_data, int is_missing_case[], struct variable **indep_vars, - struct variable *depvar, const struct casefile *cf) + struct variable *depvar, const struct casefile *cf, struct moments_var *mom) { int i; int j; @@ -1069,13 +1085,13 @@ prepare_data (int n_data, int is_missing_case[], cat_stored_values_create (v_variables[i]); } n_data = - mark_missing_cases (cf, v_variables[i], is_missing_case, n_data); + mark_missing_cases (cf, v_variables[i], is_missing_case, n_data, mom + i); } } /* Mark missing cases for the dependent variable. */ - n_data = mark_missing_cases (cf, depvar, is_missing_case, n_data); + n_data = mark_missing_cases (cf, depvar, is_missing_case, n_data, NULL); return n_data; } @@ -1088,6 +1104,37 @@ coeff_init (pspp_linreg_cache * c, struct design_matrix *dm) pspp_coeff_init (c->coeff + 1, dm); } +/* + Put the moments in the linreg cache. + */ +static void +compute_moments (pspp_linreg_cache *c, struct moments_var *mom, struct design_matrix *dm, size_t n) +{ + size_t i; + size_t j; + double weight; + double mean; + double variance; + double skewness; + double kurtosis; + /* + Scan the variable names in the columns of the design matrix. + When we find the variable we need, insert its mean in the cache. + */ + for (i = 0; i < dm->m->size2; i++) + { + for (j = 0; j < n; j++) + { + if (design_matrix_col_to_var (dm, i) == (mom + j)->v) + { + moments1_calculate ((mom + j)->m, &weight, &mean, &variance, + &skewness, &kurtosis); + gsl_vector_set (c->indep_means, i, mean); + gsl_vector_set (c->indep_std, i, sqrt (variance)); + } + } + } +} static bool run_regression (const struct ccase *first, const struct casefile *cf, void *cmd_ UNUSED, const struct dataset *ds) @@ -1108,6 +1155,7 @@ run_regression (const struct ccase *first, struct ccase c; struct variable **indep_vars; struct design_matrix *X; + struct moments_var *mom; gsl_vector *Y; pspp_linreg_opts lopts; @@ -1135,7 +1183,12 @@ run_regression (const struct ccase *first, } is_missing_case = xnmalloc (n_cases, sizeof (*is_missing_case)); - + mom = xnmalloc (n_variables, sizeof (*mom)); + for (i = 0; i < n_variables; i++) + { + (mom + i)->m = moments1_create (MOMENT_VARIANCE); + (mom + i)->v = v_variables[i]; + } lopts.get_depvar_mean_std = 1; for (k = 0; k < cmd.n_dependent; k++) @@ -1151,7 +1204,7 @@ run_regression (const struct ccase *first, } n_data = prepare_data (n_cases, is_missing_case, indep_vars, cmd.v_dependent[k], - (const struct casefile *) cf); + (const struct casefile *) cf, mom); Y = gsl_vector_alloc (n_data); X = @@ -1228,6 +1281,7 @@ run_regression (const struct ccase *first, Find the least-squares estimates and other statistics. */ pspp_linreg ((const gsl_vector *) Y, X->m, &lopts, models[k]); + compute_moments (models[k], mom, X, n_variables); subcommand_statistics (cmd.a_statistics, models[k]); subcommand_export (cmd.sbc_export, models[k]); @@ -1237,7 +1291,11 @@ run_regression (const struct ccase *first, free (lopts.get_indep_mean_std); casereader_destroy (r); } - + for (i = 0; i < n_variables; i++) + { + moments1_destroy ((mom + i)->m); + } + free (mom); free (is_missing_case); return true;