X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=inline;f=src%2Flanguage%2Fstats%2Fcorrelations.c;h=ea7d6909bb9844e3893ccf343920faa8477dace0;hb=6ccced652fb274a08361fb844fb1c435a4c654d1;hp=3cadb8d1116bf33af6c993107ad83eabb4a0c97f;hpb=d75cdc824bc5170aeaaa76154f22f5c042d5944d;p=pspp-builds.git diff --git a/src/language/stats/correlations.c b/src/language/stats/correlations.c index 3cadb8d1..ea7d6909 100644 --- a/src/language/stats/correlations.c +++ b/src/language/stats/correlations.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -29,8 +29,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -46,21 +45,6 @@ #define N_(msgid) msgid -static double -significance_of_correlation (double rho, double w) -{ - double t = w - 2; - t /= 1 - MIN (1, pow2 (rho)); - t = sqrt (t); - t *= rho; - - if (t > 0) - return gsl_cdf_tdist_Q (t, w - 2); - else - return gsl_cdf_tdist_P (t, w - 2); -} - - struct corr { size_t n_vars_total; @@ -108,9 +92,8 @@ output_descriptives (const struct corr *corr, const gsl_matrix *means, const int heading_columns = 1; const int heading_rows = 1; - struct tab_table *t = tab_create (nc, nr, 0); + struct tab_table *t = tab_create (nc, nr); tab_title (t, _("Descriptive Statistics")); - tab_dim (t, tab_natural_dimensions, NULL); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -174,7 +157,8 @@ output_descriptives (const struct corr *corr, const gsl_matrix *means, static void output_correlation (const struct corr *corr, const struct corr_opts *opts, - const gsl_matrix *cm, const gsl_matrix *samples) + const gsl_matrix *cm, const gsl_matrix *samples, + const gsl_matrix *cv) { int r, c; struct tab_table *t; @@ -188,7 +172,10 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, const int heading_columns = 2; const int heading_rows = 1; - const int rows_per_variable = opts->missing_type == CORR_LISTWISE ? 2 : 3; + int rows_per_variable = opts->missing_type == CORR_LISTWISE ? 2 : 3; + + if (opts->statistics & STATS_XPROD) + rows_per_variable += 2; /* Two header columns */ nc += heading_columns; @@ -199,9 +186,8 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, /* One header row */ nr += heading_rows; - t = tab_create (nc, nr, 0); + t = tab_create (nc, nr); tab_title (t, _("Correlations")); - tab_dim (t, tab_natural_dimensions, NULL); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -230,8 +216,16 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, tab_text (t, 1, 1 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, _("Pearson Correlation")); tab_text (t, 1, 2 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, (opts->tails == 2) ? _("Sig. (2-tailed)") : _("Sig. (1-tailed)")); + + if (opts->statistics & STATS_XPROD) + { + tab_text (t, 1, 3 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, _("Cross-products")); + tab_text (t, 1, 4 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, _("Covariance")); + } + if ( opts->missing_type != CORR_LISTWISE ) - tab_text (t, 1, 3 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, _("N")); + tab_text (t, 1, rows_per_variable + r * rows_per_variable, TAB_LEFT | TAT_TITLE, _("N")); + tab_hline (t, TAL_1, 0, nc - 1, r * rows_per_variable + 1); } @@ -247,13 +241,13 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, for (c = 0 ; c < matrix_cols ; ++c) { unsigned char flags = 0; - int col_index = corr->n_vars_total - corr->n_vars1 + c; + const int col_index = corr->n_vars_total - corr->n_vars1 + c; double pearson = gsl_matrix_get (cm, r, col_index); double w = gsl_matrix_get (samples, r, col_index); double sig = opts->tails * significance_of_correlation (pearson, w); if ( opts->missing_type != CORR_LISTWISE ) - tab_double (t, c + heading_columns, row + 2, 0, w, wfmt); + tab_double (t, c + heading_columns, row + rows_per_variable - 1, 0, w, wfmt); if ( c != r) tab_double (t, c + heading_columns, row + 1, 0, sig, NULL); @@ -262,37 +256,21 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, flags = TAB_EMPH; tab_double (t, c + heading_columns, row, flags, pearson, NULL); - } - } - - tab_submit (t); -} + if (opts->statistics & STATS_XPROD) + { + double cov = gsl_matrix_get (cv, r, col_index); + const double xprod_dev = cov * w; + cov *= w / (w - 1.0); -static gsl_matrix * -correlation_from_covariance (const gsl_matrix *cv, const gsl_matrix *v) -{ - size_t i, j; - gsl_matrix *corr = gsl_matrix_calloc (cv->size1, cv->size2); - - for (i = 0 ; i < cv->size1; ++i) - { - for (j = 0 ; j < cv->size2; ++j) - { - double rho = gsl_matrix_get (cv, i, j); - - rho /= sqrt (gsl_matrix_get (v, i, j)) - * - sqrt (gsl_matrix_get (v, j, i)); - - gsl_matrix_set (corr, i, j, rho); + tab_double (t, c + heading_columns, row + 2, 0, xprod_dev, NULL); + tab_double (t, c + heading_columns, row + 3, 0, cov, NULL); + } } } - - return corr; -} - + tab_submit (t); +} static void @@ -302,16 +280,25 @@ run_corr (struct casereader *r, const struct corr_opts *opts, const struct corr const gsl_matrix *var_matrix, *samples_matrix, *mean_matrix; const gsl_matrix *cov_matrix; gsl_matrix *corr_matrix; - struct covariance *cov = covariance_create (corr->n_vars_total, corr->vars, - opts->wv, opts->exclude); + struct covariance *cov = covariance_2pass_create (corr->n_vars_total, corr->vars, + 0, NULL, + opts->wv, opts->exclude); + struct casereader *rc = casereader_clone (r); for ( ; (c = casereader_read (r) ); case_unref (c)) { - covariance_accumulate (cov, c); + covariance_accumulate_pass1 (cov, c); + } + + for ( ; (c = casereader_read (rc) ); case_unref (c)) + { + covariance_accumulate_pass2 (cov, c); } cov_matrix = covariance_calculate (cov); + casereader_destroy (rc); + samples_matrix = covariance_moments (cov, MOMENT_NONE); var_matrix = covariance_moments (cov, MOMENT_VARIANCE); mean_matrix = covariance_moments (cov, MOMENT_MEAN); @@ -323,7 +310,8 @@ run_corr (struct casereader *r, const struct corr_opts *opts, const struct corr output_correlation (corr, opts, corr_matrix, - samples_matrix ); + samples_matrix, + cov_matrix); covariance_destroy (cov); gsl_matrix_free (corr_matrix); @@ -409,8 +397,16 @@ cmd_correlation (struct lexer *lexer, struct dataset *ds) opts.statistics = STATS_DESCRIPTIVES; else if (lex_match_id (lexer, "XPROD")) opts.statistics = STATS_XPROD; - else - opts.statistics = STATS_ALL; + else if (lex_token (lexer) == T_ALL) + { + opts.statistics = STATS_ALL; + lex_get (lexer); + } + else + { + lex_error (lexer, NULL); + goto error; + } lex_match (lexer, ','); } @@ -502,10 +498,13 @@ cmd_correlation (struct lexer *lexer, struct dataset *ds) /* Done. */ + free (corr->vars); free (corr); + return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; error: + free (corr->vars); free (corr); return CMD_FAILURE; }