X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fcorrelations.c;h=acb12fb3228f86c1938bc43724d044021ee32119;hb=refs%2Fbuilds%2F20131130030507%2Fpspp;hp=605609a7d68cc559e42cc605cf2543f49e9a441d;hpb=50ac6802fc247814dc4dd6232f6304b928a2d78b;p=pspp diff --git a/src/language/stats/correlations.c b/src/language/stats/correlations.c index 605609a7d6..acb12fb322 100644 --- a/src/language/stats/correlations.c +++ b/src/language/stats/correlations.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2009 Free Software Foundation, Inc. + Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,31 +16,30 @@ #include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include -#include "xalloc.h" -#include "minmax.h" -#include -#include + +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/dictionary/split-file.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "libpspp/assertion.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "math/correlation.h" +#include "math/covariance.h" +#include "math/moments.h" +#include "output/tab.h" + +#include "gl/xalloc.h" +#include "gl/minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -94,9 +93,8 @@ output_descriptives (const struct corr *corr, const gsl_matrix *means, const int heading_columns = 1; const int heading_rows = 1; - struct tab_table *t = tab_create (nc, nr, 0); + struct tab_table *t = tab_create (nc, nr); tab_title (t, _("Descriptive Statistics")); - tab_dim (t, tab_natural_dimensions, NULL); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -189,9 +187,8 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, /* One header row */ nr += heading_rows; - t = tab_create (nc, nr, 0); + t = tab_create (nc, nr); tab_title (t, _("Correlations")); - tab_dim (t, tab_natural_dimensions, NULL); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -210,8 +207,10 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, nc - 1, nr - 1); tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + tab_vline (t, TAL_1, 1, heading_rows, nr - 1); + /* Row Headers */ for (r = 0 ; r < corr->n_vars1 ; ++r) { tab_text (t, 0, 1 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, @@ -233,9 +232,11 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, tab_hline (t, TAL_1, 0, nc - 1, r * rows_per_variable + 1); } + /* Column Headers */ for (c = 0 ; c < matrix_cols ; ++c) { - const struct variable *v = corr->n_vars_total > corr->n_vars1 ? corr->vars[corr->n_vars_total - corr->n_vars1 + c] : corr->vars[c]; + const struct variable *v = corr->n_vars_total > corr->n_vars1 ? + corr->vars[corr->n_vars1 + c] : corr->vars[c]; tab_text (t, heading_columns + c, 0, TAB_LEFT | TAT_TITLE, var_to_string (v)); } @@ -245,7 +246,9 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, for (c = 0 ; c < matrix_cols ; ++c) { unsigned char flags = 0; - const int col_index = corr->n_vars_total - corr->n_vars1 + c; + const int col_index = corr->n_vars_total > corr->n_vars1 ? + corr->n_vars1 + c : + c; double pearson = gsl_matrix_get (cm, r, col_index); double w = gsl_matrix_get (samples, r, col_index); double sig = opts->tails * significance_of_correlation (pearson, w); @@ -253,10 +256,10 @@ output_correlation (const struct corr *corr, const struct corr_opts *opts, if ( opts->missing_type != CORR_LISTWISE ) tab_double (t, c + heading_columns, row + rows_per_variable - 1, 0, w, wfmt); - if ( c != r) + if ( col_index != r) tab_double (t, c + heading_columns, row + 1, 0, sig, NULL); - if ( opts->sig && c != r && sig < 0.05) + if ( opts->sig && col_index != r && sig < 0.05) flags = TAB_EMPH; tab_double (t, c + heading_columns, row, flags, pearson, NULL); @@ -282,18 +285,27 @@ run_corr (struct casereader *r, const struct corr_opts *opts, const struct corr { struct ccase *c; const gsl_matrix *var_matrix, *samples_matrix, *mean_matrix; - const gsl_matrix *cov_matrix; + gsl_matrix *cov_matrix; gsl_matrix *corr_matrix; - struct covariance *cov = covariance_create (corr->n_vars_total, corr->vars, - opts->wv, opts->exclude); + struct covariance *cov = covariance_2pass_create (corr->n_vars_total, corr->vars, + NULL, + opts->wv, opts->exclude); + struct casereader *rc = casereader_clone (r); for ( ; (c = casereader_read (r) ); case_unref (c)) { - covariance_accumulate (cov, c); + covariance_accumulate_pass1 (cov, c); + } + + for ( ; (c = casereader_read (rc) ); case_unref (c)) + { + covariance_accumulate_pass2 (cov, c); } cov_matrix = covariance_calculate (cov); + casereader_destroy (rc); + samples_matrix = covariance_moments (cov, MOMENT_NONE); var_matrix = covariance_moments (cov, MOMENT_VARIANCE); mean_matrix = covariance_moments (cov, MOMENT_MEAN); @@ -303,13 +315,12 @@ run_corr (struct casereader *r, const struct corr_opts *opts, const struct corr if ( opts->statistics & STATS_DESCRIPTIVES) output_descriptives (corr, mean_matrix, var_matrix, samples_matrix); - output_correlation (corr, opts, - corr_matrix, - samples_matrix, - cov_matrix); + output_correlation (corr, opts, corr_matrix, + samples_matrix, cov_matrix); covariance_destroy (cov); gsl_matrix_free (corr_matrix); + gsl_matrix_free (cov_matrix); } int @@ -336,13 +347,13 @@ cmd_correlation (struct lexer *lexer, struct dataset *ds) opts.statistics = 0; /* Parse CORRELATIONS. */ - while (lex_token (lexer) != '.') + while (lex_token (lexer) != T_ENDCMD) { - lex_match (lexer, '/'); + lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "MISSING")) { - lex_match (lexer, '='); - while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "PAIRWISE")) opts.missing_type = CORR_PAIRWISE; @@ -358,13 +369,13 @@ cmd_correlation (struct lexer *lexer, struct dataset *ds) lex_error (lexer, NULL); goto error; } - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); } } else if (lex_match_id (lexer, "PRINT")) { - lex_match (lexer, '='); - while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if ( lex_match_id (lexer, "TWOTAIL")) opts.tails = 2; @@ -380,13 +391,13 @@ cmd_correlation (struct lexer *lexer, struct dataset *ds) goto error; } - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); } } else if (lex_match_id (lexer, "STATISTICS")) { - lex_match (lexer, '='); - while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if ( lex_match_id (lexer, "DESCRIPTIVES")) opts.statistics = STATS_DESCRIPTIVES; @@ -403,14 +414,14 @@ cmd_correlation (struct lexer *lexer, struct dataset *ds) goto error; } - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); } } else { if (lex_match_id (lexer, "VARIABLES")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); } corr = xrealloc (corr, sizeof (*corr) * (n_corrs + 1)); @@ -493,10 +504,13 @@ cmd_correlation (struct lexer *lexer, struct dataset *ds) /* Done. */ + free (corr->vars); free (corr); + return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; error: + free (corr->vars); free (corr); return CMD_FAILURE; }