From: Ben Pfaff Date: Sat, 24 Oct 2009 15:42:33 +0000 (-0700) Subject: Merge "master" into "output". X-Git-Tag: sid-i386-build98^0 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cb586666724d5fcbdb658ce471b85484f0a7babe;p=pspp-builds.git Merge "master" into "output". --- cb586666724d5fcbdb658ce471b85484f0a7babe diff --cc NEWS index 7f6c2a0e,1aac688c..5f952324 --- a/NEWS +++ b/NEWS @@@ -1,5 -1,5 +1,5 @@@ PSPP NEWS -- history of user-visible changes. - Time-stamp: <2009-07-29 20:52:41 blp> -Time-stamp: <2009-09-08 21:08:29 blp> ++Time-stamp: <2009-10-24 08:12:04 blp> Copyright (C) 1996-9, 2000, 2008, 2009 Free Software Foundation, Inc. See the end for copying conditions. diff --cc acinclude.m4 index 6141edd2,307ce149..14d11dc7 --- a/acinclude.m4 +++ b/acinclude.m4 @@@ -34,8 -34,49 +34,15 @@@ AC_DEFUN([PSPP_PERL] if test "$PERL" != no && $PERL -e 'require 5.005_03;'; then :; else PSPP_REQUIRED_PREREQ([Perl 5.005_03 (or later)]) fi + + # The PSPP autobuilder appends a build number to the PSPP version number, + # e.g. "0.7.2-build40". But Perl won't parse version numbers that contain + # anything other than digits and periods, so "-build" causes an error. So we + # define $(VERSION_FOR_PERL) that drops everything from the hyphen onward. + VERSION_FOR_PERL=`echo "$VERSION" | sed 's/-.*//'` + AC_SUBST([VERSION_FOR_PERL]) ]) -dnl Check that libplot is available. -AC_DEFUN([PSPP_LIBPLOT], -[ - AC_ARG_WITH( - libplot, - [AS_HELP_STRING([--without-libplot], - [don't compile in support of charts (using libplot)])]) - - if test x"$with_libplot" != x"no" ; then - # Check whether we can link against libplot without any extra libraries. - AC_CHECK_LIB(plot, pl_newpl_r, [LIBPLOT_LIBS="-lplot"]) - - # Check whether we can link against libplot if we also link X. - if test x"$LIBPLOT_LIBS" = x""; then - AC_PATH_XTRA - extra_libs="-lXaw -lXmu -lXt $X_PRE_LIBS -lXext -lX11 $X_EXTRA_LIBS -lm" - AC_CHECK_LIB(plot, pl_newpl_r, - [LIBPLOT_LIBS="-lplot $extra_libs" - LDFLAGS="$LDFLAGS $X_LIBS"],, - [$extra_libs]) - fi - - # Still can't link? - if test x"$LIBPLOT_LIBS" = x""; then - PSPP_REQUIRED_PREREQ([libplot (or use --without-libplot)]) - fi - - # Set up to make everything work. - LIBS="$LIBPLOT_LIBS $LIBS" - AC_DEFINE(HAVE_LIBPLOT, 1, - [Define to 1 if you have the `libplot' library (-lplot).]) - fi -]) - dnl PSPP_CHECK_CC_OPTION([OPTION], [ACTION-IF-ACCEPTED], [ACTION-IF-REJECTED]) dnl Check whether the given C compiler OPTION is accepted. dnl If so, execute ACTION-IF-ACCEPTED, otherwise ACTION-IF-REJECTED. diff --cc src/language/stats/correlations.c index 00000000,e397dae5..ad90ef40 mode 000000,100644..100644 --- a/src/language/stats/correlations.c +++ b/src/language/stats/correlations.c @@@ -1,0 -1,542 +1,542 @@@ + /* PSPP - a program for statistical analysis. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + + #include + + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + + #include + #include "xalloc.h" + #include "minmax.h" + #include + #include + + #include "gettext.h" + #define _(msgid) gettext (msgid) + #define N_(msgid) msgid + + + static double + significance_of_correlation (double rho, double w) + { + double t = w - 2; + t /= 1 - MIN (1, pow2 (rho)); + t = sqrt (t); + t *= rho; + + if (t > 0) + return gsl_cdf_tdist_Q (t, w - 2); + else + return gsl_cdf_tdist_P (t, w - 2); + } + + + struct corr + { + size_t n_vars_total; + size_t n_vars1; + + const struct variable **vars; + }; + + + /* Handling of missing values. */ + enum corr_missing_type + { + CORR_PAIRWISE, /* Handle missing values on a per-variable-pair basis. */ + CORR_LISTWISE /* Discard entire case if any variable is missing. */ + }; + + enum stats_opts + { + STATS_DESCRIPTIVES = 0x01, + STATS_XPROD = 0x02, + STATS_ALL = STATS_XPROD | STATS_DESCRIPTIVES + }; + + struct corr_opts + { + enum corr_missing_type missing_type; + enum mv_class exclude; /* Classes of missing values to exclude. */ + + bool sig; /* Flag significant values or not */ + int tails; /* Report significance with how many tails ? */ + enum stats_opts statistics; + + const struct variable *wv; /* The weight variable (if any) */ + }; + + + static void + output_descriptives (const struct corr *corr, const gsl_matrix *means, + const gsl_matrix *vars, const gsl_matrix *ns) + { + const int nr = corr->n_vars_total + 1; + const int nc = 4; + int c, r; + + const int heading_columns = 1; + const int heading_rows = 1; + - struct tab_table *t = tab_create (nc, nr, 0); ++ struct tab_table *t = tab_create (nc, nr); + tab_title (t, _("Descriptive Statistics")); - tab_dim (t, tab_natural_dimensions, NULL); ++ tab_dim (t, tab_natural_dimensions, NULL, NULL); + + tab_headers (t, heading_columns, 0, heading_rows, 0); + + /* Outline the box */ + tab_box (t, + TAL_2, TAL_2, + -1, -1, + 0, 0, + nc - 1, nr - 1); + + /* Vertical lines */ + tab_box (t, + -1, -1, + -1, TAL_1, + heading_columns, 0, + nc - 1, nr - 1); + + tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + tab_hline (t, TAL_1, 0, nc - 1, heading_rows); + + tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("N")); + + for (r = 0 ; r < corr->n_vars_total ; ++r) + { + const struct variable *v = corr->vars[r]; + tab_text (t, 0, r + heading_rows, TAB_LEFT | TAT_TITLE, var_to_string (v)); + + for (c = 1 ; c < nc ; ++c) + { + double x ; + double n; + switch (c) + { + case 1: + x = gsl_matrix_get (means, r, 0); + break; + case 2: + x = gsl_matrix_get (vars, r, 0); + + /* Here we want to display the non-biased estimator */ + n = gsl_matrix_get (ns, r, 0); + x *= n / (n -1); + + x = sqrt (x); + break; + case 3: + x = gsl_matrix_get (ns, r, 0); + break; + default: + NOT_REACHED (); + }; + + tab_double (t, c, r + heading_rows, 0, x, NULL); + } + } + + tab_submit (t); + } + + static void + output_correlation (const struct corr *corr, const struct corr_opts *opts, + const gsl_matrix *cm, const gsl_matrix *samples, + const gsl_matrix *cv) + { + int r, c; + struct tab_table *t; + int matrix_cols; + int nr = corr->n_vars1; + int nc = matrix_cols = corr->n_vars_total > corr->n_vars1 ? + corr->n_vars_total - corr->n_vars1 : corr->n_vars1; + + const struct fmt_spec *wfmt = opts->wv ? var_get_print_format (opts->wv) : & F_8_0; + + const int heading_columns = 2; + const int heading_rows = 1; + + int rows_per_variable = opts->missing_type == CORR_LISTWISE ? 2 : 3; + + if (opts->statistics & STATS_XPROD) + rows_per_variable += 2; + + /* Two header columns */ + nc += heading_columns; + + /* Three data per variable */ + nr *= rows_per_variable; + + /* One header row */ + nr += heading_rows; + - t = tab_create (nc, nr, 0); ++ t = tab_create (nc, nr); + tab_title (t, _("Correlations")); - tab_dim (t, tab_natural_dimensions, NULL); ++ tab_dim (t, tab_natural_dimensions, NULL, NULL); + + tab_headers (t, heading_columns, 0, heading_rows, 0); + + /* Outline the box */ + tab_box (t, + TAL_2, TAL_2, + -1, -1, + 0, 0, + nc - 1, nr - 1); + + /* Vertical lines */ + tab_box (t, + -1, -1, + -1, TAL_1, + heading_columns, 0, + nc - 1, nr - 1); + + tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + tab_vline (t, TAL_1, 1, heading_rows, nr - 1); + + for (r = 0 ; r < corr->n_vars1 ; ++r) + { + tab_text (t, 0, 1 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, + var_to_string (corr->vars[r])); + + tab_text (t, 1, 1 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, _("Pearson Correlation")); + tab_text (t, 1, 2 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, + (opts->tails == 2) ? _("Sig. (2-tailed)") : _("Sig. (1-tailed)")); + + if (opts->statistics & STATS_XPROD) + { + tab_text (t, 1, 3 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, _("Cross-products")); + tab_text (t, 1, 4 + r * rows_per_variable, TAB_LEFT | TAT_TITLE, _("Covariance")); + } + + if ( opts->missing_type != CORR_LISTWISE ) + tab_text (t, 1, rows_per_variable + r * rows_per_variable, TAB_LEFT | TAT_TITLE, _("N")); + + tab_hline (t, TAL_1, 0, nc - 1, r * rows_per_variable + 1); + } + + for (c = 0 ; c < matrix_cols ; ++c) + { + const struct variable *v = corr->n_vars_total > corr->n_vars1 ? corr->vars[corr->n_vars_total - corr->n_vars1 + c] : corr->vars[c]; + tab_text (t, heading_columns + c, 0, TAB_LEFT | TAT_TITLE, var_to_string (v)); + } + + for (r = 0 ; r < corr->n_vars1 ; ++r) + { + const int row = r * rows_per_variable + heading_rows; + for (c = 0 ; c < matrix_cols ; ++c) + { + unsigned char flags = 0; + const int col_index = corr->n_vars_total - corr->n_vars1 + c; + double pearson = gsl_matrix_get (cm, r, col_index); + double w = gsl_matrix_get (samples, r, col_index); + double sig = opts->tails * significance_of_correlation (pearson, w); + + if ( opts->missing_type != CORR_LISTWISE ) + tab_double (t, c + heading_columns, row + rows_per_variable - 1, 0, w, wfmt); + + if ( c != r) + tab_double (t, c + heading_columns, row + 1, 0, sig, NULL); + + if ( opts->sig && c != r && sig < 0.05) + flags = TAB_EMPH; + + tab_double (t, c + heading_columns, row, flags, pearson, NULL); + + if (opts->statistics & STATS_XPROD) + { + double cov = gsl_matrix_get (cv, r, col_index); + const double xprod_dev = cov * w; + cov *= w / (w - 1.0); + + tab_double (t, c + heading_columns, row + 2, 0, xprod_dev, NULL); + tab_double (t, c + heading_columns, row + 3, 0, cov, NULL); + } + } + } + + tab_submit (t); + } + + + static gsl_matrix * + correlation_from_covariance (const gsl_matrix *cv, const gsl_matrix *v) + { + size_t i, j; + gsl_matrix *corr = gsl_matrix_calloc (cv->size1, cv->size2); + + for (i = 0 ; i < cv->size1; ++i) + { + for (j = 0 ; j < cv->size2; ++j) + { + double rho = gsl_matrix_get (cv, i, j); + + rho /= sqrt (gsl_matrix_get (v, i, j)) + * + sqrt (gsl_matrix_get (v, j, i)); + + gsl_matrix_set (corr, i, j, rho); + } + } + + return corr; + } + + + + + static void + run_corr (struct casereader *r, const struct corr_opts *opts, const struct corr *corr) + { + struct ccase *c; + const gsl_matrix *var_matrix, *samples_matrix, *mean_matrix; + const gsl_matrix *cov_matrix; + gsl_matrix *corr_matrix; + struct covariance *cov = covariance_create (corr->n_vars_total, corr->vars, + opts->wv, opts->exclude); + + for ( ; (c = casereader_read (r) ); case_unref (c)) + { + covariance_accumulate (cov, c); + } + + cov_matrix = covariance_calculate (cov); + + samples_matrix = covariance_moments (cov, MOMENT_NONE); + var_matrix = covariance_moments (cov, MOMENT_VARIANCE); + mean_matrix = covariance_moments (cov, MOMENT_MEAN); + + corr_matrix = correlation_from_covariance (cov_matrix, var_matrix); + + if ( opts->statistics & STATS_DESCRIPTIVES) + output_descriptives (corr, mean_matrix, var_matrix, samples_matrix); + + output_correlation (corr, opts, + corr_matrix, + samples_matrix, + cov_matrix); + + covariance_destroy (cov); + gsl_matrix_free (corr_matrix); + } + + int + cmd_correlation (struct lexer *lexer, struct dataset *ds) + { + int i; + int n_all_vars = 0; /* Total number of variables involved in this command */ + const struct variable **all_vars ; + const struct dictionary *dict = dataset_dict (ds); + bool ok = true; + + struct casegrouper *grouper; + struct casereader *group; + + struct corr *corr = NULL; + size_t n_corrs = 0; + + struct corr_opts opts; + opts.missing_type = CORR_PAIRWISE; + opts.wv = dict_get_weight (dict); + opts.tails = 2; + opts.sig = false; + opts.exclude = MV_ANY; + opts.statistics = 0; + + /* Parse CORRELATIONS. */ + while (lex_token (lexer) != '.') + { + lex_match (lexer, '/'); + if (lex_match_id (lexer, "MISSING")) + { + lex_match (lexer, '='); + while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + { + if (lex_match_id (lexer, "PAIRWISE")) + opts.missing_type = CORR_PAIRWISE; + else if (lex_match_id (lexer, "LISTWISE")) + opts.missing_type = CORR_LISTWISE; + + else if (lex_match_id (lexer, "INCLUDE")) + opts.exclude = MV_SYSTEM; + else if (lex_match_id (lexer, "EXCLUDE")) + opts.exclude = MV_ANY; + else + { + lex_error (lexer, NULL); + goto error; + } + lex_match (lexer, ','); + } + } + else if (lex_match_id (lexer, "PRINT")) + { + lex_match (lexer, '='); + while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + { + if ( lex_match_id (lexer, "TWOTAIL")) + opts.tails = 2; + else if (lex_match_id (lexer, "ONETAIL")) + opts.tails = 1; + else if (lex_match_id (lexer, "SIG")) + opts.sig = false; + else if (lex_match_id (lexer, "NOSIG")) + opts.sig = true; + else + { + lex_error (lexer, NULL); + goto error; + } + + lex_match (lexer, ','); + } + } + else if (lex_match_id (lexer, "STATISTICS")) + { + lex_match (lexer, '='); + while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + { + if ( lex_match_id (lexer, "DESCRIPTIVES")) + opts.statistics = STATS_DESCRIPTIVES; + else if (lex_match_id (lexer, "XPROD")) + opts.statistics = STATS_XPROD; + else if (lex_token (lexer) == T_ALL) + { + opts.statistics = STATS_ALL; + lex_get (lexer); + } + else + { + lex_error (lexer, NULL); + goto error; + } + + lex_match (lexer, ','); + } + } + else + { + if (lex_match_id (lexer, "VARIABLES")) + { + lex_match (lexer, '='); + } + + corr = xrealloc (corr, sizeof (*corr) * (n_corrs + 1)); + corr[n_corrs].n_vars_total = corr[n_corrs].n_vars1 = 0; + + if ( ! parse_variables_const (lexer, dict, &corr[n_corrs].vars, + &corr[n_corrs].n_vars_total, + PV_NUMERIC)) + { + ok = false; + break; + } + + + corr[n_corrs].n_vars1 = corr[n_corrs].n_vars_total; + + if ( lex_match (lexer, T_WITH)) + { + if ( ! parse_variables_const (lexer, dict, + &corr[n_corrs].vars, &corr[n_corrs].n_vars_total, + PV_NUMERIC | PV_APPEND)) + { + ok = false; + break; + } + } + + n_all_vars += corr[n_corrs].n_vars_total; + + n_corrs++; + } + } + + if (n_corrs == 0) + { + msg (SE, _("No variables specified.")); + goto error; + } + + + all_vars = xmalloc (sizeof (*all_vars) * n_all_vars); + + { + /* FIXME: Using a hash here would make more sense */ + const struct variable **vv = all_vars; + + for (i = 0 ; i < n_corrs; ++i) + { + int v; + const struct corr *c = &corr[i]; + for (v = 0 ; v < c->n_vars_total; ++v) + *vv++ = c->vars[v]; + } + } + + grouper = casegrouper_create_splits (proc_open (ds), dict); + + while (casegrouper_get_next_group (grouper, &group)) + { + for (i = 0 ; i < n_corrs; ++i) + { + /* FIXME: No need to iterate the data multiple times */ + struct casereader *r = casereader_clone (group); + + if ( opts.missing_type == CORR_LISTWISE) + r = casereader_create_filter_missing (r, all_vars, n_all_vars, + opts.exclude, NULL, NULL); + + + run_corr (r, &opts, &corr[i]); + casereader_destroy (r); + } + casereader_destroy (group); + } + + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + + free (all_vars); + + + /* Done. */ + free (corr); + return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; + + error: + free (corr); + return CMD_FAILURE; + } diff --cc src/language/stats/frequencies.q index 76ba5d3f,5a704d0d..e1d6fb0e --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@@ -1081,21 -1049,17 +1065,20 @@@ dump_full (const struct variable *v, co vf = get_var_freqs (v); ft = &vf->tab; n_categories = ft->n_valid + ft->n_missing; - t = tab_create (5 + lab, n_categories + 3); - tab_headers (t, 0, 0, 2, 0); - t = tab_create (5 + lab, n_categories + 2, 0); ++ t = tab_create (5 + lab, n_categories + 2); + tab_headers (t, 0, 0, 1, 0); - tab_dim (t, full_dim, NULL); + + aux = xmalloc (sizeof *aux); + aux->show_labels = lab; + tab_dim (t, full_dim, full_dim_free, aux); if (lab) - tab_text (t, 0, 1, TAB_CENTER | TAT_TITLE, _("Value Label")); + tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Value Label")); - for (p = vec; p->s; p++) - tab_text (t, lab ? p->c : p->c - 1, p->r, - TAB_CENTER | TAT_TITLE, gettext (p->s)); + for (x = 0; x < 5; x++) + tab_text (t, lab + x, 0, TAB_CENTER | TAT_TITLE, gettext (headings[x])); - r = 2; + r = 1; for (f = ft->valid; f < ft->missing; f++) { double percent, valid_percent; diff --cc src/ui/gui/automake.mk index 838f0d69,7c06b8e4..05f36219 --- a/src/ui/gui/automake.mk +++ b/src/ui/gui/automake.mk @@@ -29,8 -26,7 +26,8 @@@ src_ui_gui_psppire_LDADD = src/libpspp.la \ src/libpspp-core.la \ $(GTK_LIBS) \ + $(CAIRO_LIBS) \ - @LIBINTL@ + $(LIBINTL) src_ui_gui_psppiredir = $(pkgdatadir) diff --cc src/ui/terminal/automake.mk index 63b83384,9cde11c9..81b896dc --- a/src/ui/terminal/automake.mk +++ b/src/ui/terminal/automake.mk @@@ -25,10 -25,9 +25,10 @@@ src_ui_terminal_pspp_LDADD = src/ui/libuicommon.la \ src/libpspp.la \ src/libpspp-core.la \ + $(CAIRO_LIBS) \ $(NCURSES_LIBS) \ $(LIBICONV) \ - @LIBINTL@ @LIBREADLINE@ + $(LIBINTL) $(LIBREADLINE) src_ui_terminal_pspp_LDFLAGS = $(PSPP_LDFLAGS) $(PG_LDFLAGS)