From 502855f1d746c4acf920006326daa8c86a561a5b Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 20 Sep 2008 08:36:49 +0800 Subject: [PATCH] Added the /WILCOXON subcommand to NPAR TESTS Implemented wilcoxon signed rank test. Thanks to Ben for review. Closes patch #6635 --- AUTHORS | 5 + doc/statistics.texi | 41 ++++ lib/automake.mk | 1 + lib/misc/README | 2 + lib/misc/automake.mk | 8 + lib/misc/wx-mp-sr.c | 101 ++++++++ lib/misc/wx-mp-sr.h | 6 + src/language/stats/automake.mk | 4 +- src/language/stats/binomial.c | 4 +- src/language/stats/binomial.h | 3 +- src/language/stats/chisquare.c | 4 +- src/language/stats/chisquare.h | 4 +- src/language/stats/npar.h | 6 +- src/language/stats/npar.q | 136 ++++++++--- src/language/stats/wilcoxon.c | 419 +++++++++++++++++++++++++++++++++ src/language/stats/wilcoxon.h | 65 +++++ src/ui/gui/automake.mk | 1 + src/ui/gui/psppire.c | 4 + src/ui/terminal/automake.mk | 1 + src/ui/terminal/main.c | 1 + tests/automake.mk | 1 + tests/command/npar-wilcoxon.sh | 174 ++++++++++++++ 22 files changed, 948 insertions(+), 43 deletions(-) create mode 100644 lib/misc/README create mode 100644 lib/misc/automake.mk create mode 100644 lib/misc/wx-mp-sr.c create mode 100644 lib/misc/wx-mp-sr.h create mode 100644 src/language/stats/wilcoxon.c create mode 100644 src/language/stats/wilcoxon.h create mode 100755 tests/command/npar-wilcoxon.sh diff --git a/AUTHORS b/AUTHORS index 12b25ab7..43103dcd 100644 --- a/AUTHORS +++ b/AUTHORS @@ -14,6 +14,11 @@ to other modules. including lib/gslextras and the linear regression features. Jason is also an important contributor to GSL, which is used by PSPP. +* Rob van Son wrote the routine for calculation of the significance +of the Wilcoxon matched pairs signed rank statistic used by the + NPAR TEST command. + + We also thank past contributors: * John Williams wrote an initial draft of the T-TEST procedure. diff --git a/doc/statistics.texi b/doc/statistics.texi index a67402c0..89e5e83c 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -506,6 +506,8 @@ NPAR TESTS [ /STATISTICS=@{DESCRIPTIVES@} ] [ /MISSING=@{ANALYSIS, LISTWISE@} @{INCLUDE, EXCLUDE@} ] + + [ /METHOD=EXACT [ TIMER [(n)] ] ] @end display NPAR TESTS performs nonparametric tests. @@ -515,10 +517,21 @@ One or more tests may be specified by using the corresponding subcommand. If the /STATISTICS subcommand is also specified, then summary statistics are produces for each variable that is the subject of any test. +Certain tests may take a long time to execute, if an exact figure is required. +Therefore, by default asymptotic approximations are used unless the +subcommand /METHOD=EXACT is specified. +Exact tests give more accurate results, but may take an unacceptably long +time to perform. If the TIMER keyword is used, it sets a maximum time, +after which the test will be abandoned, and a warning message printed. +The time, in minutes, should be specified in parentheses after the TIMER keyword. +If the TIMER keyword is given without this figure, then a default value of 5 minutes +is used. + @menu * BINOMIAL:: Binomial Test * CHISQUARE:: Chisquare Test +* WILCOXON:: Wilcoxon Signed Ranks Test @end menu @@ -598,6 +611,34 @@ sum of the frequencies need not be 1. If no /EXPECTED subcommand is given, then then equal frequencies are expected. +@node WILCOXON +@subsection Wilcoxon +@comment node-name, next, previous, up +@vindex WILCOXON +@cindex wilcoxon matched pairs signed ranks test + +@display + [ /WILCOXON varlist [ WITH varlist [ (PAIRED) ]]] +@end display + +The wilcoxon subcommand tests for differences between means of the +variables listed. The test does not make any assumptions about the +variances of the samples. + +If the @code{WITH} keyword is omitted, then tests for all +combinations of the listed variables are performed. +If the @code{WITH} keyword is given, and the @code{(PAIRED)} keyword +is also given, then the number of variables preceding @code{WITH} +must be the same as the number following it. +In this case, tests for each respective pair of variables are +performed. +If the @code{WITH} keyword is given, but the +@code{(PAIRED)} keyword is omitted, then tests for each combination +of variable preceding @code{WITH} against variable following +@code{WITH} are performed. + +If the number of observations is large, and exact tests have been +requested. then the test may take a very long time to complete. @node T-TEST @comment node-name, next, previous, up diff --git a/lib/automake.mk b/lib/automake.mk index 6410988b..9197da97 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -1,6 +1,7 @@ ## Process this file with automake to produce Makefile.in -*- makefile -*- include $(top_srcdir)/lib/linreg/automake.mk +include $(top_srcdir)/lib/misc/automake.mk if WITHGUI include $(top_srcdir)/lib/gtksheet/automake.mk diff --git a/lib/misc/README b/lib/misc/README new file mode 100644 index 00000000..68ba91ad --- /dev/null +++ b/lib/misc/README @@ -0,0 +1,2 @@ +This is not part of the GNU PSPP program, but is used with GNU PSPP. + diff --git a/lib/misc/automake.mk b/lib/misc/automake.mk new file mode 100644 index 00000000..01bfd2b4 --- /dev/null +++ b/lib/misc/automake.mk @@ -0,0 +1,8 @@ +## Process this file with automake to produce Makefile.in -*- makefile -*- + +noinst_LIBRARIES += lib/misc/libmisc.a + +lib_misc_libmisc_a_SOURCES = \ + lib/misc/wx-mp-sr.c lib/misc/wx-mp-sr.h + +EXTRA_DIST += lib/misc/README diff --git a/lib/misc/wx-mp-sr.c b/lib/misc/wx-mp-sr.c new file mode 100644 index 00000000..39e83b97 --- /dev/null +++ b/lib/misc/wx-mp-sr.c @@ -0,0 +1,101 @@ +#include +#include "wx-mp-sr.h" + +/********************************************************************* +* +* Calculate the exact level of significance for a +* Wilcoxon Matched-Pair Signed-Ranks Test using the sample's +* Sum of Ranks W and the sample size (i.e., number of pairs) N. +* This whole routine can be run as a stand-alone program. +* +* Use: +* WX-MP-SR W N +* +* Copyright 1996, Rob van Son +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License along +* with this program; if not, write to the Free Software Foundation, Inc., +* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +* +* ------------------------------------------------------- +* Rob van Son +* Institute of Phonetic Sciences & IFOTT +* University of Amsterdam, Spuistraat 210 +* NL-1012VT Amsterdam, The Netherlands +* Tel.: (+31) 205252196 Fax.: (+31) 205252197 +* Email: r.j.j.h.vanson@uva.nl +* WWW page: http://www.fon.hum.uva.nl/rob +* ------------------------------------------------------- +* +* This is the actual routine that calculates the exact (two-tailed) +* level of significance for the Wilcoxon Matched-Pairs Signed-Ranks +* test. The inputs are the Sum of Ranks of either the positive of +* negative samples (W) and the sample size (N). +* The Level of significance is calculated by checking for each +* possible outcome (2**N possibilities) whether the sum of ranks +* is larger than or equal to the observed Sum of Ranks (W). +* +* NOTE: The execution-time scales like ~ N*2**N, i.e., N*pow(2, N), +* which is more than exponential. Adding a single pair to the sample +* (i.e., increase N by 1) will more than double the time needed to +* complete the calculations (apart from an additive constant). +* The execution-time of this program can easily outrun your +* patience. +* +***********************************************************************/ + +double LevelOfSignificanceWXMPSR(double Winput, long int N) +{ + unsigned long int W, MaximalW, NumberOfPossibilities, CountLarger; + unsigned long int i, RankSum, j; + double p; + + /* Determine Wmax, i.e., work with the largest Rank Sum */ + MaximalW = N*(N+1)/2; + if(Winput < MaximalW/2)Winput = MaximalW - Winput; + W = Winput; /* Convert to long int */ + if(W != Winput)++W; /* Increase to next full integer */ + + /* The total number of possible outcomes is 2**N */ + NumberOfPossibilities = 1 << N; + + /* Initialize and loop. The loop-interior will be run 2**N times. */ + CountLarger = 0; + /* Generate all distributions of sign over ranks as bit-patterns (i). */ + for(i=0; i < NumberOfPossibilities; ++i) + { + RankSum = 0; + /* + Shift "sign" bits out of i to determine the Sum of Ranks (j). + */ + for(j=0; j < N; ++j) + { + if((i >> j) & 1)RankSum += j + 1; + }; + /* + * Count the number of "samples" that have a Sum of Ranks larger than + * or equal to the one found (i.e., >= W). + */ + if(RankSum >= W)++CountLarger; + }; + /***************************************************************** + * The level of significance is the number of outcomes with a + * sum of ranks equal to or larger than the one found (W) + * divided by the total number of possible outcomes. + * The level is doubled to get the two-tailed result. + ******************************************************************/ + p = 2*((double)CountLarger) / ((double)NumberOfPossibilities); + + return p; +} + diff --git a/lib/misc/wx-mp-sr.h b/lib/misc/wx-mp-sr.h new file mode 100644 index 00000000..ec3ef5ed --- /dev/null +++ b/lib/misc/wx-mp-sr.h @@ -0,0 +1,6 @@ +#ifndef WX_MP_SR +#define WX_MP_SR 1 + +double LevelOfSignificanceWXMPSR(double Winput, long int N); + +#endif diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk index 9981ed67..3e9ab232 100644 --- a/src/language/stats/automake.mk +++ b/src/language/stats/automake.mk @@ -32,7 +32,9 @@ language_stats_sources = \ src/language/stats/freq.c \ src/language/stats/freq.h \ src/language/stats/npar-summary.c \ - src/language/stats/npar-summary.h + src/language/stats/npar-summary.h \ + src/language/stats/wilcoxon.c \ + src/language/stats/wilcoxon.h all_q_sources += $(src_language_stats_built_sources:.c=.q) EXTRA_DIST += $(src_language_stats_built_sources:.c=.q) diff --git a/src/language/stats/binomial.c b/src/language/stats/binomial.c index d9209f1b..f5435051 100644 --- a/src/language/stats/binomial.c +++ b/src/language/stats/binomial.c @@ -143,7 +143,9 @@ void binomial_execute (const struct dataset *ds, struct casereader *input, enum mv_class exclude, - const struct npar_test *test) + const struct npar_test *test, + bool exact UNUSED, + double timer UNUSED) { int v; const struct binomial_test *bst = (const struct binomial_test *) test; diff --git a/src/language/stats/binomial.h b/src/language/stats/binomial.h index a8360330..df01a13b 100644 --- a/src/language/stats/binomial.h +++ b/src/language/stats/binomial.h @@ -40,6 +40,7 @@ struct dataset; void binomial_execute (const struct dataset *, struct casereader *, enum mv_class, - const struct npar_test *); + const struct npar_test *, + bool, double); #endif diff --git a/src/language/stats/chisquare.c b/src/language/stats/chisquare.c index 1b772306..ed143824 100644 --- a/src/language/stats/chisquare.c +++ b/src/language/stats/chisquare.c @@ -320,7 +320,9 @@ void chisquare_execute (const struct dataset *ds, struct casereader *input, enum mv_class exclude, - const struct npar_test *test) + const struct npar_test *test, + bool exact UNUSED, + double timer UNUSED) { const struct dictionary *dict = dataset_dict (ds); int v, i; diff --git a/src/language/stats/chisquare.h b/src/language/stats/chisquare.h index 916a2639..91a17d1a 100644 --- a/src/language/stats/chisquare.h +++ b/src/language/stats/chisquare.h @@ -46,7 +46,9 @@ void chisquare_insert_variables (const struct npar_test *test, void chisquare_execute (const struct dataset *ds, struct casereader *input, enum mv_class exclude, - const struct npar_test *test); + const struct npar_test *test, + bool, + double); diff --git a/src/language/stats/npar.h b/src/language/stats/npar.h index 37939fe9..6aed01cf 100644 --- a/src/language/stats/npar.h +++ b/src/language/stats/npar.h @@ -18,6 +18,7 @@ #define npar_h 1 #include +#include #include #include @@ -36,8 +37,9 @@ struct npar_test void (*execute) (const struct dataset *, struct casereader *, enum mv_class exclude, - const struct npar_test * - ); + const struct npar_test *, + bool, + double); void (*insert_variables) (const struct npar_test *, struct const_hsh_table *); diff --git a/src/language/stats/npar.q b/src/language/stats/npar.q index 647205fd..34e03677 100644 --- a/src/language/stats/npar.q +++ b/src/language/stats/npar.q @@ -1,5 +1,5 @@ -/* PSPP - a program for statistical analysis. - Copyright (C) 2006 Free Software Foundation, Inc. +/* PSPP - a program for statistical analysis. -*-c-*- + Copyright (C) 2006, 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -53,7 +54,8 @@ +friedman=varlist; +kendall=varlist; missing=miss:!analysis/listwise, - incl:include/!exclude; + incl:include/!exclude; + method=custom; +statistics[st_]=descriptives,quartiles,all. */ /* (declarations) */ @@ -70,17 +72,25 @@ struct npar_specs size_t n_tests; const struct variable ** vv; /* Compendium of all variables - (those mentioned on ANY subcommand */ + (those mentioned on ANY subcommand */ int n_vars; /* Number of variables in vv */ enum mv_class filter; /* Missing values to filter. */ bool descriptives; /* Descriptive statistics should be calculated */ bool quartiles; /* Quartiles should be calculated */ + + bool exact; /* Whether exact calculations have been requested */ + double timer; /* Maximum time (in minutes) to wait for exact calculations */ }; -void one_sample_insert_variables (const struct npar_test *test, - struct const_hsh_table *variables); +static void one_sample_insert_variables (const struct npar_test *test, + struct const_hsh_table *variables); + +static void two_sample_insert_variables (const struct npar_test *test, + struct const_hsh_table *variables); + + static void npar_execute(struct casereader *input, @@ -98,7 +108,7 @@ npar_execute(struct casereader *input, msg (SW, _("NPAR subcommand not currently implemented.")); continue; } - test->execute (ds, casereader_clone (input), specs->filter, test); + test->execute (ds, casereader_clone (input), specs->filter, test, specs->exact, specs->timer); } if ( specs->descriptives ) @@ -126,7 +136,7 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) { bool ok; int i; - struct npar_specs npar_specs = {0, 0, 0, 0, 0, 0, 0, 0}; + struct npar_specs npar_specs = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; struct const_hsh_table *var_hash; struct casegrouper *grouper; struct casereader *input, *group; @@ -134,8 +144,8 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) npar_specs.pool = pool_create (); var_hash = const_hsh_create_pool (npar_specs.pool, 0, - compare_vars_by_name, hash_var_by_name, - NULL, NULL); + compare_vars_by_name, hash_var_by_name, + NULL, NULL); if ( ! parse_npar_tests (lexer, ds, &cmd, &npar_specs) ) { @@ -183,11 +193,14 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) input = proc_open (ds); if ( cmd.miss == NPAR_LISTWISE ) - input = casereader_create_filter_missing (input, - npar_specs.vv, - npar_specs.n_vars, - npar_specs.filter, - NULL, NULL); + { + input = casereader_create_filter_missing (input, + npar_specs.vv, + npar_specs.n_vars, + npar_specs.filter, + NULL, NULL); + } + grouper = casegrouper_create_splits (input, dataset_dict (ds)); while (casegrouper_get_next_group (grouper, &group)) @@ -203,7 +216,8 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) } int -npar_custom_chisquare(struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests *cmd UNUSED, void *aux ) +npar_custom_chisquare (struct lexer *lexer, struct dataset *ds, + struct cmd_npar_tests *cmd UNUSED, void *aux ) { struct npar_specs *specs = aux; @@ -214,8 +228,8 @@ npar_custom_chisquare(struct lexer *lexer, struct dataset *ds, struct cmd_npar_t ((struct npar_test *)tp)->insert_variables = one_sample_insert_variables; if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), - &tp->vars, &tp->n_vars, - PV_NO_SCRATCH | PV_NO_DUPLICATE)) + &tp->vars, &tp->n_vars, + PV_NO_SCRATCH | PV_NO_DUPLICATE)) { return 2; } @@ -308,7 +322,8 @@ npar_custom_chisquare(struct lexer *lexer, struct dataset *ds, struct cmd_npar_t int -npar_custom_binomial(struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests *cmd UNUSED, void *aux) +npar_custom_binomial (struct lexer *lexer, struct dataset *ds, + struct cmd_npar_tests *cmd UNUSED, void *aux) { struct npar_specs *specs = aux; struct binomial_test *btp = pool_alloc(specs->pool, sizeof(*btp)); @@ -334,8 +349,8 @@ npar_custom_binomial(struct lexer *lexer, struct dataset *ds, struct cmd_npar_te if ( lex_match (lexer, '=') ) { if (parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), - &tp->vars, &tp->n_vars, - PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) + &tp->vars, &tp->n_vars, + PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) { if ( lex_match (lexer, '(')) { @@ -399,18 +414,20 @@ parse_two_sample_related_test (struct lexer *lexer, const struct variable **vlist2; size_t n_vlist2; + ((struct npar_test *)test_parameters)->insert_variables = two_sample_insert_variables; + if (!parse_variables_const_pool (lexer, pool, - dict, - &vlist1, &n_vlist1, - PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) + dict, + &vlist1, &n_vlist1, + PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) return false; if ( lex_match(lexer, T_WITH)) { with = true; if ( !parse_variables_const_pool (lexer, pool, dict, - &vlist2, &n_vlist2, - PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) + &vlist2, &n_vlist2, + PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) return false; paired = (lex_match (lexer, '(') && @@ -450,8 +467,8 @@ parse_two_sample_related_test (struct lexer *lexer, assert (n_vlist1 == n_vlist2); for ( i = 0 ; i < n_vlist1; ++i ) { - test_parameters->pairs[n][0] = vlist1[i]; - test_parameters->pairs[n][1] = vlist2[i]; + test_parameters->pairs[n][1] = vlist1[i]; + test_parameters->pairs[n][0] = vlist2[i]; n++; } } @@ -462,8 +479,8 @@ parse_two_sample_related_test (struct lexer *lexer, { for ( j = 0 ; j < n_vlist2; ++j ) { - test_parameters->pairs[n][0] = vlist1[i]; - test_parameters->pairs[n][1] = vlist2[j]; + test_parameters->pairs[n][1] = vlist1[i]; + test_parameters->pairs[n][0] = vlist2[j]; n++; } } @@ -477,8 +494,8 @@ parse_two_sample_related_test (struct lexer *lexer, for ( j = i + 1 ; j < n_vlist1; ++j ) { assert ( n < test_parameters->n_pairs); - test_parameters->pairs[n][0] = vlist1[i]; - test_parameters->pairs[n][1] = vlist1[j]; + test_parameters->pairs[n][1] = vlist1[i]; + test_parameters->pairs[n][0] = vlist1[j]; n++; } } @@ -496,8 +513,8 @@ npar_custom_wilcoxon (struct lexer *lexer, { struct npar_specs *specs = aux; - struct two_sample_test *tp = pool_alloc(specs->pool, sizeof(*tp)); - ((struct npar_test *)tp)->execute = NULL; + struct two_sample_test *tp = pool_alloc (specs->pool, sizeof(*tp)); + ((struct npar_test *)tp)->execute = wilcoxon_execute; if (!parse_two_sample_related_test (lexer, dataset_dict (ds), cmd, tp, specs->pool) ) @@ -560,9 +577,9 @@ npar_custom_sign (struct lexer *lexer, struct dataset *ds, } /* Insert the variables for TEST into VAR_HASH */ -void +static void one_sample_insert_variables (const struct npar_test *test, - struct const_hsh_table *var_hash) + struct const_hsh_table *var_hash) { int i; struct one_sample_test *ost = (struct one_sample_test *) test; @@ -571,3 +588,50 @@ one_sample_insert_variables (const struct npar_test *test, const_hsh_insert (var_hash, ost->vars[i]); } +static void +two_sample_insert_variables (const struct npar_test *test, + struct const_hsh_table *var_hash) +{ + int i; + + const struct two_sample_test *tst = (const struct two_sample_test *) test; + + for ( i = 0 ; i < tst->n_pairs ; ++i ) + { + variable_pair *pair = &tst->pairs[i]; + + const_hsh_insert (var_hash, (*pair)[0]); + const_hsh_insert (var_hash, (*pair)[1]); + } + +} + + +static int +npar_custom_method (struct lexer *lexer, struct dataset *ds UNUSED, + struct cmd_npar_tests *test UNUSED, void *aux) +{ + struct npar_specs *specs = aux; + + if ( lex_match_id (lexer, "EXACT") ) + { + specs->exact = true; + specs->timer = 0.0; + if (lex_match_id (lexer, "TIMER")) + { + specs->timer = 5.0; + + if ( lex_match (lexer, '(')) + { + if ( lex_force_num (lexer) ) + { + specs->timer = lex_number (lexer); + lex_get (lexer); + } + lex_force_match (lexer, ')'); + } + } + } + + return 1; +} diff --git a/src/language/stats/wilcoxon.c b/src/language/stats/wilcoxon.c new file mode 100644 index 00000000..bca0b7d9 --- /dev/null +++ b/src/language/stats/wilcoxon.c @@ -0,0 +1,419 @@ +/* Pspp - a program for statistical analysis. + Copyright (C) 2008 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + + + +#include +#include "wilcoxon.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static double timed_wilcoxon_significance (double w, long int n, double timer); + + +static double +append_difference (const struct ccase *c, casenumber n UNUSED, void *aux) +{ + const variable_pair *vp = aux; + + return case_data (c, (*vp)[0])->f - case_data (c, (*vp)[1])->f; +} + +static void show_ranks_box (const struct wilcoxon_state *, + const struct two_sample_test *); + +static void show_tests_box (const struct wilcoxon_state *, + const struct two_sample_test *, + bool exact, double timer); + + + +static void +distinct_callback (double v UNUSED, casenumber n, double w UNUSED, void *aux) +{ + struct wilcoxon_state *ws = aux; + + ws->tiebreaker += pow3 (n) - n; +} + +#define WEIGHT_IDX 2 + +void +wilcoxon_execute (const struct dataset *ds, + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test, + bool exact, + double timer) +{ + int i; + bool warn = true; + const struct dictionary *dict = dataset_dict (ds); + const struct two_sample_test *t2s = (struct two_sample_test *) test; + + struct wilcoxon_state *ws = xcalloc (sizeof (*ws), t2s->n_pairs); + const struct variable *weight = dict_get_weight (dict); + struct variable *weightx = var_create_internal (WEIGHT_IDX); + + input = + casereader_create_filter_weight (input, dict, &warn, NULL); + + for (i = 0 ; i < t2s->n_pairs; ++i ) + { + struct casereader *r = casereader_clone (input); + struct casewriter *writer; + struct ccase c; + struct case_ordering *ordering = case_ordering_create (); + variable_pair *vp = &t2s->pairs[i]; + + const int reader_width = weight ? 3 : 2; + + ws[i].sign = var_create_internal (0); + ws[i].absdiff = var_create_internal (1); + + case_ordering_add_var (ordering, ws[i].absdiff, SRT_ASCEND); + + + r = casereader_create_filter_missing (r, *vp, 2, + exclude, + NULL, NULL); + + writer = sort_create_writer (ordering, reader_width); + while (casereader_read (r, &c)) + { + struct ccase output; + double d = append_difference (&c, 0, vp); + + case_create (&output, reader_width); + + if (d > 0) + { + case_data_rw (&output, ws[i].sign)->f = 1.0; + + } + else if (d < 0) + { + case_data_rw (&output, ws[i].sign)->f = -1.0; + } + else + { + double w = 1.0; + if (weight) + w = case_data (&c, weight)->f; + + /* Central point values should be dropped */ + ws[i].n_zeros += w; + case_destroy (&c); + continue; + } + + case_data_rw (&output, ws[i].absdiff)->f = fabs (d); + + if (weight) + case_data_rw (&output, weightx)->f = case_data (&c, weight)->f; + + casewriter_write (writer, &output); + case_destroy (&c); + } + casereader_destroy (r); + ws[i].reader = casewriter_make_reader (writer); + } + + for (i = 0 ; i < t2s->n_pairs; ++i ) + { + struct casereader *rr ; + struct ccase c; + enum rank_error err = 0; + + rr = casereader_create_append_rank (ws[i].reader, ws[i].absdiff, + weight ? weightx : NULL, &err, + distinct_callback, &ws[i] + ); + + while (casereader_read (rr, &c)) + { + double sign = case_data (&c, ws[i].sign)->f; + double rank = case_data_idx (&c, weight ? 3 : 2)->f; + double w = 1.0; + if (weight) + w = case_data (&c, weightx)->f; + + if ( sign > 0 ) + { + ws[i].positives.sum += rank * w; + ws[i].positives.n += w; + } + else if (sign < 0) + { + ws[i].negatives.sum += rank * w; + ws[i].negatives.n += w; + } + else + NOT_REACHED (); + + case_destroy (&c); + } + + casereader_destroy (rr); + } + + casereader_destroy (input); + + var_destroy (weightx); + + show_ranks_box (ws, t2s); + show_tests_box (ws, t2s, exact, timer); + + for (i = 0 ; i < t2s->n_pairs; ++i ) + { + var_destroy (ws[i].sign); + var_destroy (ws[i].absdiff); + } + + free (ws); +} + + + + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +static void +show_ranks_box (const struct wilcoxon_state *ws, const struct two_sample_test *t2s) +{ + size_t i; + struct tab_table *table = tab_create (5, 1 + 4 * t2s->n_pairs, 0); + + tab_dim (table, tab_natural_dimensions); + + tab_title (table, _("Ranks")); + + tab_headers (table, 2, 0, 1, 0); + + /* Vertical lines inside the box */ + tab_box (table, 0, 0, -1, TAL_1, + 1, 0, table->nc - 1, tab_nr (table) - 1 ); + + /* Box around entire table */ + tab_box (table, TAL_2, TAL_2, -1, -1, + 0, 0, table->nc - 1, tab_nr (table) - 1 ); + + + tab_text (table, 2, 0, TAB_CENTER, _("N")); + tab_text (table, 3, 0, TAB_CENTER, _("Mean Rank")); + tab_text (table, 4, 0, TAB_CENTER, _("Sum of Ranks")); + + + for (i = 0 ; i < t2s->n_pairs; ++i) + { + variable_pair *vp = &t2s->pairs[i]; + + struct string pair_name; + ds_init_cstr (&pair_name, var_to_string ((*vp)[0])); + ds_put_cstr (&pair_name, " - "); + ds_put_cstr (&pair_name, var_to_string ((*vp)[1])); + + tab_text (table, 1, 1 + i * 4, TAB_LEFT, _("Negative Ranks")); + tab_text (table, 1, 2 + i * 4, TAB_LEFT, _("Positive Ranks")); + tab_text (table, 1, 3 + i * 4, TAB_LEFT, _("Ties")); + tab_text (table, 1, 4 + i * 4, TAB_LEFT, _("Total")); + + tab_hline (table, TAL_1, 0, table->nc - 1, 1 + i * 4); + + + tab_text (table, 0, 1 + i * 4, TAB_LEFT, ds_cstr (&pair_name)); + ds_destroy (&pair_name); + + + /* N */ + tab_float (table, 2, 1 + i * 4, TAB_RIGHT, ws[i].negatives.n, 8, 0); + tab_float (table, 2, 2 + i * 4, TAB_RIGHT, ws[i].positives.n, 8, 0); + tab_float (table, 2, 3 + i * 4, TAB_RIGHT, ws[i].n_zeros, 8, 0); + + tab_float (table, 2, 4 + i * 4, TAB_RIGHT, + ws[i].n_zeros + ws[i].positives.n + ws[i].negatives.n, 8, 0); + + /* Sums */ + tab_float (table, 4, 1 + i * 4, TAB_RIGHT, ws[i].negatives.sum, 8, 2); + tab_float (table, 4, 2 + i * 4, TAB_RIGHT, ws[i].positives.sum, 8, 2); + + + /* Means */ + tab_float (table, 3, 1 + i * 4, TAB_RIGHT, + ws[i].negatives.sum / (double) ws[i].negatives.n, 8, 2); + + tab_float (table, 3, 2 + i * 4, TAB_RIGHT, + ws[i].positives.sum / (double) ws[i].positives.n, 8, 2); + + } + + tab_hline (table, TAL_2, 0, table->nc - 1, 1); + tab_vline (table, TAL_2, 2, 0, table->nr - 1); + + + tab_submit (table); +} + + +static void +show_tests_box (const struct wilcoxon_state *ws, + const struct two_sample_test *t2s, + bool exact, + double timer + ) +{ + size_t i; + struct tab_table *table = tab_create (1 + t2s->n_pairs, exact ? 5 : 3, 0); + + tab_dim (table, tab_natural_dimensions); + + tab_title (table, _("Test Statistics")); + + tab_headers (table, 1, 0, 1, 0); + + /* Vertical lines inside the box */ + tab_box (table, 0, 0, -1, TAL_1, + 0, 0, table->nc - 1, tab_nr (table) - 1 ); + + /* Box around entire table */ + tab_box (table, TAL_2, TAL_2, -1, -1, + 0, 0, table->nc - 1, tab_nr (table) - 1 ); + + + tab_text (table, 0, 1, TAB_LEFT, _("Z")); + tab_text (table, 0, 2, TAB_LEFT, _("Asymp. Sig (2-tailed)")); + + if ( exact ) + { + tab_text (table, 0, 3, TAB_LEFT, _("Exact Sig (2-tailed)")); + tab_text (table, 0, 4, TAB_LEFT, _("Exact Sig (1-tailed)")); + +#if 0 + tab_text (table, 0, 5, TAB_LEFT, _("Point Probability")); +#endif + } + + for (i = 0 ; i < t2s->n_pairs; ++i) + { + double z; + double n = ws[i].positives.n + ws[i].negatives.n; + variable_pair *vp = &t2s->pairs[i]; + + struct string pair_name; + ds_init_cstr (&pair_name, var_to_string ((*vp)[0])); + ds_put_cstr (&pair_name, " - "); + ds_put_cstr (&pair_name, var_to_string ((*vp)[1])); + + + tab_text (table, 1 + i, 0, TAB_CENTER, ds_cstr (&pair_name)); + ds_destroy (&pair_name); + + z = MIN (ws[i].positives.sum, ws[i].negatives.sum); + z -= n * (n + 1)/ 4.0; + + z /= sqrt (n * (n + 1) * (2*n + 1)/24.0 - ws[i].tiebreaker / 48.0); + + tab_float (table, 1 + i, 1, TAB_RIGHT, z, 8, 3); + + tab_float (table, 1 + i, 2, TAB_RIGHT, + 2.0 * gsl_cdf_ugaussian_P (z), + 8, 3); + + if (exact) + { + double p = + timed_wilcoxon_significance (ws[i].positives.sum, + n, + timer ); + + if ( p == SYSMIS) + { + msg (MW, _("Exact significance was not calculated after %.2f minutes. Skipping test."), timer); + } + else + { + tab_float (table, 1 + i, 3, TAB_RIGHT, p, 8, 3); + tab_float (table, 1 + i, 4, TAB_RIGHT, p / 2.0, 8, 3); + } + } + } + + tab_hline (table, TAL_2, 0, table->nc - 1, 1); + tab_vline (table, TAL_2, 1, 0, table->nr - 1); + + + tab_submit (table); +} + + + +#include + +static sigjmp_buf env; + +static void +give_up_callback (int signal UNUSED) +{ + siglongjmp (env, 1); +} + +static double +timed_wilcoxon_significance (double w, long int n, double timer) +{ + double p = SYSMIS; + + sigset_t set; + + struct sigaction timeout_action; + struct sigaction old_action; + + if (timer <= 0 ) + return LevelOfSignificanceWXMPSR (w, n); + + sigemptyset (&set); + + timeout_action.sa_mask = set; + timeout_action.sa_flags = 0; + + timeout_action.sa_restorer = 0; + timeout_action.sa_handler = give_up_callback; + + if ( 0 == sigsetjmp (env, 1)) + { + sigaction (SIGALRM, &timeout_action, &old_action); + alarm (timer * 60.0); + + p = LevelOfSignificanceWXMPSR (w, n); + } + + sigaction (SIGALRM, &old_action, NULL); + + return p; +} diff --git a/src/language/stats/wilcoxon.h b/src/language/stats/wilcoxon.h new file mode 100644 index 00000000..b0f86a2c --- /dev/null +++ b/src/language/stats/wilcoxon.h @@ -0,0 +1,65 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2008 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#if !wilcoxon_h +#define wilcoxon_h 1 + +#include +#include +#include +#include + + +struct rank_sum +{ + double n; + double sum; +}; + +struct wilcoxon_state +{ + struct casereader *reader; + struct variable *sign; + struct variable *absdiff; + + struct rank_sum positives; + struct rank_sum negatives; + double n_zeros; + + double tiebreaker; +}; + + +struct wilcoxon_test +{ + struct two_sample_test parent; +}; + +struct casereader; +struct dataset; + + +void wilcoxon_execute (const struct dataset *ds, + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test, + bool exact, + double timer + ); + + + +#endif diff --git a/src/ui/gui/automake.mk b/src/ui/gui/automake.mk index 64fe75e7..c6aa455d 100644 --- a/src/ui/gui/automake.mk +++ b/src/ui/gui/automake.mk @@ -59,6 +59,7 @@ src_ui_gui_psppire_LDADD = \ src/output/liboutput.a \ src/math/libpspp_math.a \ lib/linreg/liblinreg.a \ + lib/misc/libmisc.a \ src/data/libdata.a \ src/libpspp/libpspp.a \ $(GTK_LIBS) \ diff --git a/src/ui/gui/psppire.c b/src/ui/gui/psppire.c index 239d153e..04677463 100644 --- a/src/ui/gui/psppire.c +++ b/src/ui/gui/psppire.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "relocatable.h" @@ -122,6 +123,9 @@ initialize (void) journal_enable (); textdomain (PACKAGE); + /* Ignore alarm clock signals */ + signal (SIGALRM, SIG_IGN); + new_data_window (NULL, NULL); } diff --git a/src/ui/terminal/automake.mk b/src/ui/terminal/automake.mk index 928716c8..0e3d295a 100644 --- a/src/ui/terminal/automake.mk +++ b/src/ui/terminal/automake.mk @@ -28,6 +28,7 @@ src_ui_terminal_pspp_LDADD = \ src/math/libpspp_math.a \ src/ui/libuicommon.a \ lib/linreg/liblinreg.a \ + lib/misc/libmisc.a \ src/data/libdata.a \ src/libpspp/libpspp.a \ $(LIBXML2_LIBS) \ diff --git a/src/ui/terminal/main.c b/src/ui/terminal/main.c index b8cabb3a..e91677b3 100644 --- a/src/ui/terminal/main.c +++ b/src/ui/terminal/main.c @@ -84,6 +84,7 @@ main (int argc, char **argv) signal (SIGABRT, bug_handler); signal (SIGSEGV, bug_handler); signal (SIGFPE, bug_handler); + signal (SIGALRM, SIG_IGN); at_fatal_signal (clean_up); i18n_init (); diff --git a/tests/automake.mk b/tests/automake.mk index 505c4335..00afa197 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -40,6 +40,7 @@ dist_TESTS = \ tests/command/n_of_cases.sh \ tests/command/npar-binomial.sh \ tests/command/npar-chisquare.sh \ + tests/command/npar-wilcoxon.sh \ tests/command/oneway.sh \ tests/command/oneway-missing.sh \ tests/command/oneway-with-splits.sh \ diff --git a/tests/command/npar-wilcoxon.sh b/tests/command/npar-wilcoxon.sh new file mode 100755 index 00000000..c4a5d825 --- /dev/null +++ b/tests/command/npar-wilcoxon.sh @@ -0,0 +1,174 @@ +#!/bin/sh + +# This program tests the wilcoxon subcommand of npar tests + +TEMPDIR=/tmp/pspp-tst-$$ +TESTFILE=$TEMPDIR/`basename $0`.sps + +# ensure that top_srcdir and top_builddir are absolute +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +top_srcdir=`cd $top_srcdir; pwd` +top_builddir=`cd $top_builddir; pwd` + +PSPP=$top_builddir/src/ui/terminal/pspp + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + +LANG=C +export LANG + + +cleanup() +{ + if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then + echo "NOT cleaning $TEMPDIR" + return ; + fi + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="create program 1" +cat > $TESTFILE << EOF +data list notable list /foo * bar * w *. +begin data. +1.00 1.00 1 +1.00 2.00 1 +2.00 1.00 1 +1.00 4.00 1 +2.00 5.00 1 +1.00 19.00 1 +2.00 7.00 1 +4.00 5.00 1 +1.00 12.00 1 +2.00 13.00 1 +2.00 2.00 1 +12.00 .00 2 +12.00 1.00 1 +13.00 1.00 1 +end data + +variable labels foo "first" bar "second". + +weight by w. + +npar test + /wilcoxon=foo with bar (paired) + /missing analysis + /method=exact. + +EOF +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program 1" +$SUPERVISOR $PSPP --testing-mode -o raw-ascii $TESTFILE +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="generate results" +cat > $TEMPDIR/results.txt < $TESTFILE << EOF +data list notable list /foo * bar * dummy *. +begin data. +1.00 1.00 1 +1.00 2.00 1 +2.00 1.00 1 +1.00 4.00 . +2.00 5.00 . +1.00 19.00 . +2.00 7.00 1 +4.00 5.00 1 +1.00 12.00 1 +2.00 13.00 1 +2.00 2.00 1 +12.00 .00 1 +12.00 .00 1 +34.2 . 1 +12.00 1.00 1 +13.00 1.00 1 +end data + +variable labels foo "first" bar "second". + +npar test + /wilcoxon=foo with bar (paired) + /missing analysis + /method=exact. + +EOF +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program 2" +$SUPERVISOR $PSPP --testing-mode -o raw-ascii $TESTFILE +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="compare output 2" +diff pspp.list $TEMPDIR/results.txt +if [ $? -ne 0 ] ; then fail ; fi + + + +pass; -- 2.30.2