From a5f512f378fbab43da8496d366dedcf59f29f580 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 9 Apr 2009 15:58:58 +0800 Subject: [PATCH] Implemented the sign test. Added an implementation of the SIGN test for the NPAR TESTS command. Closes patch #6801 --- doc/statistics.texi | 44 +++++-- src/language/stats/automake.mk | 4 +- src/language/stats/npar.q | 4 +- src/language/stats/sign.c | 225 +++++++++++++++++++++++++++++++++ src/language/stats/sign.h | 35 +++++ tests/automake.mk | 1 + tests/command/npar-sign.sh | 117 +++++++++++++++++ 7 files changed, 418 insertions(+), 12 deletions(-) create mode 100644 src/language/stats/sign.c create mode 100644 src/language/stats/sign.h create mode 100755 tests/command/npar-sign.sh diff --git a/doc/statistics.texi b/doc/statistics.texi index 98556060..9cf68498 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -532,6 +532,7 @@ is used. * BINOMIAL:: Binomial Test * CHISQUARE:: Chisquare Test * WILCOXON:: Wilcoxon Signed Ranks Test +* SIGN:: The Sign Test @end menu @@ -544,7 +545,7 @@ is used. [ /BINOMIAL[(p)]=var_list[(value1[, value2)] ] ] @end display -The binomial test compares the observed distribution of a dichotomous +The /BINOMIAL subcommand compares the observed distribution of a dichotomous variable with that of a binomial distribution. The variable @var{p} specifies the test proportion of the binomial distribution. @@ -584,7 +585,7 @@ even for very large sample sizes. @node CHISQUARE -@subsection Chisquare test +@subsection Chisquare Test @vindex CHISQUARE @cindex chisquare test @@ -594,7 +595,7 @@ even for very large sample sizes. @end display -The chisquare test produces a chi-square statistic for the differences +The /CHISQUARE subcommand produces a chi-square statistic for the differences between the expected and observed frequencies of the categories of a variable. Optionally, a range of values may appear after the variable list. 
If a range is given, then non integer values are truncated, and values @@ -612,7 +613,7 @@ If no /EXPECTED subcommand is given, then then equal frequencies are expected. @node WILCOXON -@subsection Wilcoxon +@subsection Wilcoxon Matched Pairs Signed Ranks Test @comment node-name, next, previous, up @vindex WILCOXON @cindex wilcoxon matched pairs signed ranks test @@ -621,9 +622,10 @@ are expected. [ /WILCOXON varlist [ WITH varlist [ (PAIRED) ]]] @end display -The wilcoxon subcommand tests for differences between means of the -variables listed. The test does not make any assumptions about the -variances of the samples. +The /WILCOXON subcommand tests for differences between medians of the +variables listed. +The test does not make any assumptions about the variances of the samples. +It does, however, assume that the distribution is symmetrical. If the @code{WITH} keyword is omitted, then tests for all combinations of the listed variables are performed. @@ -637,8 +639,32 @@ If the @code{WITH} keyword is given, but the of variable preceding @code{WITH} against variable following @code{WITH} are performed. -If the number of observations is large, and exact tests have been -requested. then the test may take a very long time to complete. + +@node SIGN +@subsection Sign Test +@vindex SIGN +@cindex sign test + +@display + [ /SIGN varlist [ WITH varlist [ (PAIRED) ]]] +@end display + +The /SIGN subcommand tests for differences between medians of the +variables listed. +The test does not make any assumptions about the +distribution of the data. + +If the @code{WITH} keyword is omitted, then tests for all +combinations of the listed variables are performed. +If the @code{WITH} keyword is given, and the @code{(PAIRED)} keyword +is also given, then the number of variables preceding @code{WITH} +must be the same as the number following it. +In this case, tests for each respective pair of variables are +performed. 
+If the @code{WITH} keyword is given, but the +@code{(PAIRED)} keyword is omitted, then tests for each combination +of variable preceding @code{WITH} against variable following +@code{WITH} are performed. @node T-TEST @comment node-name, next, previous, up diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk index 3e9ab232..cb65e087 100644 --- a/src/language/stats/automake.mk +++ b/src/language/stats/automake.mk @@ -34,7 +34,9 @@ language_stats_sources = \ src/language/stats/npar-summary.c \ src/language/stats/npar-summary.h \ src/language/stats/wilcoxon.c \ - src/language/stats/wilcoxon.h + src/language/stats/wilcoxon.h \ + src/language/stats/sign.c \ + src/language/stats/sign.h all_q_sources += $(src_language_stats_built_sources:.c=.q) EXTRA_DIST += $(src_language_stats_built_sources:.c=.q) diff --git a/src/language/stats/npar.q b/src/language/stats/npar.q index 4caa112a..3234c4f7 100644 --- a/src/language/stats/npar.q +++ b/src/language/stats/npar.q @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -557,8 +558,7 @@ npar_custom_sign (struct lexer *lexer, struct dataset *ds, struct npar_specs *specs = aux; struct two_sample_test *tp = pool_alloc(specs->pool, sizeof(*tp)); - ((struct npar_test *)tp)->execute = NULL; - + ((struct npar_test *) tp)->execute = sign_execute; if (!parse_two_sample_related_test (lexer, dataset_dict (ds), cmd, tp, specs->pool) ) diff --git a/src/language/stats/sign.c b/src/language/stats/sign.c new file mode 100644 index 00000000..b76d9e33 --- /dev/null +++ b/src/language/stats/sign.c @@ -0,0 +1,225 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include "sign.h" + +#include + +#include +#include +#include +#include +#include +#include "npar.h" +#include +#include +#include +#include +#include + +#include "gettext.h" +#define _(msgid) gettext (msgid) +/* Tallies and results of the sign test for one pair of variables. */ +struct sign_test_params +{ + double pos; /* Weighted cases where first > second. */ + double ties; /* Weighted cases where first == second. */ + double neg; /* Weighted cases where first < second. */ + + double one_tailed_sig; /* Binomial CDF at MIN (pos, neg), p = 0.5, n = pos + neg. */ + double point_prob; /* Binomial probability of exactly MIN (pos, neg). */ +}; + +/* Submits the "Frequencies" table: one header row, then four rows (negative, positive, ties, total) per pair in T2S, read from PARAM. Counts use the print format of DICT's weight variable, or F_8_0 if there is none. */ +static void +output_frequency_table (const struct two_sample_test *t2s, + const struct sign_test_params *param, + const struct dictionary *dict) +{ + int i; + struct tab_table *table = tab_create (3, 1 + 4 * t2s->n_pairs, 0); + + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : & F_8_0; + + tab_dim (table, tab_natural_dimensions); + + tab_title (table, _("Frequencies")); + + tab_headers (table, 2, 0, 1, 0); + + /* Vertical lines inside the box */ + tab_box (table, 0, 0, -1, TAL_1, + 1, 0, table->nc - 1, tab_nr (table) - 1 ); + + /* Box around entire table */ + tab_box (table, TAL_2, TAL_2, -1, -1, + 0, 0, table->nc - 1, tab_nr (table) - 1 ); + + tab_text (table, 2, 0, TAB_CENTER, _("N")); + + for (i = 0 ; i < t2s->n_pairs; ++i) + { + variable_pair *vp = &t2s->pairs[i]; + + struct string pair_name; + ds_init_cstr (&pair_name, var_to_string ((*vp)[0])); + ds_put_cstr (&pair_name, " - "); + ds_put_cstr (&pair_name, var_to_string ((*vp)[1])); + + tab_text (table, 0, 1 + i * 4, TAB_LEFT, ds_cstr (&pair_name)); + + ds_destroy (&pair_name); + + tab_hline (table, TAL_1, 0, table->nc - 1, 1 + i * 4); + + tab_text (table, 1, 1 + i * 4, TAB_LEFT, _("Negative Differences")); + tab_text (table, 1, 2 + i * 4, TAB_LEFT, _("Positive Differences")); + tab_text (table, 1, 3 + i * 4, TAB_LEFT, _("Ties")); + tab_text (table, 1, 4 + i * 4, TAB_LEFT, _("Total")); + + tab_double (table, 2, 1 + i * 4, TAB_RIGHT, param[i].neg, wfmt); + tab_double (table, 2, 2 + i * 4, TAB_RIGHT, param[i].pos, wfmt); + tab_double (table, 2, 3 + i * 4, TAB_RIGHT, param[i].ties, wfmt); + tab_double (table, 2, 4 + i * 4, TAB_RIGHT, + param[i].ties + param[i].neg + param[i].pos, wfmt); + } + + tab_submit (table); +} +/* Submits the "Test Statistics" table: one column per pair in T2S, with rows for the 2-tailed, 1-tailed and point probabilities held in PARAM. */ +static void +output_statistics_table (const struct two_sample_test *t2s, + const struct sign_test_params *param) +{ + int i; + struct tab_table *table = tab_create (1 + t2s->n_pairs, 4, 0); + + tab_dim (table, tab_natural_dimensions); + + tab_title (table, _("Test Statistics")); + + tab_headers (table, 0, 1, 0, 1); + + tab_hline (table, TAL_2, 0, table->nc - 1, 1); + tab_vline (table, TAL_2, 1, 0, table->nr - 1); + + + /* Vertical lines inside the box */ + tab_box (table, -1, -1, -1, TAL_1, + 0, 0, + table->nc - 1, tab_nr (table) - 1); + + /* 
Box around entire table */ + tab_box (table, TAL_2, TAL_2, -1, -1, + 0, 0, table->nc - 1, + tab_nr (table) - 1); + + tab_text (table, 0, 1, TAT_TITLE | TAB_LEFT, + _("Exact Sig. (2-tailed)")); + + tab_text (table, 0, 2, TAT_TITLE | TAB_LEFT, + _("Exact Sig. (1-tailed)")); + + tab_text (table, 0, 3, TAT_TITLE | TAB_LEFT, + _("Point Probability")); + + for (i = 0 ; i < t2s->n_pairs; ++i) + { + variable_pair *vp = &t2s->pairs[i]; + + struct string pair_name; + ds_init_cstr (&pair_name, var_to_string ((*vp)[0])); + ds_put_cstr (&pair_name, " - "); + ds_put_cstr (&pair_name, var_to_string ((*vp)[1])); + + tab_text (table, 1 + i, 0, TAB_LEFT, ds_cstr (&pair_name)); + ds_destroy (&pair_name); + /* Doubling the one-tailed value can exceed 1 (pos == neg, or no non-tied cases), so cap it at 1. */ + tab_double (table, 1 + i, 1, TAB_RIGHT, + MIN (1.0, param[i].one_tailed_sig * 2), NULL); + + tab_double (table, 1 + i, 2, TAB_RIGHT, param[i].one_tailed_sig, NULL); + tab_double (table, 1 + i, 3, TAB_RIGHT, param[i].point_prob, NULL); + } + + tab_submit (table); +} +/* Runs the sign test on each variable pair of TEST over the cases in INPUT (destroyed here) and submits both tables. Values missing under EXCLUDE are skipped per pair; EXACT and TIMER are unused. */ +void +sign_execute (const struct dataset *ds, + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test, + bool exact UNUSED, + double timer UNUSED) +{ + int i; + bool warn = true; + const struct dictionary *dict = dataset_dict (ds); + const struct two_sample_test *t2s = (const struct two_sample_test *) test; + struct ccase *c; + /* One tally per variable pair, starting from zero. */ + struct sign_test_params *stp = xcalloc (t2s->n_pairs, sizeof *stp); + + struct casereader *r = input; + + for (; (c = casereader_read (r)) != NULL; case_unref (c)) + { + const double weight = dict_get_case_weight (dict, c, &warn); + + for (i = 0 ; i < t2s->n_pairs; ++i ) + { + variable_pair *vp = &t2s->pairs[i]; + const union value *value0 = case_data (c, (*vp)[0]); + const union value *value1 = case_data (c, (*vp)[1]); + const double diff = value0->f - value1->f; + /* A case counts for this pair only when neither value is missing. */ + if (var_is_value_missing ((*vp)[0], value0, exclude)) + continue; + + if (var_is_value_missing ((*vp)[1], value1, exclude)) + continue; + + if ( diff > 0) + stp[i].pos += weight; + else if (diff < 0) + 
stp[i].neg += weight; + else + stp[i].ties += weight; + } + } + + casereader_destroy (r); + /* Ties are left out: the smaller count is tested against a binomial with p = 0.5 and n = pos + neg. */ + for (i = 0 ; i < t2s->n_pairs; ++i ) + { + int k = MIN (stp[i].pos, stp[i].neg); + stp[i].one_tailed_sig = gsl_cdf_binomial_P (k, + 0.5, + stp[i].pos + stp[i].neg); + + stp[i].point_prob = gsl_ran_binomial_pdf (k, 0.5, + stp[i].pos + stp[i].neg); + } + + output_frequency_table (t2s, stp, dict); + + output_statistics_table (t2s, stp); + + free (stp); +} diff --git a/src/language/stats/sign.h b/src/language/stats/sign.h new file mode 100644 index 00000000..1404e006 --- /dev/null +++ b/src/language/stats/sign.h @@ -0,0 +1,35 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +#if !sign_h +#define sign_h 1 + + +#include +#include + +struct casereader; +struct dataset; +struct npar_test; +/* Runs the sign test for the variable pairs of TEST over the cases in INPUT and submits the result tables. */ +void sign_execute (const struct dataset *ds, + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test, + bool exact, + double timer); + +#endif diff --git a/tests/automake.mk b/tests/automake.mk index 6db16ebc..2384f9d6 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -46,6 +46,7 @@ dist_TESTS = \ tests/command/npar-binomial.sh \ tests/command/npar-chisquare.sh \ tests/command/npar-wilcoxon.sh \ + tests/command/npar-sign.sh \ tests/command/oneway.sh \ tests/command/oneway-missing.sh \ tests/command/oneway-with-splits.sh \ diff --git a/tests/command/npar-sign.sh b/tests/command/npar-sign.sh new file mode 100755 index 00000000..17c0c239 --- /dev/null +++ b/tests/command/npar-sign.sh @@ -0,0 +1,117 @@ +#!/bin/sh + +# This program tests the SIGN subcommand of npar tests + +TEMPDIR=/tmp/pspp-tst-$$ +TESTFILE=$TEMPDIR/`basename $0`.sps + +# ensure that top_srcdir and top_builddir are absolute +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +top_srcdir=`cd $top_srcdir; pwd` +top_builddir=`cd $top_builddir; pwd` + +PSPP=$top_builddir/src/ui/terminal/pspp + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + +LANG=C +export LANG + + +cleanup() +{ + if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then + echo "NOT cleaning $TEMPDIR" + return ; + fi + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="create program 1" +cat > $TESTFILE << EOF +set format = F9.3. + +data list notable list /age * height rank *. +begin data. +10 12 11 +12 13 13 +13 14 12 +12 12 10 +9 9 10 +10.3 10.2 12 +end data. 
+ +npar tests + /sign=age height WITH height rank (PAIRED) + /MISSING ANALYSIS + /METHOD=EXACT + . +EOF +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program 1" +$SUPERVISOR $PSPP --testing-mode -o raw-ascii $TESTFILE +if [ $? -ne 0 ] ; then no_result ; fi + +activity="compare output 1" +diff - $TEMPDIR/pspp.list <