From 5f53ffc9e8e493bb9a0959a9442d839f76fa39ce Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 30 Oct 2010 17:09:41 +0200 Subject: [PATCH] NPAR: Implemented the /KENDALL subcommand. --- doc/statistics.texi | 18 +++++++ src/language/stats/friedman.c | 90 ++++++++++++++++++++++------------- src/language/stats/friedman.h | 19 ++++++-- src/language/stats/npar.c | 58 ++++++++++++++++++++-- tests/language/stats/npar.at | 65 +++++++++++++++++++++++++ 5 files changed, 209 insertions(+), 41 deletions(-) diff --git a/doc/statistics.texi b/doc/statistics.texi index 9b4c3ed204..63c97b0607 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -683,6 +683,7 @@ is used. * CHISQUARE:: Chisquare Test * COCHRAN:: Cochran Q Test * FRIEDMAN:: Friedman Test +* KENDALL:: Kendall's W Test * KRUSKAL-WALLIS:: Kruskal-Wallis Test * MANN-WHITNEY:: Mann Whitney U Test * RUNS:: Runs Test @@ -796,6 +797,23 @@ The Friedman test is used to test for differences between repeated measures when A list of variables which contain the measured data must be given. The procedure prints the sum of ranks for each variable, the test statistic and its significance. +@node KENDALL +@subsection Kendall's W Test +@vindex KENDALL +@cindex Kendall's W test +@cindex coefficient of concordance + +@display + [ /KENDALL = varlist ] +@end display + +The Kendall test investigates whether an arbitrary number of related samples come from the +same population. +It is identical to the Friedman test except that an additional statistic, W (Kendall's Coefficient of Concordance), is printed. +W has the range [0,1] --- a value of zero indicates no agreement between the samples whereas a value of +unity indicates complete agreement. 
+ + @node KRUSKAL-WALLIS @subsection Kruskal-Wallis Test @vindex KRUSKAL-WALLIS diff --git a/src/language/stats/friedman.c b/src/language/stats/friedman.c index 4a0201e6ac..9711167b1d 100644 --- a/src/language/stats/friedman.c +++ b/src/language/stats/friedman.c @@ -41,6 +41,7 @@ struct friedman double *rank_sum; double cc; double chi_sq; + double w; const struct dictionary *dict; }; @@ -50,7 +51,6 @@ static void show_ranks_box (const struct one_sample_test *ost, static void show_sig_box (const struct one_sample_test *ost, const struct friedman *fr); - struct datum { long posn; @@ -83,11 +83,11 @@ cmp_posn (const void *a_, const void *b_) void friedman_execute (const struct dataset *ds, - struct casereader *input, - enum mv_class exclude, - const struct npar_test *test, - bool exact UNUSED, - double timer UNUSED) + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test, + bool exact UNUSED, + double timer UNUSED) { double numerator = 0.0; double denominator = 0.0; @@ -96,17 +96,18 @@ friedman_execute (const struct dataset *ds, const struct dictionary *dict = dataset_dict (ds); const struct variable *weight = dict_get_weight (dict); - struct one_sample_test *ft = UP_CAST (test, struct one_sample_test, parent); + struct one_sample_test *ost = UP_CAST (test, struct one_sample_test, parent); + struct friedman_test *ft = UP_CAST (ost, struct friedman_test, parent); bool warn = true; double sigma_t = 0.0; - struct datum *row = xcalloc (ft->n_vars, sizeof *row); - + struct datum *row = xcalloc (ost->n_vars, sizeof *row); + double rsq; struct friedman fr; - fr.rank_sum = xcalloc (ft->n_vars, sizeof *fr.rank_sum); + fr.rank_sum = xcalloc (ost->n_vars, sizeof *fr.rank_sum); fr.cc = 0.0; fr.dict = dict; - for (v = 0; v < ft->n_vars; ++v) + for (v = 0; v < ost->n_vars; ++v) { row[v].posn = v; fr.rank_sum[v] = 0.0; @@ -114,7 +115,7 @@ friedman_execute (const struct dataset *ds, input = casereader_create_filter_weight (input, dict, &warn, NULL); input 
= casereader_create_filter_missing (input, - ft->vars, ft->n_vars, + ost->vars, ost->n_vars, exclude, 0, 0); for (; (c = casereader_read (input)); case_unref (c)) @@ -126,15 +127,15 @@ friedman_execute (const struct dataset *ds, fr.cc += w; - for (v = 0; v < ft->n_vars; ++v) + for (v = 0; v < ost->n_vars; ++v) { - const struct variable *var = ft->vars[v]; + const struct variable *var = ost->vars[v]; const union value *val = case_data (c, var); row[v].x = val->f; } - qsort (row, ft->n_vars, sizeof *row, cmp_x); - for (v = 0; v < ft->n_vars; ++v) + qsort (row, ost->n_vars, sizeof *row, cmp_x); + for (v = 0; v < ost->n_vars; ++v) { double x = row[v].x; /* Replace value by the Rank */ @@ -169,30 +170,41 @@ friedman_execute (const struct dataset *ds, sigma_t += w * (pow3 (t) - t ); } - qsort (row, ft->n_vars, sizeof *row, cmp_posn); + qsort (row, ost->n_vars, sizeof *row, cmp_posn); - for (v = 0; v < ft->n_vars; ++v) + for (v = 0; v < ost->n_vars; ++v) fr.rank_sum[v] += row[v].x * w; } casereader_destroy (input); free (row); - for (v = 0; v < ft->n_vars; ++v) + for (v = 0; v < ost->n_vars; ++v) { numerator += pow2 (fr.rank_sum[v]); } - numerator *= 12.0 / (fr.cc * ft->n_vars * ( ft->n_vars + 1)); - numerator -= 3 * fr.cc * ( ft->n_vars + 1); + rsq = numerator; + + numerator *= 12.0 / (fr.cc * ost->n_vars * ( ost->n_vars + 1)); + numerator -= 3 * fr.cc * ( ost->n_vars + 1); - denominator = 1 - sigma_t / ( fr.cc * ft->n_vars * ( pow2 (ft->n_vars) - 1)); + denominator = 1 - sigma_t / ( fr.cc * ost->n_vars * ( pow2 (ost->n_vars) - 1)); fr.chi_sq = numerator / denominator; - show_ranks_box (ft, &fr); + if ( ft->kendalls_w) + { + fr.w = 12 * rsq ; + fr.w -= 3 * pow2 (fr.cc) * + ost->n_vars * pow2 (ost->n_vars + 1); - show_sig_box (ft, &fr); + fr.w /= pow2 (fr.cc) * (pow3 (ost->n_vars) - ost->n_vars) + - fr.cc * sigma_t; + } + + show_ranks_box (ost, &fr); + show_sig_box (ost, &fr); free (fr.rank_sum); } @@ -244,28 +256,35 @@ show_ranks_box (const struct one_sample_test *ost, 
const struct friedman *fr) static void show_sig_box (const struct one_sample_test *ost, const struct friedman *fr) { + const struct friedman_test *ft = UP_CAST (ost, const struct friedman_test, parent); + + int row = 0; const struct variable *weight = dict_get_weight (fr->dict); const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0; const int row_headers = 1; const int column_headers = 0; struct tab_table *table = - tab_create (row_headers + 1, column_headers + 4); + tab_create (row_headers + 1, column_headers + (ft->kendalls_w ? 5 : 4)); tab_headers (table, row_headers, 0, column_headers, 0); tab_title (table, _("Test Statistics")); - tab_text (table, 0, column_headers, + tab_text (table, 0, column_headers + row++, TAT_TITLE | TAB_LEFT , _("N")); - tab_text (table, 0, 1 + column_headers, + if ( ft->kendalls_w) + tab_text (table, 0, column_headers + row++, + TAT_TITLE | TAB_LEFT , _("Kendall's W")); + + tab_text (table, 0, column_headers + row++, TAT_TITLE | TAB_LEFT , _("Chi-Square")); - tab_text (table, 0, 2 + column_headers, + tab_text (table, 0, column_headers + row++, TAT_TITLE | TAB_LEFT, _("df")); - tab_text (table, 0, 3 + column_headers, + tab_text (table, 0, column_headers + row++, TAT_TITLE | TAB_LEFT, _("Asymp. 
Sig.")); /* Box around the table */ @@ -276,16 +295,21 @@ show_sig_box (const struct one_sample_test *ost, const struct friedman *fr) tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers); tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1); - tab_double (table, 1, column_headers, + row = 0; + tab_double (table, 1, column_headers + row++, 0, fr->cc, wfmt); - tab_double (table, 1, column_headers + 1, + if (ft->kendalls_w) + tab_double (table, 1, column_headers + row++, + 0, fr->w, 0); + + tab_double (table, 1, column_headers + row++, 0, fr->chi_sq, 0); - tab_double (table, 1, column_headers + 2, + tab_double (table, 1, column_headers + row++, 0, ost->n_vars - 1, &F_8_0); - tab_double (table, 1, column_headers + 3, + tab_double (table, 1, column_headers + row++, 0, gsl_cdf_chisq_Q (fr->chi_sq, ost->n_vars - 1), 0); diff --git a/src/language/stats/friedman.h b/src/language/stats/friedman.h index 4d271d8547..22154c9cf5 100644 --- a/src/language/stats/friedman.h +++ b/src/language/stats/friedman.h @@ -23,12 +23,21 @@ +struct friedman_test +{ + struct one_sample_test parent; + + /* Calculate and display the Kendall W statistic */ + bool kendalls_w; +}; + + void friedman_execute (const struct dataset *ds, - struct casereader *input, - enum mv_class exclude, - const struct npar_test *test, - bool, - double); + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test, + bool, + double); #endif diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c index 6bac48de91..a36715e65e 100644 --- a/src/language/stats/npar.c +++ b/src/language/stats/npar.c @@ -86,6 +86,7 @@ struct cmd_npar_tests int sign; int runs; int friedman; + int kendall; int kruskal_wallis; int mann_whitney; int missing; @@ -125,6 +126,7 @@ static int npar_chisquare (struct lexer *, struct dataset *, struct npar_specs * static int npar_binomial (struct lexer *, struct dataset *, struct npar_specs *); static int npar_runs (struct lexer *, struct dataset *, 
struct npar_specs *); static int npar_friedman (struct lexer *, struct dataset *, struct npar_specs *); +static int npar_kendall (struct lexer *, struct dataset *, struct npar_specs *); static int npar_cochran (struct lexer *, struct dataset *, struct npar_specs *); static int npar_wilcoxon (struct lexer *, struct dataset *, struct npar_specs *); static int npar_sign (struct lexer *, struct dataset *, struct npar_specs *); @@ -188,6 +190,22 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } + else if (lex_match_hyphenated_word (lexer, "KENDALL")) + { + npt->kendall++; + switch (npar_kendall (lexer, ds, nps)) + { + case 0: + goto lossage; + case 1: + break; + case 2: + lex_error (lexer, NULL); + goto lossage; + default: + NOT_REACHED (); + } + } else if (lex_match_hyphenated_word (lexer, "RUNS")) { npt->runs++; @@ -606,16 +624,49 @@ static int npar_friedman (struct lexer *lexer, struct dataset *ds, struct npar_specs *specs) { - struct one_sample_test *ft = pool_alloc (specs->pool, sizeof (*ft)); - struct npar_test *nt = &ft->parent; + struct friedman_test *ft = pool_alloc (specs->pool, sizeof (*ft)); + struct one_sample_test *ost = &ft->parent; + struct npar_test *nt = &ost->parent; + ft->kendalls_w = false; nt->execute = friedman_execute; nt->insert_variables = one_sample_insert_variables; lex_match (lexer, '='); if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), - &ft->vars, &ft->n_vars, + &ost->vars, &ost->n_vars, + PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC)) + { + return 2; + } + + specs->n_tests++; + specs->test = pool_realloc (specs->pool, + specs->test, + sizeof (*specs->test) * specs->n_tests); + + specs->test[specs->n_tests - 1] = nt; + + return 1; +} + +static int +npar_kendall (struct lexer *lexer, struct dataset *ds, + struct npar_specs *specs) +{ + struct friedman_test *kt = pool_alloc (specs->pool, sizeof (*kt)); + struct one_sample_test *ost = &kt->parent; + struct npar_test 
*nt = &ost->parent; + + kt->kendalls_w = true; + nt->execute = friedman_execute; + nt->insert_variables = one_sample_insert_variables; + + lex_match (lexer, '='); + + if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), + &ost->vars, &ost->n_vars, PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC)) { return 2; @@ -631,6 +682,7 @@ npar_friedman (struct lexer *lexer, struct dataset *ds, return 1; } + static int npar_cochran (struct lexer *lexer, struct dataset *ds, struct npar_specs *specs) diff --git a/tests/language/stats/npar.at b/tests/language/stats/npar.at index 02cc99d289..dd8ddf29e3 100644 --- a/tests/language/stats/npar.at +++ b/tests/language/stats/npar.at @@ -1026,3 +1026,68 @@ Asymp. Sig.,.047 ]) AT_CLEANUP + + + +AT_SETUP([NPAR TESTS Kendall]) +AT_DATA([npar-kendall.sps], [dnl +SET FORMAT F14.3. + +data list notable list /v1 * v2 * v3 +begin data. + 7 7 2 + 5 6 5 + 8 6 4 + 5 7 4 + 5 4 4 + 8 6 5 + 6 3 5 + 7 6 5 + 8 5 5 + . 2 2 + 5 4 5 + 3 4 4 + 5 1 2 + 5 2 1 + 7 6 5 + 6 3 4 + 6 6 6 + 5 4 5 + 4 3 4 + 9 1 1 + 6 2 1 + 3 7 8 + 6 3 4 + 4 4 4 + 5 4 3 + 6 5 2 + 4 4 8 + 4 6 4 + 6 5 5 + 7 8 6 + 5 3 5 +end data. + +npar tests + /kendall = all + . +]) + +AT_CHECK([pspp -o pspp.csv npar-kendall.sps]) + +AT_CHECK([cat pspp.csv], [0], [dnl +Table: Ranks +,Mean Rank +v1,2.500 +v2,1.817 +v3,1.683 + +Table: Test Statistics +N,30 +Kendall's W,.233 +Chi-Square,13.960 +df,2 +Asymp. Sig.,.001 +]) + +AT_CLEANUP -- 2.30.2