From 5f53ffc9e8e493bb9a0959a9442d839f76fa39ce Mon Sep 17 00:00:00 2001
From: John Darrington <john@darrington.wattle.id.au>
Date: Sat, 30 Oct 2010 17:09:41 +0200
Subject: [PATCH] NPAR: Implemented the /KENDALL subcommand.

---
 doc/statistics.texi           | 18 +++++++
 src/language/stats/friedman.c | 90 ++++++++++++++++++++++-------------
 src/language/stats/friedman.h | 19 ++++++--
 src/language/stats/npar.c     | 58 ++++++++++++++++++++--
 tests/language/stats/npar.at  | 65 +++++++++++++++++++++++++
 5 files changed, 209 insertions(+), 41 deletions(-)

diff --git a/doc/statistics.texi b/doc/statistics.texi
index 9b4c3ed204..63c97b0607 100644
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -683,6 +683,7 @@ is used.
 * CHISQUARE::               Chisquare Test
 * COCHRAN::                 Cochran Q Test
 * FRIEDMAN::                Friedman Test
+* KENDALL::                 Kendall's W Test
 * KRUSKAL-WALLIS::          Kruskal-Wallis Test
 * MANN-WHITNEY::            Mann Whitney U Test
 * RUNS::                    Runs Test
@@ -796,6 +797,23 @@ The Friedman test is used to test for differences between repeated measures when
 
 A list of variables which contain the measured data must be given.  The procedure prints the sum of ranks for each variable, the test statistic and its significance.
 
+@node KENDALL
+@subsection Kendall's W Test
+@vindex KENDALL
+@cindex Kendall's W test
+@cindex coefficient of concordance
+
+@display
+     [ /KENDALL = varlist ]
+@end display
+
+The Kendall test investigates whether an arbitrary number of related samples come from the
+same population.
+It is identical to the Friedman test except that the additional statistic W, Kendall's Coefficient of Concordance, is printed.
+W has the range [0,1] --- a value of zero indicates no agreement between the samples whereas a value of
+unity indicates complete agreement.
+
+
 @node KRUSKAL-WALLIS
 @subsection Kruskal-Wallis Test
 @vindex KRUSKAL-WALLIS
diff --git a/src/language/stats/friedman.c b/src/language/stats/friedman.c
index 4a0201e6ac..9711167b1d 100644
--- a/src/language/stats/friedman.c
+++ b/src/language/stats/friedman.c
@@ -41,6 +41,7 @@ struct friedman
   double *rank_sum;
   double cc;
   double chi_sq;
+  double w;
   const struct dictionary *dict;
 };
 
@@ -50,7 +51,6 @@ static void show_ranks_box (const struct one_sample_test *ost,
 static void show_sig_box (const struct one_sample_test *ost,
 			  const struct friedman *fr);
 
-
 struct datum
 {
   long posn;
@@ -83,11 +83,11 @@ cmp_posn (const void *a_, const void *b_)
 
 void
 friedman_execute (const struct dataset *ds,
-	      struct casereader *input,
-	      enum mv_class exclude,
-	      const struct npar_test *test,
-	      bool exact UNUSED,
-	      double timer UNUSED)
+		  struct casereader *input,
+		  enum mv_class exclude,
+		  const struct npar_test *test,
+		  bool exact UNUSED,
+		  double timer UNUSED)
 {
   double numerator = 0.0;
   double denominator = 0.0;
@@ -96,17 +96,18 @@ friedman_execute (const struct dataset *ds,
   const struct dictionary *dict = dataset_dict (ds);
   const struct variable *weight = dict_get_weight (dict);
 
-  struct one_sample_test *ft = UP_CAST (test, struct one_sample_test, parent);
+  struct one_sample_test *ost = UP_CAST (test, struct one_sample_test, parent);
+  struct friedman_test *ft = UP_CAST (ost, struct friedman_test, parent);
   bool warn = true;
 
   double sigma_t = 0.0;	
-  struct datum *row = xcalloc (ft->n_vars, sizeof *row);
-
+  struct datum *row = xcalloc (ost->n_vars, sizeof *row);
+  double rsq;
   struct friedman fr;
-  fr.rank_sum = xcalloc (ft->n_vars, sizeof *fr.rank_sum);
+  fr.rank_sum = xcalloc (ost->n_vars, sizeof *fr.rank_sum);
   fr.cc = 0.0;
   fr.dict = dict;
-  for (v = 0; v < ft->n_vars; ++v)
+  for (v = 0; v < ost->n_vars; ++v)
     {
       row[v].posn = v;
       fr.rank_sum[v] = 0.0;
@@ -114,7 +115,7 @@ friedman_execute (const struct dataset *ds,
 
   input = casereader_create_filter_weight (input, dict, &warn, NULL);
   input = casereader_create_filter_missing (input,
-					    ft->vars, ft->n_vars,
+					    ost->vars, ost->n_vars,
 					    exclude, 0, 0);
 
   for (; (c = casereader_read (input)); case_unref (c))
@@ -126,15 +127,15 @@ friedman_execute (const struct dataset *ds,
 
       fr.cc += w;
 
-      for (v = 0; v < ft->n_vars; ++v)
+      for (v = 0; v < ost->n_vars; ++v)
 	{
-	  const struct variable *var = ft->vars[v];
+	  const struct variable *var = ost->vars[v];
 	  const union value *val = case_data (c, var);
 	  row[v].x = val->f;
 	}
 
-      qsort (row, ft->n_vars, sizeof *row, cmp_x);
-      for (v = 0; v < ft->n_vars; ++v)
+      qsort (row, ost->n_vars, sizeof *row, cmp_x);
+      for (v = 0; v < ost->n_vars; ++v)
 	{
 	  double x = row[v].x;
 	  /* Replace value by the Rank */
@@ -169,30 +170,41 @@ friedman_execute (const struct dataset *ds,
 	  sigma_t += w * (pow3 (t) - t );
 	}
 
-      qsort (row, ft->n_vars, sizeof *row, cmp_posn);
+      qsort (row, ost->n_vars, sizeof *row, cmp_posn);
 
-      for (v = 0; v < ft->n_vars; ++v)
+      for (v = 0; v < ost->n_vars; ++v)
 	fr.rank_sum[v] += row[v].x * w;
     }
   casereader_destroy (input);
   free (row);
 
 
-  for (v = 0; v < ft->n_vars; ++v)
+  for (v = 0; v < ost->n_vars; ++v)
     {
       numerator += pow2 (fr.rank_sum[v]);
     }
 
-  numerator *= 12.0 / (fr.cc * ft->n_vars * ( ft->n_vars + 1));
-  numerator -= 3 * fr.cc * ( ft->n_vars + 1);
+  rsq = numerator;
+
+  numerator *= 12.0 / (fr.cc * ost->n_vars * ( ost->n_vars + 1));
+  numerator -= 3 * fr.cc * ( ost->n_vars + 1);
 
-  denominator = 1 - sigma_t / ( fr.cc * ft->n_vars * ( pow2 (ft->n_vars) - 1));
+  denominator = 1 - sigma_t / ( fr.cc * ost->n_vars * ( pow2 (ost->n_vars) - 1));
 
   fr.chi_sq = numerator / denominator;
 
-  show_ranks_box (ft, &fr);
+  if ( ft->kendalls_w)
+    {
+      fr.w = 12 * rsq ;
+      fr.w -= 3 * pow2 (fr.cc) *
+	ost->n_vars * pow2 (ost->n_vars + 1);
 
-  show_sig_box (ft, &fr);
+      fr.w /= pow2 (fr.cc) * (pow3 (ost->n_vars) - ost->n_vars)
+	- fr.cc * sigma_t;
+    }
+
+  show_ranks_box (ost, &fr);
+  show_sig_box (ost, &fr);
 
   free (fr.rank_sum);
 }
@@ -244,28 +256,35 @@ show_ranks_box (const struct one_sample_test *ost, const struct friedman *fr)
 static void
 show_sig_box (const struct one_sample_test *ost, const struct friedman *fr)
 {
+  const struct friedman_test *ft = UP_CAST (ost, const struct friedman_test, parent);
+  
+  int row = 0;
   const struct variable *weight = dict_get_weight (fr->dict);
   const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
 
   const int row_headers = 1;
   const int column_headers = 0;
   struct tab_table *table =
-    tab_create (row_headers + 1, column_headers + 4);
+    tab_create (row_headers + 1, column_headers + (ft->kendalls_w ? 5 : 4));
 
   tab_headers (table, row_headers, 0, column_headers, 0);
 
   tab_title (table, _("Test Statistics"));
 
-  tab_text (table,  0, column_headers,
+  tab_text (table,  0, column_headers + row++,
 	    TAT_TITLE | TAB_LEFT , _("N"));
 
-  tab_text (table,  0, 1 + column_headers,
+  if ( ft->kendalls_w)
+    tab_text (table,  0, column_headers + row++,
+	      TAT_TITLE | TAB_LEFT , _("Kendall's W"));
+
+  tab_text (table,  0, column_headers + row++,
 	    TAT_TITLE | TAB_LEFT , _("Chi-Square"));
 
-  tab_text (table,  0, 2 + column_headers,
+  tab_text (table,  0, column_headers + row++,
 	    TAT_TITLE | TAB_LEFT, _("df"));
 
-  tab_text (table,  0, 3 + column_headers,
+  tab_text (table,  0, column_headers + row++,
 	    TAT_TITLE | TAB_LEFT, _("Asymp. Sig."));
 
   /* Box around the table */
@@ -276,16 +295,21 @@ show_sig_box (const struct one_sample_test *ost, const struct friedman *fr)
   tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
   tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
 
-  tab_double (table, 1, column_headers, 
+  row = 0;
+  tab_double (table, 1, column_headers + row++, 
 	      0, fr->cc, wfmt);
 
-  tab_double (table, 1, column_headers + 1, 
+  if (ft->kendalls_w)
+    tab_double (table, 1, column_headers + row++, 
+		0, fr->w, 0);
+
+  tab_double (table, 1, column_headers + row++, 
 	      0, fr->chi_sq, 0);
 
-  tab_double (table, 1, column_headers + 2, 
+  tab_double (table, 1, column_headers + row++, 
 	      0, ost->n_vars - 1, &F_8_0);
 
-  tab_double (table, 1, column_headers + 3, 
+  tab_double (table, 1, column_headers + row++, 
 	      0, gsl_cdf_chisq_Q (fr->chi_sq, ost->n_vars - 1), 
 	      0);
 
diff --git a/src/language/stats/friedman.h b/src/language/stats/friedman.h
index 4d271d8547..22154c9cf5 100644
--- a/src/language/stats/friedman.h
+++ b/src/language/stats/friedman.h
@@ -23,12 +23,21 @@
 
 
 
+struct friedman_test
+{
+  struct one_sample_test parent;
+
+  /* True to also compute and display Kendall's W alongside the Friedman statistics. */
+  bool kendalls_w;
+};
+
+
 void friedman_execute (const struct dataset *ds,
-			struct casereader *input,
-                        enum mv_class exclude,
-			const struct npar_test *test,
-			bool,
-		   double);
+		       struct casereader *input,
+		       enum mv_class exclude,
+		       const struct npar_test *test,
+		       bool,
+		       double);
 
 
 #endif
diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c
index 6bac48de91..a36715e65e 100644
--- a/src/language/stats/npar.c
+++ b/src/language/stats/npar.c
@@ -86,6 +86,7 @@ struct cmd_npar_tests
     int sign;
     int runs;
     int friedman;
+    int kendall;
     int kruskal_wallis;
     int mann_whitney;
     int missing;
@@ -125,6 +126,7 @@ static int npar_chisquare (struct lexer *, struct dataset *, struct npar_specs *
 static int npar_binomial (struct lexer *, struct dataset *,  struct npar_specs *);
 static int npar_runs (struct lexer *, struct dataset *, struct npar_specs *);
 static int npar_friedman (struct lexer *, struct dataset *, struct npar_specs *);
+static int npar_kendall (struct lexer *, struct dataset *, struct npar_specs *);
 static int npar_cochran (struct lexer *, struct dataset *, struct npar_specs *);
 static int npar_wilcoxon (struct lexer *, struct dataset *, struct npar_specs *);
 static int npar_sign (struct lexer *, struct dataset *, struct npar_specs *);
@@ -188,6 +190,22 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests
               NOT_REACHED ();
             }
 	}
+      else if (lex_match_hyphenated_word (lexer, "KENDALL"))
+	{
+          npt->kendall++;
+          switch (npar_kendall (lexer, ds, nps))
+            {
+            case 0:
+              goto lossage;
+            case 1:
+              break;
+            case 2:
+              lex_error (lexer, NULL);
+              goto lossage;
+            default:
+              NOT_REACHED ();
+            }
+	}
       else if (lex_match_hyphenated_word (lexer, "RUNS"))
 	{
           npt->runs++;
@@ -606,16 +624,49 @@ static int
 npar_friedman (struct lexer *lexer, struct dataset *ds,
 	       struct npar_specs *specs)
 {
-  struct one_sample_test *ft = pool_alloc (specs->pool, sizeof (*ft)); 
-  struct npar_test *nt = &ft->parent;
+  struct friedman_test *ft = pool_alloc (specs->pool, sizeof (*ft)); 
+  struct one_sample_test *ost = &ft->parent;
+  struct npar_test *nt = &ost->parent;
 
+  ft->kendalls_w = false;
   nt->execute = friedman_execute;
   nt->insert_variables = one_sample_insert_variables;
 
   lex_match (lexer, '=');
 
   if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
-				   &ft->vars, &ft->n_vars,
+				   &ost->vars, &ost->n_vars,
+				   PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC))
+    {
+      return 2;
+    }
+
+  specs->n_tests++;
+  specs->test = pool_realloc (specs->pool,
+			      specs->test,
+			      sizeof (*specs->test) * specs->n_tests);
+
+  specs->test[specs->n_tests - 1] = nt;
+
+  return 1;
+}
+
+static int
+npar_kendall (struct lexer *lexer, struct dataset *ds,
+	       struct npar_specs *specs)
+{
+  struct friedman_test *kt = pool_alloc (specs->pool, sizeof (*kt)); 
+  struct one_sample_test *ost = &kt->parent;
+  struct npar_test *nt = &ost->parent;
+
+  kt->kendalls_w = true;
+  nt->execute = friedman_execute;
+  nt->insert_variables = one_sample_insert_variables;
+
+  lex_match (lexer, '=');
+
+  if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
+				   &ost->vars, &ost->n_vars,
 				   PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC))
     {
       return 2;
@@ -631,6 +682,7 @@ npar_friedman (struct lexer *lexer, struct dataset *ds,
   return 1;
 }
 
+
 static int
 npar_cochran (struct lexer *lexer, struct dataset *ds,
 	       struct npar_specs *specs)
diff --git a/tests/language/stats/npar.at b/tests/language/stats/npar.at
index 02cc99d289..dd8ddf29e3 100644
--- a/tests/language/stats/npar.at
+++ b/tests/language/stats/npar.at
@@ -1026,3 +1026,68 @@ Asymp. Sig.,.047
 ])
 
 AT_CLEANUP
+
+
+
+AT_SETUP([NPAR TESTS Kendall])
+AT_DATA([npar-kendall.sps], [dnl
+SET FORMAT F14.3.
+
+data list notable list /v1 * v2 * v3
+begin data.
+ 7  7  2 
+ 5  6  5 
+ 8  6  4 
+ 5  7  4 
+ 5  4  4 
+ 8  6  5 
+ 6  3  5 
+ 7  6  5 
+ 8  5  5
+ .  2  2 
+ 5  4  5 
+ 3  4  4 
+ 5  1  2 
+ 5  2  1 
+ 7  6  5 
+ 6  3  4 
+ 6  6  6 
+ 5  4  5 
+ 4  3  4 
+ 9  1  1 
+ 6  2  1 
+ 3  7  8 
+ 6  3  4 
+ 4  4  4 
+ 5  4  3 
+ 6  5  2 
+ 4  4  8 
+ 4  6  4 
+ 6  5  5 
+ 7  8  6 
+ 5  3  5 
+end data.
+
+npar tests
+	/kendall = all
+	.
+])
+
+AT_CHECK([pspp -o pspp.csv npar-kendall.sps])
+
+AT_CHECK([cat pspp.csv], [0], [dnl
+Table: Ranks
+,Mean Rank
+v1,2.500
+v2,1.817
+v3,1.683
+
+Table: Test Statistics
+N,30
+Kendall's W,.233
+Chi-Square,13.960
+df,2
+Asymp. Sig.,.001
+])
+
+AT_CLEANUP
-- 
2.30.2