NPAR: Implemented the /KENDALL subcommand.

author John Darrington <john@darrington.wattle.id.au>

Sat, 30 Oct 2010 15:09:41 +0000 (17:09 +0200)

committer John Darrington <john@darrington.wattle.id.au>

Sat, 30 Oct 2010 15:09:41 +0000 (17:09 +0200)
author John Darrington <john@darrington.wattle.id.au>
Sat, 30 Oct 2010 15:09:41 +0000 (17:09 +0200)
committer John Darrington <john@darrington.wattle.id.au>
Sat, 30 Oct 2010 15:09:41 +0000 (17:09 +0200)
diff --git a/doc/statistics.texi b/doc/statistics.texi

index 9b4c3ed204cac599b5d6ede2b87c2c822cb291e2..63c97b06078e564dcc0e94cb55cef35b05ee641a 100644 (file)
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -683,6 +683,7 @@ is used.
  * CHISQUARE::               Chisquare Test
  * COCHRAN::                 Cochran Q Test
  * FRIEDMAN::                Friedman Test
+* KENDALL::                 Kendall's W Test
  * KRUSKAL-WALLIS::          Kruskal-Wallis Test
  * MANN-WHITNEY::            Mann Whitney U Test
  * RUNS::                    Runs Test
@@ -796,6 +797,23 @@ The Friedman test is used to test for differences between repeated measures when
  
  A list of variables which contain the measured data must be given.  The procedure prints the sum of ranks for each variable, the test statistic and its significance.
  
+@node KENDALL
+@subsection Kendall's W Test
+@vindex KENDALL
+@cindex Kendall's W test
+@cindex coefficient of concordance
+
+@display
+     [ /KENDALL = varlist ]
+@end display
+
+The Kendall test investigates whether an arbitrary number of related samples come from the
+same population.
+It is identical to the Friedman test except that an additional statistic, W (Kendall's Coefficient of Concordance), is also printed.
+W lies in the range [0,1] --- a value of zero indicates no agreement between the samples, whereas a value of
+unity indicates complete agreement.
+
+
  @node KRUSKAL-WALLIS
  @subsection Kruskal-Wallis Test
  @vindex KRUSKAL-WALLIS
diff --git a/src/language/stats/friedman.c b/src/language/stats/friedman.c

index 4a0201e6ac112b3ecfbd5bbbfc0b856424939a71..9711167b1d3a0ef8fea3d8600c614f4375873ea5 100644 (file)
--- a/src/language/stats/friedman.c
+++ b/src/language/stats/friedman.c
@@ -41,6 +41,7 @@ struct friedman
    double *rank_sum;
    double cc;
    double chi_sq;
+  double w;
    const struct dictionary *dict;
  };
  
@@ -50,7 +51,6 @@ static void show_ranks_box (const struct one_sample_test *ost,
  static void show_sig_box (const struct one_sample_test *ost,
                           const struct friedman *fr);
  
-
  struct datum
  {
    long posn;
@@ -83,11 +83,11 @@ cmp_posn (const void *a_, const void *b_)
  
  void
  friedman_execute (const struct dataset *ds,
-             struct casereader *input,
-             enum mv_class exclude,
-             const struct npar_test *test,
-             bool exact UNUSED,
-             double timer UNUSED)
+                 struct casereader *input,
+                 enum mv_class exclude,
+                 const struct npar_test *test,
+                 bool exact UNUSED,
+                 double timer UNUSED)
  {
    double numerator = 0.0;
    double denominator = 0.0;
@@ -96,17 +96,18 @@ friedman_execute (const struct dataset *ds,
    const struct dictionary *dict = dataset_dict (ds);
    const struct variable *weight = dict_get_weight (dict);
  
-  struct one_sample_test *ft = UP_CAST (test, struct one_sample_test, parent);
+  struct one_sample_test *ost = UP_CAST (test, struct one_sample_test, parent);
+  struct friedman_test *ft = UP_CAST (ost, struct friedman_test, parent);
    bool warn = true;
  
    double sigma_t = 0.0;        
-  struct datum *row = xcalloc (ft->n_vars, sizeof *row);
-
+  struct datum *row = xcalloc (ost->n_vars, sizeof *row);
+  double rsq;
    struct friedman fr;
-  fr.rank_sum = xcalloc (ft->n_vars, sizeof *fr.rank_sum);
+  fr.rank_sum = xcalloc (ost->n_vars, sizeof *fr.rank_sum);
    fr.cc = 0.0;
    fr.dict = dict;
-  for (v = 0; v < ft->n_vars; ++v)
+  for (v = 0; v < ost->n_vars; ++v)
      {
        row[v].posn = v;
        fr.rank_sum[v] = 0.0;
@@ -114,7 +115,7 @@ friedman_execute (const struct dataset *ds,
  
    input = casereader_create_filter_weight (input, dict, &warn, NULL);
    input = casereader_create_filter_missing (input,
-                                           ft->vars, ft->n_vars,
+                                           ost->vars, ost->n_vars,
                                             exclude, 0, 0);
  
    for (; (c = casereader_read (input)); case_unref (c))
@@ -126,15 +127,15 @@ friedman_execute (const struct dataset *ds,
  
        fr.cc += w;
  
-      for (v = 0; v < ft->n_vars; ++v)
+      for (v = 0; v < ost->n_vars; ++v)
         {
-         const struct variable *var = ft->vars[v];
+         const struct variable *var = ost->vars[v];
           const union value *val = case_data (c, var);
           row[v].x = val->f;
         }
  
-      qsort (row, ft->n_vars, sizeof *row, cmp_x);
-      for (v = 0; v < ft->n_vars; ++v)
+      qsort (row, ost->n_vars, sizeof *row, cmp_x);
+      for (v = 0; v < ost->n_vars; ++v)
         {
           double x = row[v].x;
           /* Replace value by the Rank */
@@ -169,30 +170,41 @@ friedman_execute (const struct dataset *ds,
           sigma_t += w * (pow3 (t) - t );
         }
  
-      qsort (row, ft->n_vars, sizeof *row, cmp_posn);
+      qsort (row, ost->n_vars, sizeof *row, cmp_posn);
  
-      for (v = 0; v < ft->n_vars; ++v)
+      for (v = 0; v < ost->n_vars; ++v)
         fr.rank_sum[v] += row[v].x * w;
      }
    casereader_destroy (input);
    free (row);
  
  
-  for (v = 0; v < ft->n_vars; ++v)
+  for (v = 0; v < ost->n_vars; ++v)
      {
        numerator += pow2 (fr.rank_sum[v]);
      }
  
-  numerator *= 12.0 / (fr.cc * ft->n_vars * ( ft->n_vars + 1));
-  numerator -= 3 * fr.cc * ( ft->n_vars + 1);
+  rsq = numerator;
+
+  numerator *= 12.0 / (fr.cc * ost->n_vars * ( ost->n_vars + 1));
+  numerator -= 3 * fr.cc * ( ost->n_vars + 1);
  
-  denominator = 1 - sigma_t / ( fr.cc * ft->n_vars * ( pow2 (ft->n_vars) - 1));
+  denominator = 1 - sigma_t / ( fr.cc * ost->n_vars * ( pow2 (ost->n_vars) - 1));
  
    fr.chi_sq = numerator / denominator;
  
-  show_ranks_box (ft, &fr);
+  if ( ft->kendalls_w)
+    {
+      fr.w = 12 * rsq ;
+      fr.w -= 3 * pow2 (fr.cc) *
+       ost->n_vars * pow2 (ost->n_vars + 1);
  
-  show_sig_box (ft, &fr);
+      fr.w /= pow2 (fr.cc) * (pow3 (ost->n_vars) - ost->n_vars)
+       - fr.cc * sigma_t;
+    }
+
+  show_ranks_box (ost, &fr);
+  show_sig_box (ost, &fr);
  
    free (fr.rank_sum);
  }
@@ -244,28 +256,35 @@ show_ranks_box (const struct one_sample_test *ost, const struct friedman *fr)
  static void
  show_sig_box (const struct one_sample_test *ost, const struct friedman *fr)
  {
+  const struct friedman_test *ft = UP_CAST (ost, const struct friedman_test, parent);
+  
+  int row = 0;
    const struct variable *weight = dict_get_weight (fr->dict);
    const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
  
    const int row_headers = 1;
    const int column_headers = 0;
    struct tab_table *table =
-    tab_create (row_headers + 1, column_headers + 4);
+    tab_create (row_headers + 1, column_headers + (ft->kendalls_w ? 5 : 4));
  
    tab_headers (table, row_headers, 0, column_headers, 0);
  
    tab_title (table, _("Test Statistics"));
  
-  tab_text (table,  0, column_headers,
+  tab_text (table,  0, column_headers + row++,
             TAT_TITLE | TAB_LEFT , _("N"));
  
-  tab_text (table,  0, 1 + column_headers,
+  if ( ft->kendalls_w)
+    tab_text (table,  0, column_headers + row++,
+             TAT_TITLE | TAB_LEFT , _("Kendall's W"));
+
+  tab_text (table,  0, column_headers + row++,
             TAT_TITLE | TAB_LEFT , _("Chi-Square"));
  
-  tab_text (table,  0, 2 + column_headers,
+  tab_text (table,  0, column_headers + row++,
             TAT_TITLE | TAB_LEFT, _("df"));
  
-  tab_text (table,  0, 3 + column_headers,
+  tab_text (table,  0, column_headers + row++,
             TAT_TITLE | TAB_LEFT, _("Asymp. Sig."));
  
    /* Box around the table */
@@ -276,16 +295,21 @@ show_sig_box (const struct one_sample_test *ost, const struct friedman *fr)
    tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
    tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
  
-  tab_double (table, 1, column_headers, 
+  row = 0;
+  tab_double (table, 1, column_headers + row++, 
               0, fr->cc, wfmt);
  
-  tab_double (table, 1, column_headers + 1, 
+  if (ft->kendalls_w)
+    tab_double (table, 1, column_headers + row++, 
+               0, fr->w, 0);
+
+  tab_double (table, 1, column_headers + row++, 
               0, fr->chi_sq, 0);
  
-  tab_double (table, 1, column_headers + 2, 
+  tab_double (table, 1, column_headers + row++, 
               0, ost->n_vars - 1, &F_8_0);
  
-  tab_double (table, 1, column_headers + 3, 
+  tab_double (table, 1, column_headers + row++, 
               0, gsl_cdf_chisq_Q (fr->chi_sq, ost->n_vars - 1), 
               0);
  
diff --git a/src/language/stats/friedman.h b/src/language/stats/friedman.h

index 4d271d8547f37dfd556698ef790b6a6c83539279..22154c9cf5d52a1f39cb0ca137d204dddb4fd93a 100644 (file)
--- a/src/language/stats/friedman.h
+++ b/src/language/stats/friedman.h
@@ -23,12 +23,21 @@
  
  
  
+struct friedman_test
+{
+  struct one_sample_test parent;
+
+  /* If true, also calculate and display Kendall's W statistic. */
+  bool kendalls_w;
+};
+
+
  void friedman_execute (const struct dataset *ds,
-                       struct casereader *input,
-                        enum mv_class exclude,
-                       const struct npar_test *test,
-                       bool,
-                  double);
+                      struct casereader *input,
+                      enum mv_class exclude,
+                      const struct npar_test *test,
+                      bool,
+                      double);
  
  
  #endif
diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c

index 6bac48de913974ef42635aa78d7c4e2a427dc684..a36715e65e4d7ee96cd8a940ce95db38708c5bc1 100644 (file)
--- a/src/language/stats/npar.c
+++ b/src/language/stats/npar.c
@@ -86,6 +86,7 @@ struct cmd_npar_tests
      int sign;
      int runs;
      int friedman;
+    int kendall;
      int kruskal_wallis;
      int mann_whitney;
      int missing;
@@ -125,6 +126,7 @@ static int npar_chisquare (struct lexer *, struct dataset *, struct npar_specs *
  static int npar_binomial (struct lexer *, struct dataset *,  struct npar_specs *);
  static int npar_runs (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_friedman (struct lexer *, struct dataset *, struct npar_specs *);
+static int npar_kendall (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_cochran (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_wilcoxon (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_sign (struct lexer *, struct dataset *, struct npar_specs *);
@@ -188,6 +190,22 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests
                NOT_REACHED ();
              }
         }
+      else if (lex_match_hyphenated_word (lexer, "KENDALL"))
+       {
+          npt->kendall++;
+          switch (npar_kendall (lexer, ds, nps))
+            {
+            case 0:
+              goto lossage;
+            case 1:
+              break;
+            case 2:
+              lex_error (lexer, NULL);
+              goto lossage;
+            default:
+              NOT_REACHED ();
+            }
+       }
        else if (lex_match_hyphenated_word (lexer, "RUNS"))
         {
            npt->runs++;
@@ -606,16 +624,49 @@ static int
  npar_friedman (struct lexer *lexer, struct dataset *ds,
                struct npar_specs *specs)
  {
-  struct one_sample_test *ft = pool_alloc (specs->pool, sizeof (*ft)); 
-  struct npar_test *nt = &ft->parent;
+  struct friedman_test *ft = pool_alloc (specs->pool, sizeof (*ft)); 
+  struct one_sample_test *ost = &ft->parent;
+  struct npar_test *nt = &ost->parent;
  
+  ft->kendalls_w = false;
    nt->execute = friedman_execute;
    nt->insert_variables = one_sample_insert_variables;
  
    lex_match (lexer, '=');
  
    if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
-                                  &ft->vars, &ft->n_vars,
+                                  &ost->vars, &ost->n_vars,
+                                  PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC))
+    {
+      return 2;
+    }
+
+  specs->n_tests++;
+  specs->test = pool_realloc (specs->pool,
+                             specs->test,
+                             sizeof (*specs->test) * specs->n_tests);
+
+  specs->test[specs->n_tests - 1] = nt;
+
+  return 1;
+}
+
+static int
+npar_kendall (struct lexer *lexer, struct dataset *ds,
+              struct npar_specs *specs)
+{
+  struct friedman_test *kt = pool_alloc (specs->pool, sizeof (*kt)); 
+  struct one_sample_test *ost = &kt->parent;
+  struct npar_test *nt = &ost->parent;
+
+  kt->kendalls_w = true;
+  nt->execute = friedman_execute;
+  nt->insert_variables = one_sample_insert_variables;
+
+  lex_match (lexer, '=');
+
+  if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
+                                  &ost->vars, &ost->n_vars,
                                    PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC))
      {
        return 2;
@@ -631,6 +682,7 @@ npar_friedman (struct lexer *lexer, struct dataset *ds,
    return 1;
  }
  
+
  static int
  npar_cochran (struct lexer *lexer, struct dataset *ds,
                struct npar_specs *specs)
diff --git a/tests/language/stats/npar.at b/tests/language/stats/npar.at

index 02cc99d2899e0e918b6c2c7c27e25a46e62336cc..dd8ddf29e387753ccecab24e39b12b9b3bbadd6e 100644 (file)
--- a/tests/language/stats/npar.at
+++ b/tests/language/stats/npar.at
@@ -1026,3 +1026,68 @@ Asymp. Sig.,.047
  ])
  
  AT_CLEANUP
+
+
+
+AT_SETUP([NPAR TESTS Kendall])
+AT_DATA([npar-kendall.sps], [dnl
+SET FORMAT F14.3.
+
+data list notable list /v1 * v2 * v3
+begin data.
+ 7  7  2 
+ 5  6  5 
+ 8  6  4 
+ 5  7  4 
+ 5  4  4 
+ 8  6  5 
+ 6  3  5 
+ 7  6  5 
+ 8  5  5
+ .  2  2 
+ 5  4  5 
+ 3  4  4 
+ 5  1  2 
+ 5  2  1 
+ 7  6  5 
+ 6  3  4 
+ 6  6  6 
+ 5  4  5 
+ 4  3  4 
+ 9  1  1 
+ 6  2  1 
+ 3  7  8 
+ 6  3  4 
+ 4  4  4 
+ 5  4  3 
+ 6  5  2 
+ 4  4  8 
+ 4  6  4 
+ 6  5  5 
+ 7  8  6 
+ 5  3  5 
+end data.
+
+npar tests
+       /kendall = all
+       .
+])
+
+AT_CHECK([pspp -o pspp.csv npar-kendall.sps])
+
+AT_CHECK([cat pspp.csv], [0], [dnl
+Table: Ranks
+,Mean Rank
+v1,2.500
+v2,1.817
+v3,1.683
+
+Table: Test Statistics
+N,30
+Kendall's W,.233
+Chi-Square,13.960
+df,2
+Asymp. Sig.,.001
+])
+
+AT_CLEANUP
author	John Darrington <john@darrington.wattle.id.au>
	Sat, 30 Oct 2010 15:09:41 +0000 (17:09 +0200)
committer	John Darrington <john@darrington.wattle.id.au>
	Sat, 30 Oct 2010 15:09:41 +0000 (17:09 +0200)
doc/statistics.texi		patch \| blob \| history
src/language/stats/friedman.c		patch \| blob \| history
src/language/stats/friedman.h		patch \| blob \| history
src/language/stats/npar.c		patch \| blob \| history
tests/language/stats/npar.at		patch \| blob \| history