Added an implementation of the median test

author John Darrington <john@darrington.wattle.id.au>

Sun, 28 Aug 2011 10:39:51 +0000 (12:39 +0200)

committer John Darrington <john@darrington.wattle.id.au>

Sun, 28 Aug 2011 10:39:51 +0000 (12:39 +0200)
author John Darrington <john@darrington.wattle.id.au>
Sun, 28 Aug 2011 10:39:51 +0000 (12:39 +0200)
committer John Darrington <john@darrington.wattle.id.au>
Sun, 28 Aug 2011 10:39:51 +0000 (12:39 +0200)
diff --git a/doc/statistics.texi b/doc/statistics.texi

index b1121b498c24af28ddda49d8163d00e45673c355..18cc79e7ace82bdd38e55b4908aaf64a5e8a47b3 100644 (file)
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -691,6 +691,7 @@ is used.
  * KRUSKAL-WALLIS::          Kruskal-Wallis Test
  * MANN-WHITNEY::            Mann Whitney U Test
  * MCNEMAR::                 McNemar Test
  * KRUSKAL-WALLIS::          Kruskal-Wallis Test
  * MANN-WHITNEY::            Mann Whitney U Test
  * MCNEMAR::                 McNemar Test
+* MEDIAN::                  Median Test
  * RUNS::                    Runs Test
  * SIGN::                    The Sign Test
  * WILCOXON::                Wilcoxon Signed Ranks Test
  * RUNS::                    Runs Test
  * SIGN::                    The Sign Test
  * WILCOXON::                Wilcoxon Signed Ranks Test
@@ -931,6 +932,31 @@ The data in each variable must be dichotomous.  If there are more
  than two distinct variables an error will occur and the test will
  not be run.
  
  than two distinct variables an error will occur and the test will
  not be run.
  
+@node MEDIAN
+@subsection Median Test
+@vindex MEDIAN
+@cindex Median test
+
+@display
+     [ /MEDIAN [(value)] = varlist BY variable (value1, value2) ]
+@end display
+
+The median test is used to test whether independent samples come from 
+populations with a common median.
+The median of the populations against which the samples are to be tested
+may be given in parentheses immediately after the 
+/MEDIAN subcommand.  If it is not given, the median will be imputed from the 
+union of all the samples.
+
+The variables of the samples to be tested should immediately follow the @samp{=} sign. The
+keyword @code{BY} must come next, and then the grouping variable.  Two values
+in parentheses should follow.  If the first value is greater than the second,
+then a 2 sample test is performed using these two values to determine the groups.
+If, however, the first value is less than the second, then a @i{k} sample test is
+conducted and the group values used are all values encountered which lie in the
+range [@var{value1},@var{value2}].
+
+
  @node RUNS
  @subsection Runs Test
  @vindex RUNS
  @node RUNS
  @subsection Runs Test
  @vindex RUNS
diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk

index 4b4510e96e3d58a7b6d66f793265ea23f9b82808..b44ebb2d38eb9dcaa53286a80a8e116413729409 100644 (file)
--- a/src/language/stats/automake.mk
+++ b/src/language/stats/automake.mk
@@ -36,6 +36,8 @@ language_stats_sources = \
         src/language/stats/mann-whitney.h \
         src/language/stats/mcnemar.c \
         src/language/stats/mcnemar.h \
         src/language/stats/mann-whitney.h \
         src/language/stats/mcnemar.c \
         src/language/stats/mcnemar.h \
+       src/language/stats/median.c \
+       src/language/stats/median.h \
         src/language/stats/npar.c  \
         src/language/stats/npar.h \
         src/language/stats/npar-summary.c \
         src/language/stats/npar.c  \
         src/language/stats/npar.h \
         src/language/stats/npar-summary.c \
diff --git a/src/language/stats/median.c b/src/language/stats/median.c

new file mode 100644 (file)

index 0000000..2db57ab
--- /dev/null
+++ b/src/language/stats/median.c
@@ -0,0 +1,461 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/
+
+#include <config.h>
+#include "median.h"
+
+#include <gsl/gsl_cdf.h>
+
+#include "data/format.h"
+#include "libpspp/cast.h"
+
+#include "data/variable.h"
+#include "data/case.h"
+#include "data/dictionary.h"
+#include "data/dataset.h"
+#include "data/casereader.h"
+#include "data/casewriter.h"
+#include "data/subcase.h"
+
+
+#include "data/casereader.h"
+#include "math/percentiles.h"
+
+#include "math/sort.h"
+
+#include "libpspp/hmap.h"
+#include "libpspp/array.h"
+#include "libpspp/str.h"
+#include "data/value.h"
+#include "libpspp/misc.h"
+
+#include "output/tab.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+
+/* Tally for one group, i.e. one distinct value of the grouping variable.
+   Nodes live in a per-variable hash map while the data is read and are
+   afterwards referenced from struct results' sorted_array.
+
+   The tallies are sums of case weights, which need not be integers, so they
+   are kept as doubles; an integer type would truncate fractional weights on
+   every accumulation. */
+struct val_node
+{
+  struct hmap_node node;   /* Element in the hash map. */
+  union value val;         /* Value of the grouping variable. */
+  double le;               /* Weight of cases with test value <= median. */
+  double gt;               /* Weight of cases with test value > median. */
+};
+
+/* Outcome of the median test for one test variable. */
+struct results
+{
+  const struct variable *var;      /* The test variable. */
+  struct val_node **sorted_array;  /* Group tallies, sorted by group value. */
+  double n;                        /* Total weight of the cases tallied. */
+  double median;                   /* Median used as the cut point. */
+  double chisq;                    /* Chi-square statistic. */
+};
+
+
+
+/* Three-way comparison callback for sort().  A_ and B_ each point to an
+   element of an array of "struct val_node *"; AUX is the grouping variable,
+   whose width is used to compare the two group values. */
+static int
+val_node_cmp_3way (const void *a_, const void *b_, const void *aux)
+{
+  const struct variable *group_var = aux;
+  const struct val_node *const *lhs = a_;
+  const struct val_node *const *rhs = b_;
+  const int width = var_get_width (group_var);
+
+  return value_compare_3way (&(*lhs)->val, &(*rhs)->val, width);
+}
+
+static void 
+show_frequencies (const struct n_sample_test *nst, const struct results *results,  int n_vals, const struct dictionary *);
+
+static void 
+show_test_statistics (const struct n_sample_test *nst, const struct results *results, int, const struct dictionary *);
+
+
+/* Searches MAP for the node whose value equals VAL, hashing and comparing
+   values by the width of VAR.  Returns the node, or NULL if none matches. */
+static struct val_node *
+find_value (const struct hmap *map, const union value *val,
+           const struct variable *var)
+{
+  const int width = var_get_width (var);
+  const size_t hash = value_hash (val, width, 0);
+  struct val_node *candidate;
+
+  HMAP_FOR_EACH_WITH_HASH (candidate, struct val_node, node, hash, map)
+    {
+      if (value_equal (val, &candidate->val, width))
+        return candidate;
+    }
+
+  return NULL;
+}
+
+/* Runs the median test described by TEST on the cases in INPUT and submits
+   a "Frequencies" table and a "Test Statistics" table.
+
+   DS supplies the dictionary, and through it the weighting variable.
+   EXCLUDE is the class of missing values of the test variables to skip.
+   EXACT and TIMER are ignored.
+
+   INPUT is destroyed before returning. */
+void
+median_execute (const struct dataset *ds,
+               struct casereader *input,
+               enum mv_class exclude,
+               const struct npar_test *test,
+               bool exact UNUSED,
+               double timer UNUSED)
+{
+  const struct dictionary *dict = dataset_dict (ds);
+  const struct variable *wvar = dict_get_weight (dict);
+  bool warn = true;
+  int v;
+  const struct median_test *mt = UP_CAST (test, const struct median_test,
+                                         parent.parent);
+
+  const struct n_sample_test *nst = UP_CAST (test, const struct n_sample_test,
+                                         parent);
+
+  const int indep_width = var_get_width (nst->indep_var);
+
+  /* VAL2 > VAL1 selects the k-sample form, where every group value in
+     [VAL1, VAL2] is used.  Otherwise only the two groups VAL1 and VAL2 are
+     compared. */
+  const bool n_sample_test = (value_compare_3way (&nst->val2, &nst->val1,
+                                                  indep_width) > 0);
+
+  struct results *results = xcalloc (nst->n_vars, sizeof (*results));
+
+  /* Number of entries in results[v].sorted_array, per variable.  The count
+     can differ between variables in the k-sample form, so cleanup must not
+     rely on a single shared count. */
+  int *n_groups = xcalloc (nst->n_vars, sizeof *n_groups);
+
+  int n_vals = 0;
+  for (v = 0; v < nst->n_vars; ++v)
+    {
+      double count = 0;
+      double cc = 0;
+      double median = mt->median;
+      const struct variable *var = nst->vars[v];
+      struct ccase *c;
+      struct hmap map = HMAP_INITIALIZER (map);
+      struct casereader *r = casereader_clone (input);
+
+      if (n_sample_test == false)
+        {
+          /* Two-sample form: create both groups up front, so that they
+             appear in the output even if no case falls into them. */
+          struct val_node *vn = xzalloc (sizeof *vn);
+          value_clone (&vn->val, &nst->val1, indep_width);
+          hmap_insert (&map, &vn->node,
+                       value_hash (&nst->val1, indep_width, 0));
+
+          vn = xzalloc (sizeof *vn);
+          value_clone (&vn->val, &nst->val2, indep_width);
+          hmap_insert (&map, &vn->node,
+                       value_hash (&nst->val2, indep_width, 0));
+        }
+
+      if (median == SYSMIS)
+        {
+          /* No median was given: derive it from the non-missing values of
+             VAR by sorting a copy of the data and taking the 0.5
+             percentile. */
+          struct percentile *ptl;
+          struct order_stats *os;
+          struct casereader *rr;
+          struct subcase sc;
+          struct casewriter *writer;
+
+          subcase_init_var (&sc, var, SC_ASCEND);
+          rr = casereader_clone (r);
+          writer = sort_create_writer (&sc, casereader_get_proto (rr));
+
+          /* This loop has no increment expression, so a skipped case must
+             be released explicitly here. */
+          for (; (c = casereader_read (rr)) != NULL; )
+            {
+              if (var_is_value_missing (var, case_data (c, var), exclude))
+                {
+                  case_unref (c);
+                  continue;
+                }
+
+              cc += dict_get_case_weight (dict, c, &warn);
+              casewriter_write (writer, c);
+            }
+          subcase_destroy (&sc);
+          casereader_destroy (rr);
+
+          rr = casewriter_make_reader (writer);
+
+          ptl = percentile_create (0.5, cc);
+          os = &ptl->parent;
+
+          order_stats_accumulate (&os, 1,
+                                  rr,
+                                  wvar,
+                                  var,
+                                  exclude);
+
+          median = percentile_calculate (ptl, PC_HAVERAGE);
+          statistic_destroy (&ptl->parent.parent);
+        }
+
+      results[v].median = median;
+
+      /* Tally each case into its group.  The loop's increment expression
+         releases the case, and "continue" still runs that expression, so
+         the skip paths below must NOT call case_unref() themselves: doing
+         so would drop the reference twice. */
+      for (; (c = casereader_read (r)) != NULL; case_unref (c))
+        {
+          struct val_node *vn;
+          const double weight = dict_get_case_weight (dict, c, &warn);
+          const union value *val = case_data (c, var);
+          const union value *indep_val = case_data (c, nst->indep_var);
+
+          if (var_is_value_missing (var, val, exclude))
+            continue;
+
+          /* k-sample form: ignore groups outside [VAL1, VAL2]. */
+          if (n_sample_test
+              && (value_compare_3way (indep_val, &nst->val1, indep_width) < 0
+                  || value_compare_3way (indep_val, &nst->val2,
+                                         indep_width) > 0))
+            continue;
+
+          vn = find_value (&map, indep_val, nst->indep_var);
+          if (vn == NULL)
+            {
+              /* Two-sample form: the case belongs to neither group. */
+              if (n_sample_test == false)
+                continue;
+
+              /* k-sample form: first case seen for this group. */
+              vn = xzalloc (sizeof *vn);
+              value_clone (&vn->val, indep_val, indep_width);
+              hmap_insert (&map, &vn->node,
+                           value_hash (indep_val, indep_width, 0));
+            }
+
+          if (val->f <= median)
+            vn->le += weight;
+          else
+            vn->gt += weight;
+
+          count += weight;
+        }
+      casereader_destroy (r);
+
+      {
+        int x = 0;
+        struct val_node *vn = NULL;
+        double r_0 = 0;   /* Total weight <= median over all groups. */
+        double r_1 = 0;   /* Total weight > median over all groups. */
+        HMAP_FOR_EACH (vn, struct val_node, node, &map)
+          {
+            r_0 += vn->le;
+            r_1 += vn->gt;
+          }
+
+        results[v].n = count;
+        results[v].sorted_array = xcalloc (hmap_count (&map), sizeof (void *));
+        results[v].var = var;
+
+        /* Sum of (observed - expected)^2 / expected over both rows of every
+           group.  An expected count of zero yields NaN, which is what gets
+           reported. */
+        HMAP_FOR_EACH (vn, struct val_node, node, &map)
+          {
+            double e_0j = r_0 * (vn->le + vn->gt) / count;
+            double e_1j = r_1 * (vn->le + vn->gt) / count;
+
+            results[v].chisq += pow2 (vn->le - e_0j) / e_0j;
+            results[v].chisq += pow2 (vn->gt - e_1j) / e_1j;
+
+            results[v].sorted_array[x++] = vn;
+          }
+
+        n_groups[v] = x;
+        n_vals = x;
+
+        /* The nodes themselves stay alive, owned by sorted_array. */
+        hmap_destroy (&map);
+
+        sort (results[v].sorted_array, x, sizeof (*results[v].sorted_array),
+              val_node_cmp_3way, nst->indep_var);
+      }
+    }
+
+  casereader_destroy (input);
+
+  /* NOTE(review): the display functions take a single group count and apply
+     it to every variable; as before, the count of the last variable is
+     passed. */
+  show_frequencies (nst, results, n_vals, dict);
+  show_test_statistics (nst, results, n_vals, dict);
+
+  for (v = 0; v < nst->n_vars; ++v)
+    {
+      int i;
+      const struct results *rs = results + v;
+
+      for (i = 0; i < n_groups[v]; ++i)
+        {
+          struct val_node *vn = rs->sorted_array[i];
+          value_destroy (&vn->val, indep_width);
+          free (vn);
+        }
+      free (rs->sorted_array);
+    }
+  free (n_groups);
+  free (results);
+}
+
+
+
+/* Submits the "Frequencies" table: one column per group of NST's grouping
+   variable and, for each test variable in RESULTS, one row of weights above
+   the median followed by one row of weights at or below it.
+
+   N_VALS is the number of groups; the column labels are taken from the
+   first entry of RESULTS.  DICT supplies the weighting variable, whose print
+   format (F8.0 if the data is unweighted) is used for the cells. */
+static void
+show_frequencies (const struct n_sample_test *nst, const struct results *results,  int n_vals, const struct dictionary *dict)
+{
+  const struct variable *wv = dict_get_weight (dict);
+  const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
+
+  const int row_headers = 2;
+  const int column_headers = 2;
+
+  struct tab_table *table = tab_create (row_headers + n_vals,
+                                        column_headers + nst->n_vars * 2);
+  const int last_col = tab_nc (table) - 1;
+  const int last_row = tab_nr (table) - 1;
+
+  int col;
+  int v;
+
+  tab_headers (table, row_headers, 0, column_headers, 0);
+
+  tab_title (table, _("Frequencies"));
+
+  /* Box around the table and vertical lines inside. */
+  tab_box (table, TAL_2, TAL_2, -1, TAL_1, 0, 0, last_col, last_row);
+
+  tab_hline (table, TAL_2, 0, last_col, column_headers);
+  tab_vline (table, TAL_2, row_headers, 0, last_row);
+
+  /* Name of the grouping variable, spanning all the group columns. */
+  tab_joint_text (table,
+                  row_headers, 0, row_headers + n_vals - 1, 0,
+                  TAT_TITLE | TAB_CENTER, var_to_string (nst->indep_var));
+
+  tab_hline (table, TAL_1, row_headers, last_col, 1);
+
+  /* One column heading per group. */
+  for (col = 0; col < n_vals; ++col)
+    {
+      const struct val_node *group = results[0].sorted_array[col];
+      struct string label;
+
+      ds_init_empty (&label);
+      var_append_value_name (nst->indep_var, &group->val, &label);
+
+      tab_text (table, row_headers + col, 1,
+                TAT_TITLE | TAB_LEFT, ds_cstr (&label));
+
+      ds_destroy (&label);
+    }
+
+  /* Two rows per test variable: headings first, then the tallies. */
+  for (v = 0; v < nst->n_vars; ++v)
+    {
+      const struct results *rs = &results[v];
+      const int gt_row = column_headers + v * 2;
+      const int le_row = gt_row + 1;
+
+      tab_text (table, 0, gt_row,
+                TAT_TITLE | TAB_LEFT, var_to_string (rs->var));
+
+      tab_text (table, 1, gt_row,
+                TAT_TITLE | TAB_LEFT, _("> Median"));
+
+      tab_text (table, 1, le_row,
+                TAT_TITLE | TAB_LEFT, _("≤ Median"));
+
+      if (v > 0)
+        tab_hline (table, TAL_1, 0, last_col, gt_row);
+
+      for (col = 0; col < n_vals; ++col)
+        {
+          const struct val_node *vn = rs->sorted_array[col];
+
+          tab_double (table, row_headers + col, gt_row, 0, vn->gt, wfmt);
+          tab_double (table, row_headers + col, le_row, 0, vn->le, wfmt);
+        }
+    }
+
+  tab_submit (table);
+}
+
+
+/* Submits the "Test Statistics" table with one row per test variable in
+   RESULTS, giving N, the median, the chi-square statistic, its degrees of
+   freedom (N_VALS - 1) and the upper-tail chi-square probability.
+
+   DICT supplies the weighting variable, whose print format (F8.0 if the data
+   is unweighted) is used for the N and df columns. */
+static void
+show_test_statistics (const struct n_sample_test *nst,
+                     const struct results *results,
+                     int n_vals,
+                     const struct dictionary *dict)
+{
+  const struct variable *wv = dict_get_weight (dict);
+  const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
+
+  const int row_headers = 1;
+  const int column_headers = 1;
+
+  /* The same degrees of freedom apply to every row. */
+  const double df = n_vals - 1;
+
+  struct tab_table *table = tab_create (row_headers + 5,
+                                        column_headers + nst->n_vars);
+  const int last_col = tab_nc (table) - 1;
+  const int last_row = tab_nr (table) - 1;
+
+  int v;
+
+  tab_headers (table, row_headers, 0, column_headers, 0);
+
+  tab_title (table, _("Test Statistics"));
+
+  tab_box (table, TAL_2, TAL_2, -1, TAL_1, 0, 0, last_col, last_row);
+
+  tab_hline (table, TAL_2, 0, last_col, column_headers);
+  tab_vline (table, TAL_2, row_headers, 0, last_row);
+
+  /* Column headings. */
+  tab_text (table, row_headers + 0, 0, TAT_TITLE | TAB_CENTER, _("N"));
+  tab_text (table, row_headers + 1, 0, TAT_TITLE | TAB_CENTER, _("Median"));
+  tab_text (table, row_headers + 2, 0,
+            TAT_TITLE | TAB_CENTER, _("Chi-Square"));
+  tab_text (table, row_headers + 3, 0, TAT_TITLE | TAB_CENTER, _("df"));
+  tab_text (table, row_headers + 4, 0,
+            TAT_TITLE | TAB_CENTER, _("Asymp. Sig."));
+
+  for (v = 0; v < nst->n_vars; ++v)
+    {
+      const struct results *rs = &results[v];
+      const int row = column_headers + v;
+
+      tab_text (table, 0, row,
+                TAT_TITLE | TAB_LEFT, var_to_string (rs->var));
+
+      tab_double (table, row_headers + 0, row, 0, rs->n, wfmt);
+      tab_double (table, row_headers + 1, row, 0, rs->median, NULL);
+      tab_double (table, row_headers + 2, row, 0, rs->chisq, NULL);
+      tab_double (table, row_headers + 3, row, 0, df, wfmt);
+      tab_double (table, row_headers + 4, row,
+                  0, gsl_cdf_chisq_Q (rs->chisq, df), NULL);
+    }
+
+  tab_submit (table);
+}
diff --git a/src/language/stats/median.h b/src/language/stats/median.h

new file mode 100644 (file)

index 0000000..236984c
--- /dev/null
+++ b/src/language/stats/median.h
@@ -0,0 +1,41 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#if !median_h
+#define median_h 1
+
+#include <stddef.h>
+#include <stdbool.h>
+#include "language/stats/npar.h"
+
+/* Specification of one median test. */
+struct median_test
+{
+  struct n_sample_test parent;  /* Test variables, grouping variable, values. */
+  double median;                /* Median to test against; SYSMIS if it is
+                                   to be computed from the data. */
+};
+
+struct casereader;
+struct dataset;
+
+/* Runs the median test TEST (which must be part of a struct median_test) on
+   the cases in INPUT and submits the resulting output tables.  EXCLUDE is
+   the class of missing values to skip.  EXACT and TIMER are ignored.
+   INPUT is destroyed. */
+void median_execute (const struct dataset *ds,
+                      struct casereader *input,
+                      enum mv_class exclude,
+                      const struct npar_test *test,
+                      bool exact,
+                      double timer
+                      );
+
+#endif
diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c

index fda7b0c6b27c79453435e463abe72fda6efbc185..e0d50d4adecd9c33120400b7c5eb5377bccedbc3 100644 (file)
--- a/src/language/stats/npar.c
+++ b/src/language/stats/npar.c
@@ -40,6 +40,7 @@
  #include "language/stats/kruskal-wallis.h"
  #include "language/stats/mann-whitney.h"
  #include "language/stats/mcnemar.h"
  #include "language/stats/kruskal-wallis.h"
  #include "language/stats/mann-whitney.h"
  #include "language/stats/mcnemar.h"
+#include "language/stats/median.h"
  #include "language/stats/npar-summary.h"
  #include "language/stats/runs.h"
  #include "language/stats/sign.h"
  #include "language/stats/npar-summary.h"
  #include "language/stats/runs.h"
  #include "language/stats/sign.h"
@@ -93,6 +94,7 @@ struct cmd_npar_tests
      int kruskal_wallis;
      int mann_whitney;
      int mcnemar;
      int kruskal_wallis;
      int mann_whitney;
      int mcnemar;
+    int median;
      int missing;
      int method;
      int statistics;
      int missing;
      int method;
      int statistics;
@@ -138,6 +140,8 @@ static int npar_sign (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_kruskal_wallis (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_mann_whitney (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_mcnemar (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_kruskal_wallis (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_mann_whitney (struct lexer *, struct dataset *, struct npar_specs *);
  static int npar_mcnemar (struct lexer *, struct dataset *, struct npar_specs *);
+static int npar_median (struct lexer *, struct dataset *, struct npar_specs *);
+
  static int npar_method (struct lexer *, struct npar_specs *);
  
  /* Command parsing functions. */
  static int npar_method (struct lexer *, struct npar_specs *);
  
  /* Command parsing functions. */
@@ -336,6 +340,23 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests
              default:
                NOT_REACHED ();
              }
              default:
                NOT_REACHED ();
              }
+       }
+      else if (lex_match_phrase (lexer, "MEDIAN"))
+        {
+          npt->median++;
+
+          switch (npar_median (lexer, ds, nps))
+            {
+            case 0:
+              goto lossage;
+            case 1:
+              break;
+            case 2:
+              lex_error (lexer, NULL);
+              goto lossage;
+            default:
+              NOT_REACHED ();
+            }
          }
        else if (lex_match_id (lexer, "WILCOXON"))
          {
          }
        else if (lex_match_id (lexer, "WILCOXON"))
          {
@@ -1212,6 +1233,42 @@ npar_mann_whitney (struct lexer *lexer,
  }
  
  
  }
  
  
+/* Parses the body of the MEDIAN subcommand,
+
+     [(value)] = varlist BY variable (value1, value2)
+
+   and appends the resulting test to SPECS.  The optional parenthesized
+   number is the median to test against; when it is omitted the median is
+   left as SYSMIS.
+
+   Returns 1 on success, 0 on a syntax error. */
+static int
+npar_median (struct lexer *lexer,
+            struct dataset *ds,
+            struct npar_specs *specs)
+{
+  struct median_test *mt = pool_alloc (specs->pool, sizeof (*mt));
+  struct n_sample_test *tp = &mt->parent;
+  struct npar_test *nt = &tp->parent;
+
+  mt->median = SYSMIS;
+
+  if (lex_match (lexer, T_LPAREN))
+    {
+      /* Stop on malformed input instead of reading a number from a token
+         that is not one, or silently accepting a missing ")". */
+      if (!lex_force_num (lexer))
+        return 0;
+      mt->median = lex_number (lexer);
+      lex_get (lexer);
+      if (!lex_force_match (lexer, T_RPAREN))
+        return 0;
+    }
+
+  lex_match (lexer, T_EQUALS);
+
+  nt->insert_variables = n_sample_insert_variables;
+  nt->execute = median_execute;
+
+  if (!parse_n_sample_related_test (lexer, dataset_dict (ds),
+                                    tp, specs->pool))
+    return 0;
+
+  specs->n_tests++;
+  specs->test = pool_realloc (specs->pool,
+                              specs->test,
+                              sizeof (*specs->test) * specs->n_tests);
+  specs->test[specs->n_tests - 1] = nt;
+
+  return 1;
+}
  
  
  static int
  
  
  static int
diff --git a/tests/language/stats/npar.at b/tests/language/stats/npar.at

index cc1bd167c5cc81f8a09ef318c5d4f7a483ecc763..f4cec2edb9ea2eeed120835c4742e365c23adbb1 100644 (file)
--- a/tests/language/stats/npar.at
+++ b/tests/language/stats/npar.at
@@ -1389,3 +1389,197 @@ Asymp. Sig. (2-tailed),,.569,.552
  
  
  AT_CLEANUP
  
  
  AT_CLEANUP
+
+
+AT_SETUP([NPAR TESTS Median Test (median imputed)])
+
+AT_DATA([median1.sps], [dnl
+set format F12.3.
+data list notable list /ignore * animal * years * w *.
+begin data
+99  1   10  1
+99  4    1  1
+99  5   11  1
+99  5   10  1
+99  3    7  1
+99  6   10  1
+99  0    7  1
+99  3   14  1
+99  2    3  1
+99  1    1  1
+99  4    7  1
+99  5   12  1
+99  3    6  1
+99  4    1  1
+99  3    5  1
+99  5    7  1
+99  4    6  1
+99  3   14  1
+99  4    8  1
+99  5   13  1
+99  2    0  1
+99  4    7  1
+99  4    7  1
+99  1    0  1
+99  2    8  1
+99  4   10  1
+99  2    3  1
+99  2    0  1
+99  4    8  1
+99  1    8  1
+end data.
+
+
+variable label years 'Years expected'.
+variable label animal 'Animal Genus'.
+
+add value labels animal 1 'Animal 1' 2 'Animal 2' 3 'Animal 3' 4 'Animal 4' 5 'Animal 5'.
+
+npar tests
+     /median = years by animal (1, 5)
+     .
+])
+
+
+AT_CHECK([pspp -O format=csv median1.sps], [0], [dnl
+Table: Frequencies
+,,Animal Genus,,,,
+,,Animal 1,Animal 2,Animal 3,Animal 4,Animal 5
+Years expected,> Median,2,1,2,3,4
+,≤ Median,2,4,3,6,1
+
+Table: Test Statistics
+,N,Median,Chi-Square,df,Asymp. Sig.
+Years expected,28,7.000,4.317,4,.365
+])
+
+AT_CLEANUP
+
+
+AT_SETUP([NPAR TESTS Median Test (median given)])
+
+AT_DATA([median2.sps], [dnl
+set format F12.3.
+data list notable list /ignore * animal * years * w *.
+begin data
+99  1   10  1
+99  4    1  1
+99  5   11  1
+99  5   10  1
+99  3    7  1
+99  3   14  1
+99  2    3  1
+99  1    1  1
+99  4    7  1
+99  5   12  1
+99  3    6  1
+99  4    1  1
+99  3    5  1
+99  5    7  1
+99  4    6  1
+99  3   14  1
+99  4    8  1
+99  5   13  1
+99  2    0  1
+99  4    7  1
+99  4    7  1
+99  1    0  1
+99  2    8  1
+99  4   10  1
+99  2    3  1
+99  2    0  1
+99  4    8  1
+99  1    8  1
+end data.
+
+
+variable label years 'Years expected'.
+variable label animal 'Animal Genus'.
+
+add value labels animal 1 'Animal 1' 2 'Animal 2' 3 'Animal 3' 4 'Animal 4' 5 'Animal 5'.
+
+npar tests
+     /median (7) = years by animal (1, 5)
+     .
+])
+
+
+AT_CHECK([pspp -O format=csv median2.sps], [0], [dnl
+Table: Frequencies
+,,Animal Genus,,,,
+,,Animal 1,Animal 2,Animal 3,Animal 4,Animal 5
+Years expected,> Median,2,1,2,3,4
+,≤ Median,2,4,3,6,1
+
+Table: Test Statistics
+,N,Median,Chi-Square,df,Asymp. Sig.
+Years expected,28,7.000,4.317,4,.365
+])
+
+AT_CLEANUP
+
+
+AT_SETUP([NPAR TESTS Median Test (two sample)])
+
+AT_DATA([median3.sps], [dnl
+set format F12.3.
+data list notable list /xx * animal * years * w *.
+begin data
+99  1   10  1
+99  4    1  1
+99  5   11  1
+99  5   10  1
+99  3    7  1
+99  3   14  1
+99  2    3  1
+99  1    1  1
+99  4    7  1
+99  5   12  1
+99  3    6  1
+99  4    1  1
+99  3    5  1
+99  5    7  1
+99  4    6  1
+99  3   14  1
+99  4    8  1
+99  5   13  1
+99  2    0  1
+99  4    7  1
+99  4    7  1
+99  1    0  1
+99  2    8  1
+99  4   10  1
+99  2    3  1
+99  2    0  1
+99  4    8  1
+99  1    8  1
+end data.
+
+
+variable label years 'Years expected'.
+variable label animal 'Animal Genus'.
+
+add value labels animal 1 'Animal 1' 2 'Animal 2' 3 'Animal 3' 4 'Animal 4' 5 'Animal 5'.
+
+npar tests
+     /median (7) = xx years by animal (5, 1)
+     .
+])
+
+
+AT_CHECK([pspp -O format=csv median3.sps], [0], [dnl
+Table: Frequencies
+,,Animal Genus,
+,,Animal 1,Animal 5
+xx,> Median,4,5
+,≤ Median,0,0
+Years expected,> Median,2,4
+,≤ Median,2,1
+
+Table: Test Statistics
+,N,Median,Chi-Square,df,Asymp. Sig.
+xx,9,7.000,NaN,1,NaN
+Years expected,9,7.000,.900,1,.343
+])
+
+AT_CLEANUP
+\ No newline at end of file
author	John Darrington <john@darrington.wattle.id.au>
	Sun, 28 Aug 2011 10:39:51 +0000 (12:39 +0200)
committer	John Darrington <john@darrington.wattle.id.au>
	Sun, 28 Aug 2011 10:39:51 +0000 (12:39 +0200)
doc/statistics.texi		patch \| blob \| history
src/language/stats/automake.mk		patch \| blob \| history
src/language/stats/median.c	[new file with mode: 0644]	patch \| blob
src/language/stats/median.h	[new file with mode: 0644]	patch \| blob
src/language/stats/npar.c		patch \| blob \| history
tests/language/stats/npar.at		patch \| blob \| history