--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+#include "median.h"
+
+#include <gsl/gsl_cdf.h>
+
+#include "data/format.h"
+#include "libpspp/cast.h"
+
+#include "data/variable.h"
+#include "data/case.h"
+#include "data/dictionary.h"
+#include "data/dataset.h"
+#include "data/casereader.h"
+#include "data/casewriter.h"
+#include "data/subcase.h"
+
+#include "math/percentiles.h"
+
+#include "math/sort.h"
+
+#include "libpspp/hmap.h"
+#include "libpspp/array.h"
+#include "libpspp/str.h"
+#include "data/value.h"
+#include "libpspp/misc.h"
+
+#include "output/tab.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+
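+/* One group (distinct value of the independent variable): the weighted
+   number of cases at or below, and above, the median.  */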
+struct val_node
+{
+ struct hmap_node node;
+ union value val;
+  double le;                    /* Weighted count of cases <= the median. */
+  double gt;                    /* Weighted count of cases > the median. */
+};
+
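+/* Results of the median test for one dependent variable.  */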
+struct results
+{
+ const struct variable *var;
+ struct val_node **sorted_array;
+ double n;
+ double median;
+ double chisq;
+};
+
+
+
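+/* Three-way comparison of two struct val_node pointers by their grouping
+   value; AUX is the independent variable, which supplies the width.  */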
+static int
+val_node_cmp_3way (const void *a_, const void *b_, const void *aux)
+{
+ const struct variable *indep_var = aux;
+ const struct val_node *const *a = a_;
+ const struct val_node *const *b = b_;
+
+ return value_compare_3way (&(*a)->val, &(*b)->val, var_get_width (indep_var));
+}
+
+static void
+show_frequencies (const struct n_sample_test *nst, const struct results *results, int n_vals, const struct dictionary *);
+
+static void
+show_test_statistics (const struct n_sample_test *nst, const struct results *results, int, const struct dictionary *);
+
+
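+/* Returns the node in MAP whose value equals VAL (compared with VAR's
+   width), or NULL if there is none.  */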
+static struct val_node *
+find_value (const struct hmap *map, const union value *val,
+ const struct variable *var)
+{
+  struct val_node *vn = NULL;
+  size_t hash = value_hash (val, var_get_width (var), 0);
+  HMAP_FOR_EACH_WITH_HASH (vn, struct val_node, node, hash, map)
+    if (value_equal (val, &vn->val, var_get_width (var)))
+      break;
+
+  return vn;
+}
+
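+/* Implements the NPAR TESTS /MEDIAN test: for each dependent variable,
+   classify cases as at or below, or above, the median within each group of
+   the independent variable, then test the resulting 2 x k table with a
+   chi-square statistic.  */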
+void
+median_execute (const struct dataset *ds,
+ struct casereader *input,
+ enum mv_class exclude,
+ const struct npar_test *test,
+ bool exact UNUSED,
+ double timer UNUSED)
+{
+ const struct dictionary *dict = dataset_dict (ds);
+ const struct variable *wvar = dict_get_weight (dict);
+ bool warn = true;
+ int v;
+ const struct median_test *mt = UP_CAST (test, const struct median_test,
+ parent.parent);
+
+ const struct n_sample_test *nst = UP_CAST (test, const struct n_sample_test,
+ parent);
+
+ const bool n_sample_test = (value_compare_3way (&nst->val2, &nst->val1,
+ var_get_width (nst->indep_var)) > 0);
+
+ struct results *results = xcalloc (nst->n_vars, sizeof (*results));
+ int n_vals = 0;
+ for (v = 0; v < nst->n_vars; ++v)
+ {
+ double count = 0;
+ double cc = 0;
+ double median = mt->median;
+ const struct variable *var = nst->vars[v];
+ struct ccase *c;
+ struct hmap map = HMAP_INITIALIZER (map);
+ struct casereader *r = casereader_clone (input);
+
+
+
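+      /* In the two-sample form of the command (second grouping value not
+         greater than the first), only the two listed groups are compared,
+         so seed the map with exactly those values; in the range form,
+         groups are added as they turn up in the data.  */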
+      if (!n_sample_test)
+ {
+ struct val_node *vn = xzalloc (sizeof *vn);
+ value_clone (&vn->val, &nst->val1, var_get_width (nst->indep_var));
+ hmap_insert (&map, &vn->node, value_hash (&nst->val1,
+ var_get_width (nst->indep_var), 0));
+
+ vn = xzalloc (sizeof *vn);
+ value_clone (&vn->val, &nst->val2, var_get_width (nst->indep_var));
+ hmap_insert (&map, &vn->node, value_hash (&nst->val2,
+ var_get_width (nst->indep_var), 0));
+ }
+
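+      /* No median was given on the subcommand, so estimate it as the
+         weighted 50th percentile (HAVERAGE) of VAR over the non-missing
+         cases.  */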
+ if ( median == SYSMIS)
+ {
+ struct percentile *ptl;
+ struct order_stats *os;
+
+ struct casereader *rr;
+ struct subcase sc;
+ struct casewriter *writer;
+ subcase_init_var (&sc, var, SC_ASCEND);
+ rr = casereader_clone (r);
+ writer = sort_create_writer (&sc, casereader_get_proto (rr));
+
+ for (; (c = casereader_read (rr)) != NULL; )
+ {
+ if ( var_is_value_missing (var, case_data (c, var), exclude))
+ {
+ case_unref (c);
+ continue;
+ }
+
+ cc += dict_get_case_weight (dict, c, &warn);
+ casewriter_write (writer, c);
+ }
+ subcase_destroy (&sc);
+ casereader_destroy (rr);
+
+ rr = casewriter_make_reader (writer);
+
+ ptl = percentile_create (0.5, cc);
+ os = &ptl->parent;
+
+ order_stats_accumulate (&os, 1,
+ rr,
+ wvar,
+ var,
+ exclude);
+
+ median = percentile_calculate (ptl, PC_HAVERAGE);
+ statistic_destroy (&ptl->parent.parent);
+ }
+
+ results[v].median = median;
+
+
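+      /* Classify each case into its group and add its weight to the group's
+         "<= median" or "> median" count.  */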
+ for (; (c = casereader_read (r)) != NULL; case_unref (c))
+ {
+          struct val_node *vn;
+ const double weight = dict_get_case_weight (dict, c, &warn);
+ const union value *val = case_data (c, var);
+ const union value *indep_val = case_data (c, nst->indep_var);
+
+          /* The loop increment unrefs C, so a bare `continue' is enough;
+             unreffing here as well would drop the reference twice.  */
+          if (var_is_value_missing (var, val, exclude))
+            continue;
+
+ if (n_sample_test)
+ {
+ int width = var_get_width (nst->indep_var);
+              /* Ignore values outside the range given on the command.  */
+              if (value_compare_3way (indep_val, &nst->val1, width) < 0
+                  || value_compare_3way (indep_val, &nst->val2, width) > 0)
+                continue;       /* C is unreffed by the loop increment.  */
+ }
+
+ vn = find_value (&map, indep_val, nst->indep_var);
+ if ( vn == NULL)
+ {
+              if (n_sample_test)
+ {
+ int width = var_get_width (nst->indep_var);
+ vn = xzalloc (sizeof *vn);
+ value_clone (&vn->val, indep_val, width);
+
+ hmap_insert (&map, &vn->node, value_hash (indep_val, width, 0));
+ }
+ else
+ {
+ continue;
+ }
+ }
+
+ if (val->f <= median)
+ vn->le += weight;
+ else
+ vn->gt += weight;
+
+ count += weight;
+ }
+ casereader_destroy (r);
+
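+      /* Compute the chi-square statistic for this variable's 2 x k table and
+         save the group nodes, sorted by grouping value, for the output.  */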
+ {
+ int x = 0;
+ struct val_node *vn = NULL;
+ double r_0 = 0;
+ double r_1 = 0;
+ HMAP_FOR_EACH (vn, struct val_node, node, &map)
+ {
+ r_0 += vn->le;
+ r_1 += vn->gt;
+ }
+
+ results[v].n = count;
+ results[v].sorted_array = xcalloc (hmap_count (&map), sizeof (void*));
+ results[v].var = var;
+
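+        /* Under independence the expected count for each cell is
+           (row total) * (column total) / N; the statistic is the sum of
+           (observed - expected)^2 / expected over all 2 x k cells.  */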
+ HMAP_FOR_EACH (vn, struct val_node, node, &map)
+ {
+ double e_0j = r_0 * (vn->le + vn->gt) / count;
+ double e_1j = r_1 * (vn->le + vn->gt) / count;
+
+ results[v].chisq += pow2 (vn->le - e_0j) / e_0j;
+ results[v].chisq += pow2 (vn->gt - e_1j) / e_1j;
+
+ results[v].sorted_array[x++] = vn;
+ }
+
+ n_vals = x;
+ hmap_destroy (&map);
+
+ sort (results[v].sorted_array, x, sizeof (*results[v].sorted_array),
+ val_node_cmp_3way, nst->indep_var);
+
+ }
+ }
+
+ casereader_destroy (input);
+
+ show_frequencies (nst, results, n_vals, dict);
+ show_test_statistics (nst, results, n_vals, dict);
+
+ for (v = 0; v < nst->n_vars; ++v)
+ {
+ int i;
+ const struct results *rs = results + v;
+
+ for (i = 0; i < n_vals; ++i)
+ {
+ struct val_node *vn = rs->sorted_array[i];
+ value_destroy (&vn->val, var_get_width (nst->indep_var));
+ free (vn);
+ }
+ free (rs->sorted_array);
+ }
+ free (results);
+}
+
+
+
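+/* Displays the "Frequencies" table: one column per group and, for each
+   dependent variable, a "> Median" and a "≤ Median" row of weighted
+   counts.  */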
+static void
+show_frequencies (const struct n_sample_test *nst, const struct results *results, int n_vals, const struct dictionary *dict)
+{
+ const struct variable *weight = dict_get_weight (dict);
+ const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
+
+ int i;
+ int v;
+
+ const int row_headers = 2;
+ const int column_headers = 2;
+ const int nc = row_headers + n_vals;
+ const int nr = column_headers + nst->n_vars * 2;
+
+ struct tab_table *table = tab_create (nc, nr);
+
+ tab_headers (table, row_headers, 0, column_headers, 0);
+
+ tab_title (table, _("Frequencies"));
+
+  /* Box around the table and vertical lines inside.  */
+ tab_box (table, TAL_2, TAL_2, -1, TAL_1,
+ 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
+
+ tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
+ tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
+
+ tab_joint_text (table,
+ row_headers, 0, row_headers + n_vals - 1, 0,
+ TAT_TITLE | TAB_CENTER, var_to_string (nst->indep_var));
+
+
+ tab_hline (table, TAL_1, row_headers, tab_nc (table) - 1, 1);
+
+
+ for (i = 0; i < n_vals; ++i)
+ {
+ const struct results *rs = results + 0;
+ struct string label;
+ ds_init_empty (&label);
+
+ var_append_value_name (nst->indep_var, &rs->sorted_array[i]->val,
+ &label);
+
+ tab_text (table, row_headers + i, 1,
+ TAT_TITLE | TAB_LEFT, ds_cstr (&label));
+
+ ds_destroy (&label);
+ }
+
+ for (v = 0; v < nst->n_vars; ++v)
+ {
+ const struct results *rs = &results[v];
+ tab_text (table, 0, column_headers + v * 2,
+ TAT_TITLE | TAB_LEFT, var_to_string (rs->var) );
+
+ tab_text (table, 1, column_headers + v * 2,
+ TAT_TITLE | TAB_LEFT, _("> Median") );
+
+ tab_text (table, 1, column_headers + v * 2 + 1,
+ TAT_TITLE | TAB_LEFT, _("≤ Median") );
+
+ if ( v > 0)
+ tab_hline (table, TAL_1, 0, tab_nc (table) - 1, column_headers + v * 2);
+ }
+
+ for (v = 0; v < nst->n_vars; ++v)
+ {
+ int i;
+ const struct results *rs = &results[v];
+
+ for (i = 0; i < n_vals; ++i)
+ {
+ const struct val_node *vn = rs->sorted_array[i];
+ tab_double (table, row_headers + i, column_headers + v * 2,
+ 0, vn->gt, wfmt);
+
+ tab_double (table, row_headers + i, column_headers + v * 2 + 1,
+ 0, vn->le, wfmt);
+ }
+ }
+
+ tab_submit (table);
+}
+
+
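+/* Displays the "Test Statistics" table: N, the median used, the chi-square
+   statistic, its degrees of freedom (number of groups - 1), and the
+   asymptotic significance from the upper tail of the chi-square
+   distribution.  */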
+static void
+show_test_statistics (const struct n_sample_test *nst,
+ const struct results *results,
+ int n_vals,
+ const struct dictionary *dict)
+{
+ const struct variable *weight = dict_get_weight (dict);
+ const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
+
+ int v;
+
+ const int row_headers = 1;
+ const int column_headers = 1;
+ const int nc = row_headers + 5;
+ const int nr = column_headers + nst->n_vars;
+
+ struct tab_table *table = tab_create (nc, nr);
+
+ tab_headers (table, row_headers, 0, column_headers, 0);
+
+ tab_title (table, _("Test Statistics"));
+
+
+ tab_box (table, TAL_2, TAL_2, -1, TAL_1,
+ 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
+
+ tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
+ tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
+
+ tab_text (table, row_headers + 0, 0,
+ TAT_TITLE | TAB_CENTER, _("N"));
+
+ tab_text (table, row_headers + 1, 0,
+ TAT_TITLE | TAB_CENTER, _("Median"));
+
+ tab_text (table, row_headers + 2, 0,
+ TAT_TITLE | TAB_CENTER, _("Chi-Square"));
+
+ tab_text (table, row_headers + 3, 0,
+ TAT_TITLE | TAB_CENTER, _("df"));
+
+ tab_text (table, row_headers + 4, 0,
+ TAT_TITLE | TAB_CENTER, _("Asymp. Sig."));
+
+
+ for (v = 0; v < nst->n_vars; ++v)
+ {
+ double df = n_vals - 1;
+ const struct results *rs = &results[v];
+ tab_text (table, 0, column_headers + v,
+ TAT_TITLE | TAB_LEFT, var_to_string (rs->var));
+
+
+ tab_double (table, row_headers + 0, column_headers + v,
+ 0, rs->n, wfmt);
+
+ tab_double (table, row_headers + 1, column_headers + v,
+ 0, rs->median, NULL);
+
+ tab_double (table, row_headers + 2, column_headers + v,
+ 0, rs->chisq, NULL);
+
+ tab_double (table, row_headers + 3, column_headers + v,
+ 0, df, wfmt);
+
+ tab_double (table, row_headers + 4, column_headers + v,
+ 0, gsl_cdf_chisq_Q (rs->chisq, df), NULL);
+ }
+
+ tab_submit (table);
+}
AT_CLEANUP
+
+
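+dnl Median not given on the subcommand; it must be estimated from the data.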
+AT_SETUP([NPAR TESTS Median Test (median imputed)])
+
+AT_DATA([median1.sps], [dnl
+set format F12.3.
+data list notable list /ignore * animal * years * w *.
+begin data
+99 1 10 1
+99 4 1 1
+99 5 11 1
+99 5 10 1
+99 3 7 1
+99 6 10 1
+99 0 7 1
+99 3 14 1
+99 2 3 1
+99 1 1 1
+99 4 7 1
+99 5 12 1
+99 3 6 1
+99 4 1 1
+99 3 5 1
+99 5 7 1
+99 4 6 1
+99 3 14 1
+99 4 8 1
+99 5 13 1
+99 2 0 1
+99 4 7 1
+99 4 7 1
+99 1 0 1
+99 2 8 1
+99 4 10 1
+99 2 3 1
+99 2 0 1
+99 4 8 1
+99 1 8 1
+end data.
+
+
+variable label years 'Years expected'.
+variable label animal 'Animal Genus'.
+
+add value labels animal 1 'Animal 1' 2 'Animal 2' 3 'Animal 3' 4 'Animal 4' 5 'Animal 5'.
+
+npar tests
+ /median = years by animal (1, 5)
+ .
+])
+
+
+AT_CHECK([pspp -O format=csv median1.sps], [0], [dnl
+Table: Frequencies
+,,Animal Genus,,,,
+,,Animal 1,Animal 2,Animal 3,Animal 4,Animal 5
+Years expected,> Median,2,1,2,3,4
+,≤ Median,2,4,3,6,1
+
+Table: Test Statistics
+,N,Median,Chi-Square,df,Asymp. Sig.
+Years expected,28,7.000,4.317,4,.365
+])
+
+AT_CLEANUP
+
+
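+dnl Median given explicitly on the subcommand.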
+AT_SETUP([NPAR TESTS Median Test (median given)])
+
+AT_DATA([median2.sps], [dnl
+set format F12.3.
+data list notable list /ignore * animal * years * w *.
+begin data
+99 1 10 1
+99 4 1 1
+99 5 11 1
+99 5 10 1
+99 3 7 1
+99 3 14 1
+99 2 3 1
+99 1 1 1
+99 4 7 1
+99 5 12 1
+99 3 6 1
+99 4 1 1
+99 3 5 1
+99 5 7 1
+99 4 6 1
+99 3 14 1
+99 4 8 1
+99 5 13 1
+99 2 0 1
+99 4 7 1
+99 4 7 1
+99 1 0 1
+99 2 8 1
+99 4 10 1
+99 2 3 1
+99 2 0 1
+99 4 8 1
+99 1 8 1
+end data.
+
+
+variable label years 'Years expected'.
+variable label animal 'Animal Genus'.
+
+add value labels animal 1 'Animal 1' 2 'Animal 2' 3 'Animal 3' 4 'Animal 4' 5 'Animal 5'.
+
+npar tests
+ /median (7) = years by animal (1, 5)
+ .
+])
+
+
+AT_CHECK([pspp -O format=csv median2.sps], [0], [dnl
+Table: Frequencies
+,,Animal Genus,,,,
+,,Animal 1,Animal 2,Animal 3,Animal 4,Animal 5
+Years expected,> Median,2,1,2,3,4
+,≤ Median,2,4,3,6,1
+
+Table: Test Statistics
+,N,Median,Chi-Square,df,Asymp. Sig.
+Years expected,28,7.000,4.317,4,.365
+])
+
+AT_CLEANUP
+
+
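+dnl Two-sample form: the grouping values are listed in descending order, so
+dnl only those two groups are compared.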
+AT_SETUP([NPAR TESTS Median Test (two sample)])
+
+AT_DATA([median3.sps], [dnl
+set format F12.3.
+data list notable list /xx * animal * years * w *.
+begin data
+99 1 10 1
+99 4 1 1
+99 5 11 1
+99 5 10 1
+99 3 7 1
+99 3 14 1
+99 2 3 1
+99 1 1 1
+99 4 7 1
+99 5 12 1
+99 3 6 1
+99 4 1 1
+99 3 5 1
+99 5 7 1
+99 4 6 1
+99 3 14 1
+99 4 8 1
+99 5 13 1
+99 2 0 1
+99 4 7 1
+99 4 7 1
+99 1 0 1
+99 2 8 1
+99 4 10 1
+99 2 3 1
+99 2 0 1
+99 4 8 1
+99 1 8 1
+end data.
+
+
+variable label years 'Years expected'.
+variable label animal 'Animal Genus'.
+
+add value labels animal 1 'Animal 1' 2 'Animal 2' 3 'Animal 3' 4 'Animal 4' 5 'Animal 5'.
+
+npar tests
+ /median (7) = xx years by animal (5, 1)
+ .
+])
+
+
+AT_CHECK([pspp -O format=csv median3.sps], [0], [dnl
+Table: Frequencies
+,,Animal Genus,
+,,Animal 1,Animal 5
+xx,> Median,4,5
+,≤ Median,0,0
+Years expected,> Median,2,4
+,≤ Median,2,1
+
+Table: Test Statistics
+,N,Median,Chi-Square,df,Asymp. Sig.
+xx,9,7.000,NaN,1,NaN
+Years expected,9,7.000,.900,1,.343
+])
+
+AT_CLEANUP
\ No newline at end of file