implement MEDIAN (untested)
authorBen Pfaff <blp@cs.stanford.edu>
Sat, 29 Jan 2022 22:44:14 +0000 (14:44 -0800)
committerBen Pfaff <blp@cs.stanford.edu>
Sun, 13 Mar 2022 23:56:02 +0000 (16:56 -0700)
src/language/stats/ctables.c

index 652534ec774e74f23d58b4901ae1b41500886c69..5f9d2584c809649d527cf0bd59a0f879cd38eefb 100644 (file)
 #include <math.h>
 
 #include "data/casereader.h"
+#include "data/casewriter.h"
 #include "data/dataset.h"
 #include "data/dictionary.h"
 #include "data/mrset.h"
+#include "data/subcase.h"
 #include "data/value-labels.h"
 #include "language/command.h"
 #include "language/lexer/format-parser.h"
@@ -34,6 +36,8 @@
 #include "libpspp/message.h"
 #include "libpspp/string-array.h"
 #include "math/moments.h"
+#include "math/percentiles.h"
+#include "math/sort.h"
 #include "output/pivot-table.h"
 
 #include "gl/minmax.h"
@@ -1681,7 +1685,14 @@ union ctables_summary
     /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
     struct moments1 *moments;
 
-    /* XXX percentiles, median, mode, multiple response */
+    struct
+      {
+        struct casewriter *writer;
+        double mvalid;
+        double median;
+      };
+
+    /* XXX percentiles, mode, multiple response */
   };
 
 static void
@@ -1713,6 +1724,7 @@ ctables_summary_init (union ctables_summary *s,
     case CTSF_LAYERPCT_TOTALN:
     case CTSF_LAYERROWPCT_TOTALN:
     case CTSF_LAYERCOLPCT_TOTALN:
+    case CTSF_MISSING:
     case CSTF_TOTALN:
     case CTSF_ETOTALN:
     case CTSF_VALIDN:
@@ -1742,7 +1754,22 @@ ctables_summary_init (union ctables_summary *s,
       break;
 
     case CTSF_MEDIAN:
-    case CTSF_MISSING:
+      {
+        struct caseproto *proto = caseproto_create ();
+        proto = caseproto_add_width (proto, 0);
+        proto = caseproto_add_width (proto, 0);
+
+        struct subcase ordering;
+        subcase_init (&ordering, 0, 0, SC_ASCEND);
+        s->writer = sort_create_writer (&ordering, proto);
+        subcase_uninit (&ordering);
+        caseproto_unref (proto);
+
+        s->mvalid = 0;
+        s->median = SYSMIS;
+      }
+      break;
+
     case CTSF_MODE:
     case CTSF_PTILE:
       NOT_REACHED ();
@@ -1802,6 +1829,7 @@ ctables_summary_uninit (union ctables_summary *s,
     case CTSF_LAYERPCT_TOTALN:
     case CTSF_LAYERROWPCT_TOTALN:
     case CTSF_LAYERCOLPCT_TOTALN:
+    case CTSF_MISSING:
     case CSTF_TOTALN:
     case CTSF_ETOTALN:
     case CTSF_VALIDN:
@@ -1829,7 +1857,9 @@ ctables_summary_uninit (union ctables_summary *s,
       break;
 
     case CTSF_MEDIAN:
-    case CTSF_MISSING:
+      casewriter_destroy (s->writer);
+      break;
+
     case CTSF_MODE:
     case CTSF_PTILE:
       NOT_REACHED ();
@@ -1891,6 +1921,7 @@ ctables_summary_add (union ctables_summary *s,
     case CTSF_LAYERPCT_TOTALN:
     case CTSF_LAYERROWPCT_TOTALN:
     case CTSF_LAYERCOLPCT_TOTALN:
+    case CTSF_MISSING:
     case CSTF_TOTALN:
     case CTSF_ETOTALN:
     case CTSF_VALIDN:
@@ -1931,7 +1962,17 @@ ctables_summary_add (union ctables_summary *s,
       break;
 
     case CTSF_MEDIAN:
-    case CTSF_MISSING:
+      if (var_is_value_missing (var, value))
+        {
+          s->mvalid += weight;
+
+          struct ccase *c = case_create (casewriter_get_proto (s->writer));
+          *case_num_rw_idx (c, 0) = value->f;
+          *case_num_rw_idx (c, 1) = weight;
+          casewriter_write (s->writer, c);
+        }
+      break;
+
     case CTSF_MODE:
     case CTSF_PTILE:
       NOT_REACHED ();
@@ -2010,6 +2051,9 @@ ctables_summary_value (const struct ctables_cell *cell,
     case CTSF_LAYERCOLPCT_TOTALN:
       NOT_REACHED ();
 
+    case CTSF_MISSING:
+      return s->missing;
+
     case CSTF_TOTALN:
     case CTSF_ETOTALN:
       return s->valid + s->missing;
@@ -2072,7 +2116,19 @@ ctables_summary_value (const struct ctables_cell *cell,
       NOT_REACHED ();
 
     case CTSF_MEDIAN:
-    case CTSF_MISSING:
+      if (s->writer)
+        {
+          struct casereader *reader = casewriter_make_reader (s->writer);
+          s->writer = NULL;
+
+          struct percentile *median = percentile_create (0.5, s->mvalid);
+          struct order_stats *os = &median->parent;
+          order_stats_accumulate_idx (&os, 1, reader, 1, 0);
+          s->median = percentile_calculate (median, PC_HAVERAGE);
+          statistic_destroy (&median->parent.parent);
+        }
+      return s->median;
+
     case CTSF_MODE:
     case CTSF_PTILE:
       NOT_REACHED ();