From: Ben Pfaff Date: Sat, 29 Jan 2022 22:44:14 +0000 (-0800) Subject: implement MEDIAN (untested) X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=1bb28b0cb5e25b5e3a5cf50ae8864f886fa1d221 implement MEDIAN (untested) --- diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 652534ec77..5f9d2584c8 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -19,9 +19,11 @@ #include #include "data/casereader.h" +#include "data/casewriter.h" #include "data/dataset.h" #include "data/dictionary.h" #include "data/mrset.h" +#include "data/subcase.h" #include "data/value-labels.h" #include "language/command.h" #include "language/lexer/format-parser.h" @@ -34,6 +36,8 @@ #include "libpspp/message.h" #include "libpspp/string-array.h" #include "math/moments.h" +#include "math/percentiles.h" +#include "math/sort.h" #include "output/pivot-table.h" #include "gl/minmax.h" @@ -1681,7 +1685,14 @@ union ctables_summary /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */ struct moments1 *moments; - /* XXX percentiles, median, mode, multiple response */ + struct + { + struct casewriter *writer; + double mvalid; + double median; + }; + + /* XXX percentiles, mode, multiple response */ }; static void @@ -1713,6 +1724,7 @@ ctables_summary_init (union ctables_summary *s, case CTSF_LAYERPCT_TOTALN: case CTSF_LAYERROWPCT_TOTALN: case CTSF_LAYERCOLPCT_TOTALN: + case CTSF_MISSING: case CSTF_TOTALN: case CTSF_ETOTALN: case CTSF_VALIDN: @@ -1742,7 +1754,22 @@ ctables_summary_init (union ctables_summary *s, break; case CTSF_MEDIAN: - case CTSF_MISSING: + { + struct caseproto *proto = caseproto_create (); + proto = caseproto_add_width (proto, 0); + proto = caseproto_add_width (proto, 0); + + struct subcase ordering; + subcase_init (&ordering, 0, 0, SC_ASCEND); + s->writer = sort_create_writer (&ordering, proto); + subcase_uninit (&ordering); + caseproto_unref (proto); + + s->mvalid = 0; + s->median = SYSMIS; + } + break; + case CTSF_MODE: case CTSF_PTILE: NOT_REACHED (); @@ -1802,6 +1829,7 @@ ctables_summary_uninit (union ctables_summary *s, case CTSF_LAYERPCT_TOTALN: case CTSF_LAYERROWPCT_TOTALN: case CTSF_LAYERCOLPCT_TOTALN: + case CTSF_MISSING: case CSTF_TOTALN: case CTSF_ETOTALN: case CTSF_VALIDN: @@ -1829,7 +1857,9 @@ ctables_summary_uninit (union ctables_summary *s, break; case CTSF_MEDIAN: - case CTSF_MISSING: + casewriter_destroy (s->writer); + break; + case CTSF_MODE: case CTSF_PTILE: NOT_REACHED (); @@ -1891,6 +1921,7 @@ ctables_summary_add (union ctables_summary *s, case CTSF_LAYERPCT_TOTALN: case CTSF_LAYERROWPCT_TOTALN: case CTSF_LAYERCOLPCT_TOTALN: + case CTSF_MISSING: case CSTF_TOTALN: case CTSF_ETOTALN: case CTSF_VALIDN: @@ -1931,7 +1962,17 @@ ctables_summary_add (union ctables_summary *s, break; case CTSF_MEDIAN: - case CTSF_MISSING: + if (var_is_value_missing (var, value)) + { + s->mvalid += weight; + + struct ccase *c = case_create (casewriter_get_proto (s->writer)); + *case_num_rw_idx (c, 0) = value->f; + *case_num_rw_idx (c, 1) = weight; + casewriter_write (s->writer, c); + } + break; + case CTSF_MODE: case CTSF_PTILE: NOT_REACHED (); @@ -2010,6 +2051,9 @@ ctables_summary_value (const struct ctables_cell *cell, case CTSF_LAYERCOLPCT_TOTALN: NOT_REACHED (); + case CTSF_MISSING: + return s->missing; + case CSTF_TOTALN: case CTSF_ETOTALN: return s->valid + s->missing; @@ -2072,7 +2116,19 @@ ctables_summary_value (const struct ctables_cell *cell, NOT_REACHED (); case CTSF_MEDIAN: - case CTSF_MISSING: + if (s->writer) + { + struct casereader *reader = casewriter_make_reader (s->writer); + s->writer = NULL; + + struct percentile *median = percentile_create (0.5, s->mvalid); + struct order_stats *os = &median->parent; + order_stats_accumulate_idx (&os, 1, reader, 1, 0); + s->median = percentile_calculate (median, PC_HAVERAGE); + statistic_destroy (&median->parent.parent); + } + return s->median; + case CTSF_MODE: case CTSF_PTILE: NOT_REACHED ();