projects
/
pspp
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
28d3dbe
)
implement MEDIAN (untested)
author
Ben Pfaff
<blp@cs.stanford.edu>
Sat, 29 Jan 2022 22:44:14 +0000
(14:44 -0800)
committer
Ben Pfaff
<blp@cs.stanford.edu>
Sat, 2 Apr 2022 01:48:55 +0000
(18:48 -0700)
src/language/stats/ctables.c
patch
|
blob
|
history
diff --git
a/src/language/stats/ctables.c
b/src/language/stats/ctables.c
index 652534ec774e74f23d58b4901ae1b41500886c69..5f9d2584c809649d527cf0bd59a0f879cd38eefb 100644
(file)
--- a/
src/language/stats/ctables.c
+++ b/
src/language/stats/ctables.c
@@
-19,9
+19,11
@@
#include <math.h>
#include "data/casereader.h"
#include <math.h>
#include "data/casereader.h"
+#include "data/casewriter.h"
#include "data/dataset.h"
#include "data/dictionary.h"
#include "data/mrset.h"
#include "data/dataset.h"
#include "data/dictionary.h"
#include "data/mrset.h"
+#include "data/subcase.h"
#include "data/value-labels.h"
#include "language/command.h"
#include "language/lexer/format-parser.h"
#include "data/value-labels.h"
#include "language/command.h"
#include "language/lexer/format-parser.h"
@@
-34,6
+36,8
@@
#include "libpspp/message.h"
#include "libpspp/string-array.h"
#include "math/moments.h"
#include "libpspp/message.h"
#include "libpspp/string-array.h"
#include "math/moments.h"
+#include "math/percentiles.h"
+#include "math/sort.h"
#include "output/pivot-table.h"
#include "gl/minmax.h"
#include "output/pivot-table.h"
#include "gl/minmax.h"
@@
-1681,7
+1685,14
@@
union ctables_summary
/* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
struct moments1 *moments;
/* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
struct moments1 *moments;
- /* XXX percentiles, median, mode, multiple response */
+ struct
+ {
+ struct casewriter *writer;
+ double mvalid;
+ double median;
+ };
+
+ /* XXX percentiles, mode, multiple response */
};
static void
};
static void
@@
-1713,6
+1724,7
@@
ctables_summary_init (union ctables_summary *s,
case CTSF_LAYERPCT_TOTALN:
case CTSF_LAYERROWPCT_TOTALN:
case CTSF_LAYERCOLPCT_TOTALN:
case CTSF_LAYERPCT_TOTALN:
case CTSF_LAYERROWPCT_TOTALN:
case CTSF_LAYERCOLPCT_TOTALN:
+ case CTSF_MISSING:
case CSTF_TOTALN:
case CTSF_ETOTALN:
case CTSF_VALIDN:
case CSTF_TOTALN:
case CTSF_ETOTALN:
case CTSF_VALIDN:
@@
-1742,7
+1754,22
@@
ctables_summary_init (union ctables_summary *s,
break;
case CTSF_MEDIAN:
break;
case CTSF_MEDIAN:
- case CTSF_MISSING:
+ {
+ struct caseproto *proto = caseproto_create ();
+ proto = caseproto_add_width (proto, 0);
+ proto = caseproto_add_width (proto, 0);
+
+ struct subcase ordering;
+ subcase_init (&ordering, 0, 0, SC_ASCEND);
+ s->writer = sort_create_writer (&ordering, proto);
+ subcase_uninit (&ordering);
+ caseproto_unref (proto);
+
+ s->mvalid = 0;
+ s->median = SYSMIS;
+ }
+ break;
+
case CTSF_MODE:
case CTSF_PTILE:
NOT_REACHED ();
case CTSF_MODE:
case CTSF_PTILE:
NOT_REACHED ();
@@
-1802,6
+1829,7
@@
ctables_summary_uninit (union ctables_summary *s,
case CTSF_LAYERPCT_TOTALN:
case CTSF_LAYERROWPCT_TOTALN:
case CTSF_LAYERCOLPCT_TOTALN:
case CTSF_LAYERPCT_TOTALN:
case CTSF_LAYERROWPCT_TOTALN:
case CTSF_LAYERCOLPCT_TOTALN:
+ case CTSF_MISSING:
case CSTF_TOTALN:
case CTSF_ETOTALN:
case CTSF_VALIDN:
case CSTF_TOTALN:
case CTSF_ETOTALN:
case CTSF_VALIDN:
@@
-1829,7
+1857,9
@@
ctables_summary_uninit (union ctables_summary *s,
break;
case CTSF_MEDIAN:
break;
case CTSF_MEDIAN:
- case CTSF_MISSING:
+ casewriter_destroy (s->writer);
+ break;
+
case CTSF_MODE:
case CTSF_PTILE:
NOT_REACHED ();
case CTSF_MODE:
case CTSF_PTILE:
NOT_REACHED ();
@@
-1891,6
+1921,7
@@
ctables_summary_add (union ctables_summary *s,
case CTSF_LAYERPCT_TOTALN:
case CTSF_LAYERROWPCT_TOTALN:
case CTSF_LAYERCOLPCT_TOTALN:
case CTSF_LAYERPCT_TOTALN:
case CTSF_LAYERROWPCT_TOTALN:
case CTSF_LAYERCOLPCT_TOTALN:
+ case CTSF_MISSING:
case CSTF_TOTALN:
case CTSF_ETOTALN:
case CTSF_VALIDN:
case CSTF_TOTALN:
case CTSF_ETOTALN:
case CTSF_VALIDN:
@@
-1931,7
+1962,17
@@
ctables_summary_add (union ctables_summary *s,
break;
case CTSF_MEDIAN:
break;
case CTSF_MEDIAN:
- case CTSF_MISSING:
+ if (var_is_value_missing (var, value))
+ {
+ s->mvalid += weight;
+
+ struct ccase *c = case_create (casewriter_get_proto (s->writer));
+ *case_num_rw_idx (c, 0) = value->f;
+ *case_num_rw_idx (c, 1) = weight;
+ casewriter_write (s->writer, c);
+ }
+ break;
+
case CTSF_MODE:
case CTSF_PTILE:
NOT_REACHED ();
case CTSF_MODE:
case CTSF_PTILE:
NOT_REACHED ();
@@
-2010,6
+2051,9
@@
ctables_summary_value (const struct ctables_cell *cell,
case CTSF_LAYERCOLPCT_TOTALN:
NOT_REACHED ();
case CTSF_LAYERCOLPCT_TOTALN:
NOT_REACHED ();
+ case CTSF_MISSING:
+ return s->missing;
+
case CSTF_TOTALN:
case CTSF_ETOTALN:
return s->valid + s->missing;
case CSTF_TOTALN:
case CTSF_ETOTALN:
return s->valid + s->missing;
@@
-2072,7
+2116,19
@@
ctables_summary_value (const struct ctables_cell *cell,
NOT_REACHED ();
case CTSF_MEDIAN:
NOT_REACHED ();
case CTSF_MEDIAN:
- case CTSF_MISSING:
+ if (s->writer)
+ {
+ struct casereader *reader = casewriter_make_reader (s->writer);
+ s->writer = NULL;
+
+ struct percentile *median = percentile_create (0.5, s->mvalid);
+ struct order_stats *os = &median->parent;
+ order_stats_accumulate_idx (&os, 1, reader, 1, 0);
+ s->median = percentile_calculate (median, PC_HAVERAGE);
+ statistic_destroy (&median->parent.parent);
+ }
+ return s->median;
+
case CTSF_MODE:
case CTSF_PTILE:
NOT_REACHED ();
case CTSF_MODE:
case CTSF_PTILE:
NOT_REACHED ();