From: John Darrington Date: Sun, 24 Aug 2008 05:22:02 +0000 (+0800) Subject: Fixed some issues calculating percentiles when missing values are involved. X-Git-Tag: v0.7.1~50^2~70 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d7c0f3aae79a66ccba9d0f23f357a2249a0e9b0a;p=pspp-builds.git Fixed some issues calculating percentiles when missing values are involved. Added an extra argument to order_stats_accumulate, to indicate which classes of missing values should be excluded. Added an internal cumulative weight counter, to ensure that the declared total weight agrees with that which is encountered. --- diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index a871b67e..7f197ec3 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -1020,7 +1020,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, order_stats_accumulate (os, n_os, casereader_clone (metric->up_reader), - wv, dependent_vars[v]); + wv, dependent_vars[v], MV_ANY); free (os); } } @@ -1091,7 +1091,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, order_stats_accumulate ((struct order_stats **) &metric->box_whisker, 1, casereader_clone (metric->up_reader), - wv, dependent_vars[v]); + wv, dependent_vars[v], MV_ANY); } } diff --git a/src/math/order-stats.c b/src/math/order-stats.c index f5b6851a..ca4160f4 100644 --- a/src/math/order-stats.c +++ b/src/math/order-stats.c @@ -23,6 +23,36 @@ #include #include +#if 0 + +#include + +static void +order_stats_dump_k1 (const struct order_stats *os) +{ + struct k *k = &os->k[0]; + printf ("K1: tc %g; c %g cc %g ccp %g\n", + k->tc, k->c, k->cc, k->cc_p1); + +} + +static void +order_stats_dump_k2 (const struct order_stats *os) +{ + struct k *k = &os->k[1]; + printf ("K2: tc %g; c %g cc %g ccp %g\n", + k->tc, k->c, k->cc, k->cc_p1); +} + + +void +order_stats_dump (const struct order_stats *os) +{ + order_stats_dump_k1 (os); + order_stats_dump_k2 (os); +} + +#endif static void update_k_lower (struct k *kk, @@ -55,6 +85,7 @@ update_k_values (const struct ccase *cx, double y_i, double c_i, double cc_i, struct order_stats **os, size_t n_os) { int j; + for (j = 0 ; j < n_os ; ++j) { int k; @@ -69,15 +100,18 @@ update_k_values (const struct ccase *cx, double y_i, double c_i, double cc_i, if ( stat->accumulate ) stat->accumulate (stat, cx, c_i, cc_i, y_i); + + tos->cc = cc_i; } } void order_stats_accumulate (struct order_stats **os, size_t nos, - struct casereader *reader, - const struct variable *wv, - const struct variable *var) + struct casereader *reader, + const struct variable *wv, + const struct variable *var, + enum mv_class exclude) { struct ccase cx; struct ccase prev_cx; @@ -96,6 +130,9 @@ order_stats_accumulate (struct order_stats **os, size_t nos, /* The casereader MUST be sorted */ assert (this_value >= prev_value); + if ( var_is_value_missing (var, case_data (&cx, var), exclude)) + continue; + case_destroy (&prev_cx); if ( prev_value == -DBL_MAX || prev_value == this_value) @@ -117,3 +154,6 @@ order_stats_accumulate (struct order_stats **os, size_t nos, casereader_destroy (reader); } + + + diff --git a/src/math/order-stats.h b/src/math/order-stats.h index fc588950..cea50ed8 100644 --- a/src/math/order-stats.h +++ b/src/math/order-stats.h @@ -29,17 +29,11 @@ struct variable; struct k { double tc; - double cc; - double cc_p1; - double c; - double c_p1; - double y; - double y_p1; }; @@ -49,18 +43,18 @@ struct order_stats struct statistic parent; int n_k; struct k *k; -}; - - -void dump_ptile_k1 (const struct order_stats *ptl); + double cc; +}; -void dump_ptile_k2 (const struct order_stats *ptl); +enum mv_class; +void order_stats_dump (const struct order_stats *os); void order_stats_accumulate (struct order_stats **ptl, size_t nos, - struct casereader *reader, - const struct variable *wv, - const struct variable *var); + struct casereader *reader, + const struct variable *wv, + const struct variable *var, + enum mv_class exclude); #endif diff --git a/src/math/percentiles.c b/src/math/percentiles.c index 53d704d6..bf99de16 100644 --- a/src/math/percentiles.c +++ b/src/math/percentiles.c @@ -47,6 +47,8 @@ percentile_calculate (const struct percentile *ptl, enum pc_alg alg) struct percentile *mutable = (struct percentile *) ptl; const struct order_stats *os = &ptl->parent; + assert (os->cc == ptl->w); + if ( ptl->g1 == SYSMIS) mutable->g1 = (os->k[0].tc - os->k[0].cc) / os->k[0].c_p1; @@ -169,6 +171,7 @@ percentile_create (double p, double W) assert (p <= 1.0); ptl->ptile = p; + ptl->w = W; os->n_k = 2; os->k = xcalloc (sizeof (*os->k), 2); @@ -186,15 +189,3 @@ percentile_create (double p, double W) return os; } -#if 0 -void -percentile_dump (const struct percentile *ptl) -{ - printf ("Percentile %g:\n\tk1: ", ptl->ptile); - - dump_os_k1 ((const struct os *)ptl); - printf ("\tk2: "); - dump_os_k2 ((const struct os *)ptl); - printf ("\n"); -} -#endif diff --git a/src/math/percentiles.h b/src/math/percentiles.h index 93558463..0dd09820 100644 --- a/src/math/percentiles.h +++ b/src/math/percentiles.h @@ -39,6 +39,7 @@ struct percentile struct order_stats parent; double ptile; + double w; /* Mutable */ double g1; diff --git a/src/math/trimmed-mean.c b/src/math/trimmed-mean.c index fa205017..da3d4240 100644 --- a/src/math/trimmed-mean.c +++ b/src/math/trimmed-mean.c @@ -77,6 +77,8 @@ trimmed_mean_calculate (const struct trimmed_mean *tm) { const struct order_stats *os = (const struct order_stats *) tm; + assert (os->cc == tm->w); + return ( (os->k[0].cc_p1 - os->k[0].tc) * os->k[0].y_p1