From ddb6ce30f985ce39be0326193ae0bad452307127 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 2 Aug 2007 03:07:17 +0000 Subject: [PATCH] Clean up handling of median, by treating it almost like any other percentile. Fixes bug #17424. Thanks to John Darrington for review. * frequencies.q (internal_cmd_frequencies): Fix handling of bit masks for `stats' variable. If median is selected, turn it off and add a 50th percentile. (add_percentile): Simplify code a little. (calc_stats): Drop special casing of median. (dump_statistics): Ditto, except that we label the 50th percentile as "50 (Median)" to make it clear that it's also the median. --- src/language/stats/ChangeLog | 13 ++++ src/language/stats/frequencies.q | 60 +++++---------- tests/ChangeLog | 9 +++ tests/command/weight.sh | 36 ++++----- tests/stats/percentiles-compatible.sh | 26 +++---- tests/stats/percentiles-enhanced.sh | 104 +++++++++++++------------- 6 files changed, 123 insertions(+), 125 deletions(-) diff --git a/src/language/stats/ChangeLog b/src/language/stats/ChangeLog index bf238758..b9c54777 100644 --- a/src/language/stats/ChangeLog +++ b/src/language/stats/ChangeLog @@ -1,3 +1,16 @@ +2007-08-01 Ben Pfaff + + Clean up handling of median, by treating it almost like any other + percentile. Fixes bug #17424. Thanks to John Darrington for + review. + * frequencies.q (internal_cmd_frequencies): Fix handling of bit + masks for `stats' variable. If median is selected, turn it off + and add a 50th percentile. + (add_percentile): Simplify code a little. + (calc_stats): Drop special casing of median. + (dump_statistics): Ditto, except that we label the 50th percentile + as "50 (Median)" to make it clear that it's also the median. + 2007-07-31 Ben Pfaff Remove integer mode from FREQUENCIES and incidentally fix bug diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 0934fc4a..93e16f7f 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -156,8 +156,6 @@ static void add_percentile (double x) ; static struct percentile *percentiles; static int n_percentiles; -static int implicit_50th ; - /* Groups of statistics. */ #define BI BIT_INDEX #define frq_default \ @@ -323,14 +321,14 @@ internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds) if (cmd.a_statistics[FRQ_ST_ALL]) stats |= frq_all; if (cmd.sort != FRQ_AVALUE && cmd.sort != FRQ_DVALUE) - stats &= ~frq_median; + stats &= ~BIT_INDEX (frq_median); for (i = 0; i < frq_n_stats; i++) if (cmd.a_statistics[st_name[i].st_indx]) stats |= BIT_INDEX (i); if (stats & frq_kurt) - stats |= frq_sekurt; + stats |= BIT_INDEX (frq_sekurt); if (stats & frq_skew) - stats |= frq_seskew; + stats |= BIT_INDEX (frq_seskew); /* Calculate n_stats. */ n_stats = 0; @@ -363,7 +361,14 @@ internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds) add_percentile (j / (double) cmd.n_ntiles[i]); } } - + if (stats & BIT_INDEX (frq_median)) + { + /* Treat the median as the 50% percentile. + We output it in the percentiles table as "50 (Median)." */ + add_percentile (0.5); + stats &= ~BIT_INDEX (frq_median); + n_stats--; + } /* Do it! */ input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds), @@ -876,11 +881,7 @@ add_percentile (double x) { percentiles = pool_nrealloc (syntax_pool, percentiles, n_percentiles + 1, sizeof *percentiles); - - if (i < n_percentiles) - memmove (&percentiles[i + 1], &percentiles[i], - (n_percentiles - i) * sizeof (struct percentile) ); - + insert_element (percentiles, n_percentiles, sizeof *percentiles, i); percentiles[i].p = x; n_percentiles++; } @@ -1236,28 +1237,9 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) double rank; int i = 0; int idx; - double *median_value; /* Calculate percentiles. */ - /* If the 50th percentile was not explicitly requested then we must - calculate it anyway --- it's the median */ - median_value = 0 ; - for (i = 0; i < n_percentiles; i++) - { - if (percentiles[i].p == 0.5) - { - median_value = &percentiles[i].value; - break; - } - } - - if ( 0 == median_value ) - { - add_percentile (0.5); - implicit_50th = 1; - } - for (i = 0; i < n_percentiles; i++) { percentiles[i].flag = 0; @@ -1337,9 +1319,6 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) percentiles[i].value = percentiles[i].x1 + ( percentiles[i].x2 - percentiles[i].x1) * s ; - - if ( percentiles[i].p == 0.50) - median_value = &percentiles[i].value; } @@ -1376,7 +1355,6 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) d[frq_max] = ft->valid[ft->n_valid - 1].value[0].f; d[frq_mode] = X_mode; d[frq_range] = d[frq_max] - d[frq_min]; - d[frq_median] = *median_value; d[frq_sum] = d[frq_mean] * W; d[frq_stddev] = sqrt (d[frq_variance]); d[frq_semean] = d[frq_stddev] / sqrt (W); @@ -1393,11 +1371,6 @@ dump_statistics (const struct variable *v, int show_varname) struct tab_table *t; int i, r; - int n_explicit_percentiles = n_percentiles; - - if ( implicit_50th && n_percentiles > 0 ) - --n_percentiles; - if (var_is_alpha (v)) return; ft = &get_var_freqs (v)->tab; @@ -1409,7 +1382,7 @@ dump_statistics (const struct variable *v, int show_varname) } calc_stats (v, stat_value); - t = tab_create (3, n_stats + n_explicit_percentiles + 2, 0); + t = tab_create (3, n_stats + n_percentiles + 2, 0); tab_dim (t, tab_natural_dimensions); tab_box (t, TAL_1, TAL_1, -1, -1 , 0 , 0 , 2, tab_nr(t) - 1) ; @@ -1437,14 +1410,17 @@ dump_statistics (const struct variable *v, int show_varname) tab_float(t, 2, 1, TAB_NONE, ft->total_cases - ft->valid_cases, 11, 0); - for (i = 0; i < n_explicit_percentiles; i++, r++) + for (i = 0; i < n_percentiles; i++, r++) { if ( i == 0 ) { tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Percentiles")); } - tab_float (t, 1, r, TAB_LEFT, percentiles[i].p * 100, 3, 0 ); + if (percentiles[i].p == 0.5) + tab_text (t, 1, r, TAB_LEFT, _("50 (Median)")); + else + tab_float (t, 1, r, TAB_LEFT, percentiles[i].p * 100, 3, 0); tab_float (t, 2, r, TAB_NONE, percentiles[i].value, 11, 3); } diff --git a/tests/ChangeLog b/tests/ChangeLog index 01e6ff33..48973f36 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,12 @@ +2007-08-01 Ben Pfaff + + * command/weight.sh: Update to match new output format for median + under FREQUENCIES. + + * stats/percentiles-compatible.sh: Ditto. + + * stats/percentiles-enhanced.sh: Ditto. + 2007-07-28 John Darrington * command/t-test-1-indep-val.sh: Changed the order of groups in the diff --git a/tests/command/weight.sh b/tests/command/weight.sh index 3b30d530..82d65e79 100755 --- a/tests/command/weight.sh +++ b/tests/command/weight.sh @@ -146,24 +146,24 @@ diff -b $TEMPDIR/pspp.list - <