From ef05645131b8163a4bb9839e609bb76d553b80e9 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 11 Dec 2004 03:45:40 +0000 Subject: [PATCH] Fixed the trimmed mean calculation under various special cases. --- src/ChangeLog | 5 ++ src/factor_stats.c | 28 ++++---- tests/Makefile.am | 2 + tests/bugs/examine-1sample.sh | 68 ++++++++++++++++++++ tests/command/trimmed-mean.sh | 117 ++++++++++++++++++++++++++++++++++ 5 files changed, 208 insertions(+), 12 deletions(-) create mode 100755 tests/bugs/examine-1sample.sh create mode 100755 tests/command/trimmed-mean.sh diff --git a/src/ChangeLog b/src/ChangeLog index 8ccbc7cc..00f49780 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +Sat Dec 11 11:43:45 WST 2004 John Darrington + + * factor_stats.c Fixed calculation of trimmed mean under various + special conditions. + Sat Dec 4 17:14:45 WST 2004 John Darrington * histogram.c chart.[ch] factor_stats.c frequencies.q diff --git a/src/factor_stats.c b/src/factor_stats.c index 2ba785a7..3a88ab44 100644 --- a/src/factor_stats.c +++ b/src/factor_stats.c @@ -144,20 +144,26 @@ metrics_postcalc(struct metrics *m) m->wvp = (struct weighted_value **) hsh_sort(m->ordered_data); m->n_data = hsh_count(m->ordered_data); - if ( m->n_data == 0 ) + m->histogram = histogram_create(10, m->min, m->max); + + for ( i = 0 ; i < m->n_data ; ++i ) { - m->trimmed_mean = m->mean; - return; + struct weighted_value **wv = (m->wvp) ; + gsl_histogram_accumulate(m->histogram, wv[i]->v.f, wv[i]->w); } /* Trimmed mean calculation */ + if ( m->n_data <= 1 ) + { + m->trimmed_mean = m->mean; + return; + } tc = m->n * 0.05 ; k1 = -1; k2 = -1; - for ( i = 0 ; i < m->n_data ; ++i ) { cc += m->wvp[i]->w; @@ -179,6 +185,12 @@ metrics_postcalc(struct metrics *m) k2 = i; } + /* Special case here */ + if ( k1 + 1 == k2 ) + { + m->trimmed_mean = m->wvp[k2]->v.f; + return; + } m->trimmed_mean = 0; for ( i = k1 + 2 ; i <= k2 - 1 ; ++i ) @@ -191,14 +203,6 @@ metrics_postcalc(struct metrics *m) m->trimmed_mean += (m->wvp[k1 + 1]->cc - tc) * m->wvp[k1 + 1]->v.f ; m->trimmed_mean /= 0.9 * m->n ; - m->histogram = histogram_create(10, m->min, m->max); - - for ( i = 0 ; i < m->n_data ; ++i ) - { - struct weighted_value **wv = (m->wvp) ; - gsl_histogram_accumulate(m->histogram, wv[i]->v.f, wv[i]->w); - } - } diff --git a/tests/Makefile.am b/tests/Makefile.am index 5ab78fd8..696af98a 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -35,6 +35,7 @@ TESTS = \ command/t-test-paired-missing-anal.sh \ command/t-test-paired-missing-list.sh \ command/t-test-pairs.sh \ + command/trimmed-mean.sh \ command/tabs.sh \ command/use.sh \ command/weight.sh \ @@ -45,6 +46,7 @@ TESTS = \ bugs/data-crash.sh \ bugs/double-frequency.sh \ bugs/get.sh \ + bugs/examine-1sample.sh \ bugs/html-frequency.sh \ bugs/multipass.sh \ bugs/random.sh \ diff --git a/tests/bugs/examine-1sample.sh b/tests/bugs/examine-1sample.sh new file mode 100755 index 00000000..497ee892 --- /dev/null +++ b/tests/bugs/examine-1sample.sh @@ -0,0 +1,68 @@ +#!/bin/sh + +# This program tests for a bug in the EXAMINE command, where it +# would crash if a data file with only one case was presented + +TEMPDIR=/tmp/pspp-tst-$$ + +here=`pwd`; + +# ensure that top_srcdir is absolute +cd $top_srcdir; top_srcdir=`pwd` + +export STAT_CONFIG_PATH=$top_srcdir/config + + +cleanup() +{ + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="create program" +cat > $TEMPDIR/out.stat < $TEMPDIR/out.stat <