From: John Darrington <john@darrington.wattle.id.au>
Date: Sat, 11 Dec 2004 03:45:40 +0000 (+0000)
Subject: Fixed the trimmed mean calculation under various special cases.
X-Git-Tag: v0.4.0~212
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ef05645131b8163a4bb9839e609bb76d553b80e9;p=pspp-builds.git

Fixed the trimmed mean calculation under various special cases.
---

diff --git a/src/ChangeLog b/src/ChangeLog
index 8ccbc7cc..00f49780 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+Sat Dec 11 11:43:45 WST 2004 John Darrington <john@darrington.wattle.id.au>
+
+	* factor_stats.c Fixed calculation of trimmed mean under various
+	special conditions.
+
 Sat Dec  4 17:14:45 WST 2004 John Darrington <john@darrington.wattle.id.au>
 
 	* histogram.c chart.[ch] factor_stats.c frequencies.q
diff --git a/src/factor_stats.c b/src/factor_stats.c
index 2ba785a7..3a88ab44 100644
--- a/src/factor_stats.c
+++ b/src/factor_stats.c
@@ -144,20 +144,26 @@ metrics_postcalc(struct metrics *m)
   m->wvp = (struct weighted_value **) hsh_sort(m->ordered_data);
   m->n_data = hsh_count(m->ordered_data);
 
-  if ( m->n_data == 0 ) 
+  m->histogram = histogram_create(10, m->min, m->max);
+
+  for ( i = 0 ; i < m->n_data ; ++i ) 
     {
-      m->trimmed_mean = m->mean;
-      return;
+      struct weighted_value **wv = (m->wvp) ;
+      gsl_histogram_accumulate(m->histogram, wv[i]->v.f, wv[i]->w);
     }
 
 
   /* Trimmed mean calculation */
+  if ( m->n_data <= 1 ) 
+    {
+      m->trimmed_mean = m->mean;
+      return;
+    }
 
   tc = m->n * 0.05 ;
   k1 = -1;
   k2 = -1;
 
-
   for ( i = 0 ; i < m->n_data ; ++i ) 
     {
       cc += m->wvp[i]->w;
@@ -179,6 +185,12 @@ metrics_postcalc(struct metrics *m)
 	k2 = i;
     }
 
+  /* Special case: k1 + 1 == k2, so take the value at index k2 as the trimmed mean. */
+  if ( k1 + 1 == k2 ) 
+    {
+      m->trimmed_mean = m->wvp[k2]->v.f;
+      return;
+    }
 
   m->trimmed_mean = 0;
   for ( i = k1 + 2 ; i <= k2 - 1 ; ++i ) 
@@ -191,14 +203,6 @@ metrics_postcalc(struct metrics *m)
   m->trimmed_mean += (m->wvp[k1 + 1]->cc - tc) * m->wvp[k1 + 1]->v.f ;
   m->trimmed_mean /= 0.9 * m->n ;
 
-  m->histogram = histogram_create(10, m->min, m->max);
-
-  for ( i = 0 ; i < m->n_data ; ++i ) 
-    {
-      struct weighted_value **wv = (m->wvp) ;
-      gsl_histogram_accumulate(m->histogram, wv[i]->v.f, wv[i]->w);
-    }
-
 }
 
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5ab78fd8..696af98a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -35,6 +35,7 @@ TESTS = \
 	command/t-test-paired-missing-anal.sh \
 	command/t-test-paired-missing-list.sh \
 	command/t-test-pairs.sh \
+	command/trimmed-mean.sh \
 	command/tabs.sh \
 	command/use.sh \
 	command/weight.sh \
@@ -45,6 +46,7 @@ TESTS = \
 	bugs/data-crash.sh \
 	bugs/double-frequency.sh \
 	bugs/get.sh \
+	bugs/examine-1sample.sh \
 	bugs/html-frequency.sh \
 	bugs/multipass.sh \
 	bugs/random.sh \
diff --git a/tests/bugs/examine-1sample.sh b/tests/bugs/examine-1sample.sh
new file mode 100755
index 00000000..497ee892
--- /dev/null
+++ b/tests/bugs/examine-1sample.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+# This program tests for a bug in the EXAMINE command, where it
+# would crash if presented with a data file containing only one case.
+
+TEMPDIR=/tmp/pspp-tst-$$
+
+here=`pwd`;
+
+# ensure that top_srcdir is absolute
+cd $top_srcdir; top_srcdir=`pwd`
+
+export STAT_CONFIG_PATH=$top_srcdir/config
+
+
+cleanup()
+{
+     rm -rf $TEMPDIR
+}
+
+
+fail()
+{
+    echo $activity
+    echo FAILED
+    cleanup;
+    exit 1;
+}
+
+
+no_result()
+{
+    echo $activity
+    echo NO RESULT;
+    cleanup;
+    exit 2;
+}
+
+pass()
+{
+    cleanup;
+    exit 0;
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+
+activity="create program"
+cat > $TEMPDIR/out.stat <<EOF
+DATA LIST LIST /quality * .
+BEGIN DATA
+3  
+END DATA
+
+
+EXAMINE
+	quality 
+	/STATISTICS descriptives 
+	.
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp -o raw-ascii $TEMPDIR/out.stat
+if [ $? -ne 0 ] ; then fail ; fi
+
+pass
diff --git a/tests/command/trimmed-mean.sh b/tests/command/trimmed-mean.sh
new file mode 100755
index 00000000..0690c0ea
--- /dev/null
+++ b/tests/command/trimmed-mean.sh
@@ -0,0 +1,117 @@
+#!/bin/sh
+
+# This program tests the Trimmed Mean calculation in the case
+# where the data is weighted towards the centre.
+
+TEMPDIR=/tmp/pspp-tst-$$
+
+here=`pwd`;
+
+# ensure that top_srcdir is absolute
+cd $top_srcdir; top_srcdir=`pwd`
+
+export STAT_CONFIG_PATH=$top_srcdir/config
+
+
+cleanup()
+{
+     rm -rf $TEMPDIR
+}
+
+
+fail()
+{
+    echo $activity
+    echo FAILED
+    cleanup;
+    exit 1;
+}
+
+
+no_result()
+{
+    echo $activity
+    echo NO RESULT;
+    cleanup;
+    exit 2;
+}
+
+pass()
+{
+    cleanup;
+    exit 0;
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+
+
+activity="create program"
+cat > $TEMPDIR/out.stat <<EOF
+DATA LIST LIST /x * c *.
+BEGIN DATA.
+1 1
+2 49
+3 2
+END DATA.
+
+WEIGHT BY c.
+
+EXAMINE
+	x
+	/STATISTICS=DESCRIPTIVES
+	.
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp -o raw-ascii $TEMPDIR/out.stat
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="compare results"
+diff $TEMPDIR/pspp.list - << EOF
+1.1 DATA LIST.  Reading free-form data from the command file.
++--------+------+
+|Variable|Format|
+#========#======#
+|X       |F8.0  |
+|C       |F8.0  |
++--------+------+
+
+2.1 EXAMINE.  Case Processing Summary
+#=#===============================#
+# #             Cases             #
+# #----------+---------+----------#
+# #   Valid  | Missing |   Total  #
+# #--+-------+-+-------+--+-------#
+# # N|Percent|N|Percent| N|Percent#
+#=#==#=======#=#=======#==#=======#
+#X#52|   100%|0|     0%|52|   100%#
+#=#==#=======#=#=======#==#=======#
+
+2.2 EXAMINE.  Descriptives
+#============================================#=========#==========#
+#                                            #Statistic|Std. Error#
+#============================================#=========#==========#
+#XMean                                       #   2.02  |   .034   #
+# 95% Confidence Interval for MeanLower Bound#  2.021  |          #
+#                                 Upper Bound#  2.017  |          #
+# 5% Trimmed Mean                            #   2.00  |          #
+# Median                                     #         |          #
+# Variance                                   #   .058  |          #
+# Std. Deviation                             #   .242  |          #
+# Minimum                                    #  1.000  |          #
+# Maximum                                    #  3.000  |          #
+# Range                                      #  2.000  |          #
+# Interquartile Range                        #         |          #
+# Skewness                                   #  1.194  |   .330   #
+# Kurtosis                                   #  15.732 |   .650   #
+#============================================#=========#==========#
+
+EOF
+if [ $? -ne 0 ] ; then fail ; fi
+
+pass