Fixed the trimmed mean calculation under various special cases.
author John Darrington <john@darrington.wattle.id.au>
Sat, 11 Dec 2004 03:45:40 +0000 (03:45 +0000)
committer John Darrington <john@darrington.wattle.id.au>
Sat, 11 Dec 2004 03:45:40 +0000 (03:45 +0000)
src/ChangeLog
src/factor_stats.c
tests/Makefile.am
tests/bugs/examine-1sample.sh [new file with mode: 0755]
tests/command/trimmed-mean.sh [new file with mode: 0755]

diff --git a/src/ChangeLog b/src/ChangeLog
index 8ccbc7ccfa78c4fc2fb5afe0cd907d1cea5a6a93..00f49780616a0bd7b55010796b105287fe95d3e6 100644 (file)
@@ -1,3 +1,8 @@
+Sat Dec 11 11:43:45 WST 2004 John Darrington <john@darrington.wattle.id.au>
+
+       * factor_stats.c Fixed calculation of trimmed mean under various
+       special conditions.
+
 Sat Dec  4 17:14:45 WST 2004 John Darrington <john@darrington.wattle.id.au>
 
        * histogram.c chart.[ch] factor_stats.c frequencies.q
diff --git a/src/factor_stats.c b/src/factor_stats.c
index 2ba785a7d8e8c7044b2f42fa3ff2c94f96124d02..3a88ab44a4a234a8f64dec6d1d9674cc2ee79caf 100644 (file)
@@ -144,20 +144,26 @@ metrics_postcalc(struct metrics *m)
   m->wvp = (struct weighted_value **) hsh_sort(m->ordered_data);
   m->n_data = hsh_count(m->ordered_data);
 
-  if ( m->n_data == 0 ) 
+  m->histogram = histogram_create(10, m->min, m->max);
+
+  for ( i = 0 ; i < m->n_data ; ++i ) 
     {
-      m->trimmed_mean = m->mean;
-      return;
+      struct weighted_value **wv = (m->wvp) ;
+      gsl_histogram_accumulate(m->histogram, wv[i]->v.f, wv[i]->w);
     }
 
 
   /* Trimmed mean calculation */
+  if ( m->n_data <= 1 ) 
+    {
+      m->trimmed_mean = m->mean;
+      return;
+    }
 
   tc = m->n * 0.05 ;
   k1 = -1;
   k2 = -1;
 
-
   for ( i = 0 ; i < m->n_data ; ++i ) 
     {
       cc += m->wvp[i]->w;
@@ -179,6 +185,12 @@ metrics_postcalc(struct metrics *m)
        k2 = i;
     }
 
+  /* Special case here */
+  if ( k1 + 1 == k2 ) 
+    {
+      m->trimmed_mean = m->wvp[k2]->v.f;
+      return;
+    }
 
   m->trimmed_mean = 0;
   for ( i = k1 + 2 ; i <= k2 - 1 ; ++i ) 
@@ -191,14 +203,6 @@ metrics_postcalc(struct metrics *m)
   m->trimmed_mean += (m->wvp[k1 + 1]->cc - tc) * m->wvp[k1 + 1]->v.f ;
   m->trimmed_mean /= 0.9 * m->n ;
 
-  m->histogram = histogram_create(10, m->min, m->max);
-
-  for ( i = 0 ; i < m->n_data ; ++i ) 
-    {
-      struct weighted_value **wv = (m->wvp) ;
-      gsl_histogram_accumulate(m->histogram, wv[i]->v.f, wv[i]->w);
-    }
-
 }
 
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5ab78fd87f22effd5d4202ebcf3a7da97a5aee07..696af98ace73f9fe8e5255b966189c2b8ec50774 100644 (file)
@@ -35,6 +35,7 @@ TESTS = \
        command/t-test-paired-missing-anal.sh \
        command/t-test-paired-missing-list.sh \
        command/t-test-pairs.sh \
+       command/trimmed-mean.sh \
        command/tabs.sh \
        command/use.sh \
        command/weight.sh \
@@ -45,6 +46,7 @@ TESTS = \
        bugs/data-crash.sh \
        bugs/double-frequency.sh \
        bugs/get.sh \
+       bugs/examine-1sample.sh \
        bugs/html-frequency.sh \
        bugs/multipass.sh \
        bugs/random.sh \
diff --git a/tests/bugs/examine-1sample.sh b/tests/bugs/examine-1sample.sh
new file mode 100755 (executable)
index 0000000..497ee89
--- /dev/null
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+# This program tests  for a bug in the EXAMINE command, where it
+# would crash if a data file with only one case was presented
+
+TEMPDIR=/tmp/pspp-tst-$$
+
+here=`pwd`;
+
+# ensure that top_srcdir is absolute
+cd $top_srcdir; top_srcdir=`pwd`
+
+export STAT_CONFIG_PATH=$top_srcdir/config
+
+
+cleanup()
+{
+     rm -rf $TEMPDIR
+}
+
+
+fail()
+{
+    echo $activity
+    echo FAILED
+    cleanup;
+    exit 1;
+}
+
+
+no_result()
+{
+    echo $activity
+    echo NO RESULT;
+    cleanup;
+    exit 2;
+}
+
+pass()
+{
+    cleanup;
+    exit 0;
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+
+activity="create program"
+cat > $TEMPDIR/out.stat <<EOF
+DATA LIST LIST /quality * .
+BEGIN DATA
+3  
+END DATA
+
+
+EXAMINE
+       quality 
+       /STATISTICS descriptives 
+       .
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp -o raw-ascii $TEMPDIR/out.stat
+if [ $? -ne 0 ] ; then fail ; fi
+
+pass
diff --git a/tests/command/trimmed-mean.sh b/tests/command/trimmed-mean.sh
new file mode 100755 (executable)
index 0000000..0690c0e
--- /dev/null
@@ -0,0 +1,117 @@
+#!/bin/sh
+
+# This program tests  the Trimmed Mean calculation, in the case
+# where the data is weighted towards the centre
+
+TEMPDIR=/tmp/pspp-tst-$$
+
+here=`pwd`;
+
+# ensure that top_srcdir is absolute
+cd $top_srcdir; top_srcdir=`pwd`
+
+export STAT_CONFIG_PATH=$top_srcdir/config
+
+
+cleanup()
+{
+     rm -rf $TEMPDIR
+}
+
+
+fail()
+{
+    echo $activity
+    echo FAILED
+    cleanup;
+    exit 1;
+}
+
+
+no_result()
+{
+    echo $activity
+    echo NO RESULT;
+    cleanup;
+    exit 2;
+}
+
+pass()
+{
+    cleanup;
+    exit 0;
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+
+
+activity="create program"
+cat > $TEMPDIR/out.stat <<EOF
+DATA LIST LIST /x * c *.
+BEGIN DATA.
+1 1
+2 49
+3 2
+END DATA.
+
+WEIGHT BY c.
+
+EXAMINE
+       x
+       /STATISTICS=DESCRIPTIVES
+       .
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp -o raw-ascii $TEMPDIR/out.stat
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="compare results"
+diff $TEMPDIR/pspp.list - << EOF
+1.1 DATA LIST.  Reading free-form data from the command file.
++--------+------+
+|Variable|Format|
+#========#======#
+|X       |F8.0  |
+|C       |F8.0  |
++--------+------+
+
+2.1 EXAMINE.  Case Processing Summary
+#=#===============================#
+# #             Cases             #
+# #----------+---------+----------#
+# #   Valid  | Missing |   Total  #
+# #--+-------+-+-------+--+-------#
+# # N|Percent|N|Percent| N|Percent#
+#=#==#=======#=#=======#==#=======#
+#X#52|   100%|0|     0%|52|   100%#
+#=#==#=======#=#=======#==#=======#
+
+2.2 EXAMINE.  Descriptives
+#============================================#=========#==========#
+#                                            #Statistic|Std. Error#
+#============================================#=========#==========#
+#XMean                                       #   2.02  |   .034   #
+# 95% Confidence Interval for MeanLower Bound#  2.021  |          #
+#                                 Upper Bound#  2.017  |          #
+# 5% Trimmed Mean                            #   2.00  |          #
+# Median                                     #         |          #
+# Variance                                   #   .058  |          #
+# Std. Deviation                             #   .242  |          #
+# Minimum                                    #  1.000  |          #
+# Maximum                                    #  3.000  |          #
+# Range                                      #  2.000  |          #
+# Interquartile Range                        #         |          #
+# Skewness                                   #  1.194  |   .330   #
+# Kurtosis                                   #  15.732 |   .650   #
+#============================================#=========#==========#
+
+EOF
+if [ $? -ne 0 ] ; then fail ; fi
+
+pass