Added an implementation for FREQUENCIES/PERCENTILES
author John Darrington <john@darrington.wattle.id.au>
Fri, 19 Dec 2003 07:17:59 +0000 (07:17 +0000)
committer John Darrington <john@darrington.wattle.id.au>
Fri, 19 Dec 2003 07:17:59 +0000 (07:17 +0000)
src/frequencies.q
tests/Makefile.am
tests/command/percentiles.sh [new file with mode: 0755]

index 477eee07a5427d788f81c2e3b21e4cfbb640292c..acd52513945ec09925981d738b855caa02b5432f 100644 (file)
@@ -44,6 +44,7 @@
 #include "tab.h"
 #include "var.h"
 #include "vfm.h"
+#include "str.h"
 
 #include "debug-print.h"
 
@@ -108,8 +109,9 @@ static struct frq_info st_name[frq_n_stats + 1] =
 };
 
 /* Percentiles to calculate. */
-static double *percentiles;
-static int n_percentiles;
+static double *percentiles=0;
+static double *percentile_values=0;
+static int n_percentiles=0;
 
 /* Groups of statistics. */
 #define BI          BIT_INDEX
@@ -733,9 +735,17 @@ add_percentile (double x)
     {
       percentiles = pool_realloc (int_pool, percentiles,
                                  (n_percentiles + 1) * sizeof (double));
+      percentile_values = pool_realloc (int_pool, percentile_values,
+                                 (n_percentiles + 1) * sizeof (double));
+
       if (i < n_percentiles)
+       {
        memmove (&percentiles[i + 1], &percentiles[i],
                 (n_percentiles - i) * sizeof (double));
+       memmove (&percentile_values[i + 1], &percentile_values[i],
+                (n_percentiles - i) * sizeof (double));
+
+       }
       percentiles[i] = x;
       n_percentiles++;
     }
@@ -1061,12 +1071,35 @@ calc_stats (struct variable * v, double d[frq_n_stats])
   struct freq *f;
   int most_often;
 
+  double cum_percent=0;
+  int i=0;
+  double previous_value=SYSMIS;
+
+
+
   /* Calculate the mean and  mode */
   X_bar = 0.0;
   most_often = -1;
   X_mode = SYSMIS;
   for (f = v->p.frq.tab.valid; f < v->p.frq.tab.missing; f++)
     {
+
+      cum_percent += f->c / v->p.frq.tab.valid_cases ;
+
+
+      for(;i < n_percentiles ;  ++i) 
+       {
+         
+
+         if (cum_percent <= percentiles[i]) 
+           break;
+
+         percentile_values[i]=previous_value;
+
+       }
+
+
       /* mean */
       X_bar += f->v.f * f->c;
 
@@ -1081,6 +1114,8 @@ calc_stats (struct variable * v, double d[frq_n_stats])
          /* if there are 2 values , then mode is undefined */
          X_mode=SYSMIS;
        }
+
+      previous_value=f->v.f;
     }
   X_bar /= W;
 
@@ -1154,7 +1189,7 @@ dump_statistics (struct variable * v, int show_varname)
     }
   calc_stats (v, stat_value);
 
-  t = tab_create (2, n_stats, 0);
+  t = tab_create (2, n_stats + n_percentiles, 0);
   tab_dim (t, tab_natural_dimensions);
   tab_vline (t, TAL_1 | TAL_SPACING, 1, 0, n_stats - 1);
   for (i = r = 0; i < frq_n_stats; i++)
@@ -1166,6 +1201,20 @@ dump_statistics (struct variable * v, int show_varname)
        r++;
       }
 
+  for ( i=0 ; i < n_percentiles ; ++i,++r ) { 
+    struct string ds;
+
+    ds_init(gen_pool, &ds, 20 );
+
+    ds_printf(&ds,"%s %d",_("Percentile"),(int)(percentiles[i]*100));
+
+
+    tab_text(t,0,r, TAB_LEFT | TAT_TITLE, ds.string);
+    tab_float(t,1,r,TAB_NONE,percentile_values[i],11,3);
+
+    ds_destroy(&ds);
+  }
+
   tab_columns (t, SOM_COL_DOWN, 1);
   if (show_varname)
     {
index b4a92fd2f2d03f40c2a99db66889a8fdaab969e7..3ce1477be56b533409bce84e3cf47cfe46423205 100644 (file)
@@ -14,6 +14,7 @@ TESTS = command/aggregate.sh \
        command/lag.sh \
        command/list.sh \
        command/loop.sh \
+       command/percentiles.sh \
        command/print.sh \
        command/sample.sh \
        command/sort.sh \
diff --git a/tests/command/percentiles.sh b/tests/command/percentiles.sh
new file mode 100755 (executable)
index 0000000..0d72230
--- /dev/null
@@ -0,0 +1,119 @@
+#!/bin/sh
+
+# This program tests the PERCENTILES subcommand of the FREQUENCIES cmd
+
+TEMPDIR=/tmp/pspp-tst-$$   # per-process scratch directory; removed by cleanup
+
+here=`pwd`   # directory the test was started from; ../src/pspp is run relative to it
+
+# Ensure that top_srcdir is absolute; exit with status 2 if it cannot be entered.
+cd "$top_srcdir" || exit 2; top_srcdir=`pwd`
+
+STAT_CONFIG_PATH=$top_srcdir/config; export STAT_CONFIG_PATH
+
+
+cleanup()   # Remove the scratch directory and everything in it.
+{
+     cd / ; rm -rf "$TEMPDIR"   # step out first: the script works inside $TEMPDIR
+}
+
+
+fail()   # Print the current activity and FAILED, clean up, exit with status 1.
+{
+    echo "$activity"
+    echo FAILED
+    cleanup
+    exit 1
+}
+
+
+no_result()   # Print the current activity and NO RESULT, clean up, exit with status 2.
+{
+    echo "$activity"
+    echo NO RESULT
+    cleanup
+    exit 2
+}
+
+pass()   # Clean up and exit with status 0.
+{
+    cleanup
+    exit 0
+}
+
+mkdir -p "$TEMPDIR" || exit 2
+# Run from inside the scratch directory (the test later reads $TEMPDIR/pspp.list).
+cd "$TEMPDIR" || { cleanup; exit 2; }
+
+activity="create prog"
+cat > "$TEMPDIR/percents.stat" <<EOF
+data list free /z x(f3.2) y(f3.0) d(a30).
+begin data.
+1 3 4 apples
+2 5 6 pairs
+3 4 5 bannanas
+4 3 9 pairs
+5 1 2 pairs
+6 4 5 apricots
+7 4 4 bannanas
+8 4 5 apples
+9 3 3 peaches
+10 2 3 coconuts
+end data.
+
+frequencies z  /statistics=all /percentiles = 5,10,30,90.
+
+finish.
+
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="run prog"   # reported by no_result/fail if this step goes wrong
+"$here/../src/pspp" -o raw-ascii "$TEMPDIR/percents.stat"
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="compare output"
+diff -b -B "$TEMPDIR/pspp.list" - <<EOF
+1.1 FREQUENCIES.  Z: 
++-----------+--------+---------+--------+--------+--------+
+|           |        |         |        |  Valid |   Cum  |
+|Value Label|  Value |Frequency| Percent| Percent| Percent|
+#===========#========#=========#========#========#========#
+|           |     .01|        1|    10.0|    10.0|    10.0|
+|           |     .02|        1|    10.0|    10.0|    20.0|
+|           |     .03|        1|    10.0|    10.0|    30.0|
+|           |     .04|        1|    10.0|    10.0|    40.0|
+|           |     .05|        1|    10.0|    10.0|    50.0|
+|           |     .06|        1|    10.0|    10.0|    60.0|
+|           |     .07|        1|    10.0|    10.0|    70.0|
+|           |     .08|        1|    10.0|    10.0|    80.0|
+|           |     .09|        1|    10.0|    10.0|    90.0|
+|           |     .10|        1|    10.0|    10.0|   100.0|
+#===========#========#=========#========#========#========#
+|               Total|       10|   100.0|   100.0|        |
++--------------------+---------+--------+--------+--------+
+
+Mean            .055
+S.E. Mean       .010
+Median          .   
+Mode            .   
+Std Dev         .030
+Variance        .001
+Kurtosis      -1.200
+S.E. Kurt      1.334
+Skewness        .000
+S.E. Skew       .687
+Range           .090
+Minimum         .010
+Maximum         .100
+Sum             .550
+Percentile 5    .   
+Percentile 10   .010
+Percentile 29   .030
+Percentile 90   .090
+EOF
+if [ $? -ne 0 ] ; then fail ; fi
+
+
+pass;