From 40d9e4dabdd3f9a7d28d3b1dae071a8b1e7a6d1a Mon Sep 17 00:00:00 2001 From: John Darrington Date: Fri, 19 Dec 2003 07:17:59 +0000 Subject: [PATCH] Added an implementation for FREQUENCIES/PERCENTILES --- src/frequencies.q | 55 +++++++++++++++- tests/Makefile.am | 1 + tests/command/percentiles.sh | 119 +++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 3 deletions(-) create mode 100755 tests/command/percentiles.sh diff --git a/src/frequencies.q b/src/frequencies.q index 477eee07..acd52513 100644 --- a/src/frequencies.q +++ b/src/frequencies.q @@ -44,6 +44,7 @@ #include "tab.h" #include "var.h" #include "vfm.h" +#include "str.h" #include "debug-print.h" @@ -108,8 +109,9 @@ static struct frq_info st_name[frq_n_stats + 1] = }; /* Percentiles to calculate. */ -static double *percentiles; -static int n_percentiles; +static double *percentiles=0; +static double *percentile_values=0; +static int n_percentiles=0; /* Groups of statistics. */ #define BI BIT_INDEX @@ -733,9 +735,17 @@ add_percentile (double x) { percentiles = pool_realloc (int_pool, percentiles, (n_percentiles + 1) * sizeof (double)); + percentile_values = pool_realloc (int_pool, percentile_values, + (n_percentiles + 1) * sizeof (double)); + if (i < n_percentiles) + { memmove (&percentiles[i + 1], &percentiles[i], (n_percentiles - i) * sizeof (double)); + memmove (&percentile_values[i + 1], &percentile_values[i], + (n_percentiles - i) * sizeof (double)); + + } percentiles[i] = x; n_percentiles++; } @@ -1061,12 +1071,35 @@ calc_stats (struct variable * v, double d[frq_n_stats]) struct freq *f; int most_often; + double cum_percent=0; + int i=0; + double previous_value=SYSMIS; + + + /* Calculate the mean and mode */ X_bar = 0.0; most_often = -1; X_mode = SYSMIS; for (f = v->p.frq.tab.valid; f < v->p.frq.tab.missing; f++) { + + + cum_percent += f->c / v->p.frq.tab.valid_cases ; + + + for(;i < n_percentiles ; ++i) + { + + + if (cum_percent <= percentiles[i]) + break; + + percentile_values[i]=previous_value; + + } + + /* mean */ X_bar += f->v.f * f->c; @@ -1081,6 +1114,8 @@ calc_stats (struct variable * v, double d[frq_n_stats]) /* if there are 2 values , then mode is undefined */ X_mode=SYSMIS; } + + previous_value=f->v.f; } X_bar /= W; @@ -1154,7 +1189,7 @@ dump_statistics (struct variable * v, int show_varname) } calc_stats (v, stat_value); - t = tab_create (2, n_stats, 0); + t = tab_create (2, n_stats + n_percentiles, 0); tab_dim (t, tab_natural_dimensions); tab_vline (t, TAL_1 | TAL_SPACING, 1, 0, n_stats - 1); for (i = r = 0; i < frq_n_stats; i++) @@ -1166,6 +1201,20 @@ dump_statistics (struct variable * v, int show_varname) r++; } + for ( i=0 ; i < n_percentiles ; ++i,++r ) { + struct string ds; + + ds_init(gen_pool, &ds, 20 ); + + ds_printf(&ds,"%s %d",_("Percentile"),(int)(percentiles[i]*100)); + + + tab_text(t,0,r, TAB_LEFT | TAT_TITLE, ds.string); + tab_float(t,1,r,TAB_NONE,percentile_values[i],11,3); + + ds_destroy(&ds); + } + tab_columns (t, SOM_COL_DOWN, 1); if (show_varname) { diff --git a/tests/Makefile.am b/tests/Makefile.am index b4a92fd2..3ce1477b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -14,6 +14,7 @@ TESTS = command/aggregate.sh \ command/lag.sh \ command/list.sh \ command/loop.sh \ + command/percentiles.sh \ command/print.sh \ command/sample.sh \ command/sort.sh \ diff --git a/tests/command/percentiles.sh b/tests/command/percentiles.sh new file mode 100755 index 00000000..0d722300 --- /dev/null +++ b/tests/command/percentiles.sh @@ -0,0 +1,119 @@ +#!/bin/sh + +# This program tests the PERCENTILES subcommand of the FREQUENCIES cmd + +TEMPDIR=/tmp/pspp-tst-$$ + +here=`pwd`; + +# ensure that top_srcdir is absolute +cd $top_srcdir; top_srcdir=`pwd` + +export STAT_CONFIG_PATH=$top_srcdir/config + + +cleanup() +{ + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="create prog" +cat > $TEMPDIR/percents.stat <