From a489451ddf22935fcbe115aa4aa5869b29f18103 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 25 Nov 2004 11:02:53 +0000 Subject: [PATCH] Added the skewness and kurtosis calculations to the EXAMINE command. Added an option to configure to build without the termcap library. --- configure.ac | 8 ++++++++ src/examine.q | 35 +++++++++++++++++++++++++++++++++-- src/factor_stats.c | 38 +++++++++++++++++++++----------------- src/factor_stats.h | 25 +++++++++++++------------ tests/command/examine.sh | 16 ++++++++-------- 5 files changed, 83 insertions(+), 39 deletions(-) diff --git a/configure.ac b/configure.ac index 738ca16a..dbdfa543 100644 --- a/configure.ac +++ b/configure.ac @@ -36,9 +36,17 @@ AC_CHECK_LIB(gnugetopt,getopt_long) AC_CHECK_FUNC(getopt_long,, AC_MSG_ERROR(`This application depends upon getopt_long')) +AC_ARG_WITH(ncurses, +[ --without-ncurses don't compile in ncurses command line editing]) + + +if test "x$with_ncurses" = x"yes"; then AC_CHECK_LIB(ncurses, tgetent, LIBS="-lncurses $LIBS" termcap=yes, AC_CHECK_LIB(termcap, tgetent, LIBS="-ltermcap $LIBS" termcap=yes, termcap=no)) +fi + + if test "$termcap" = yes; then AC_CHECK_HEADERS(termcap.h) AC_DEFINE(HAVE_LIBTERMCAP, 1, diff --git a/src/examine.q b/src/examine.q index f8d8ad36..833980c2 100644 --- a/src/examine.q +++ b/src/examine.q @@ -41,6 +41,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "hash.h" #include "casefile.h" #include "factor_stats.h" +#include "moments.h" + /* (headers) */ #include "chart.h" @@ -120,8 +122,6 @@ print_factors(void) */ - printf("Sum is %g; ",(*fs)->m[0].sum); - printf("N is %g; ",(*fs)->m[0].n); printf("Mean is %g\n",(*fs)->m[0].mean); fs++ ; @@ -1408,10 +1408,41 @@ populate_descriptives(struct tab_table *tbl, int col, int row, TAB_LEFT | TAT_TITLE, _("Skewness")); + + tab_float (tbl, col + 2, + row + 11, + TAB_CENTER, + m->skewness, + 8,3); + + /* stderr of skewness */ + tab_float (tbl, col + 3, + row + 11, + TAB_CENTER, + calc_seskew(m->n), + 8,3); + + tab_text (tbl, col, row + 12, TAB_LEFT | TAT_TITLE, _("Kurtosis")); + + + tab_float (tbl, col + 2, + row + 12, + TAB_CENTER, + m->kurtosis, + 8,3); + + /* stderr of kurtosis */ + tab_float (tbl, col + 3, + row + 12, + TAB_CENTER, + calc_sekurt(m->n), + 8,3); + + } diff --git a/src/factor_stats.c b/src/factor_stats.c index 6794043c..1b1de91a 100644 --- a/src/factor_stats.c +++ b/src/factor_stats.c @@ -24,6 +24,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "hash.h" #include "algorithm.h" #include "alloc.h" +#include "moments.h" #include #include @@ -37,13 +38,14 @@ metrics_precalc(struct metrics *fs) { assert (fs) ; - fs->n = 0; fs->n_missing = 0; - fs->ssq = 0; - fs->sum = 0; + fs->min = DBL_MAX; fs->max = -DBL_MAX; + + fs->moments = moments1_create(MOMENT_KURTOSIS); + fs->ordered_data = hsh_create(20, (hsh_compare_func *) compare_values, (hsh_hash_func *) hash_value, @@ -70,9 +72,9 @@ metrics_calc(struct metrics *fs, const union value *val, } x = val->f; - fs->n += weight; - fs->ssq += x * x * weight; - fs->sum += x * weight; + + moments1_add(fs->moments, x, weight); + if ( x < fs->min) fs->min = x; if ( x > fs->max) fs->max = x; @@ -116,25 +118,27 @@ metrics_calc(struct metrics *fs, const union value *val, void metrics_postcalc(struct metrics *m) { - double sample_var; double cc = 0.0; double tc ; int k1, k2 ; int i; int j = 1; - m->mean = m->sum / m->n; - sample_var = ( m->ssq / m->n - m->mean * m->mean ); + moments1_calculate (m->moments, &m->n, &m->mean, &m->var, + &m->skewness, &m->kurtosis); + + moments1_destroy (m->moments); - m->var = m->n * sample_var / ( m->n - 1) ; - m->stddev = sqrt(m->var); + m->stddev = sqrt(m->var); /* FIXME: Check this is correct ??? Shouldn't we use the sample variance ??? */ m->stderr = sqrt (m->var / m->n) ; + + m->wvp = (struct weighted_value **) hsh_sort(m->ordered_data); m->n_data = hsh_count(m->ordered_data); @@ -251,7 +255,7 @@ factor_statistics_free(struct factor_statistics *f) int factor_statistics_compare(const struct factor_statistics *f0, - const struct factor_statistics *f1, void *aux) + const struct factor_statistics *f1, int width) { int cmp0; @@ -259,7 +263,7 @@ factor_statistics_compare(const struct factor_statistics *f0, assert(f0); assert(f1); - cmp0 = compare_values(&f0->id[0], &f1->id[0], aux); + cmp0 = compare_values(&f0->id[0], &f1->id[0], width); if ( cmp0 != 0 ) return cmp0; @@ -271,20 +275,20 @@ factor_statistics_compare(const struct factor_statistics *f0, if ( ( f0->id[1].f != SYSMIS ) && (f1->id[1].f == SYSMIS) ) return -1; - return compare_values(&f0->id[1], &f1->id[1], aux); + return compare_values(&f0->id[1], &f1->id[1], width); } unsigned int -factor_statistics_hash(const struct factor_statistics *f, void *aux) +factor_statistics_hash(const struct factor_statistics *f, int width) { unsigned int h; - h = hash_value(&f->id[0], aux); + h = hash_value(&f->id[0], width); if ( f->id[1].f != SYSMIS ) - h += hash_value(&f->id[1], aux); + h += hash_value(&f->id[1], width); return h; diff --git a/src/factor_stats.h b/src/factor_stats.h index c193f883..fcddb5f4 100644 --- a/src/factor_stats.h +++ b/src/factor_stats.h @@ -28,16 +28,15 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "hash.h" #include "val.h" + +struct moments1; + struct metrics { double n; double n_missing; - double ssq; - - double sum; - double min; double max; @@ -50,6 +49,11 @@ struct metrics double stddev; + struct moments1 *moments; + + double skewness; + double kurtosis; + double trimmed_mean; /* A hash of data for this factor. @@ -111,7 +115,7 @@ void weighted_value_free(struct weighted_value *wv); struct factor_statistics { - /* The value of the independent variable */ + /* The values of the independent variables */ union value id[2]; /* The an array stats for this factor, one for each dependent var */ @@ -129,18 +133,15 @@ create_factor_statistics (int n, union value *id0, union value *id1); void factor_statistics_free(struct factor_statistics *f); +/* Compare f0 and f1. + width is the width of the independent variable */ int factor_statistics_compare(const struct factor_statistics *f0, - const struct factor_statistics *f1, void *aux); + const struct factor_statistics *f1, int width); unsigned int -factor_statistics_hash(const struct factor_statistics *f, void *aux); - - - - - +factor_statistics_hash(const struct factor_statistics *f, int width); #endif diff --git a/tests/command/examine.sh b/tests/command/examine.sh index 13f7c888..11830edb 100755 --- a/tests/command/examine.sh +++ b/tests/command/examine.sh @@ -159,8 +159,8 @@ Case# QUALITY W BRAND # Maximum # 7.000 | # # Range # 6.000 | # # Interquartile Range # | # -# Skewness # | # -# Kurtosis # | # +# Skewness # .059 | .472 # +# Kurtosis # -.358 | .918 # #==========================================================#=========#==========# 2.4 EXAMINE. Case Processing Summary @@ -220,8 +220,8 @@ Case# QUALITY W BRAND # Maximum # 4.000 | # # Range # 3.000 | # # Interquartile Range # | # -# Skewness # | # -# Kurtosis # | # +# Skewness # .475 | .752 # +# Kurtosis # -1.546 | 1.481 # # -------------------------------------------------------#---------+----------# # Bloggs Mean # 3.50 | .378 # # 95% Confidence Interval for MeanLower Bound# 3.525 | # @@ -234,8 +234,8 @@ Case# QUALITY W BRAND # Maximum # 5.000 | # # Range # 3.000 | # # Interquartile Range # | # -# Skewness # | # -# Kurtosis # | # +# Skewness # -.468 | .752 # +# Kurtosis # -.831 | 1.481 # # -------------------------------------------------------#---------+----------# # Charlies Mean # 4.88 | .441 # # 95% Confidence Interval for MeanLower Bound# 4.904 | # @@ -248,8 +248,8 @@ Case# QUALITY W BRAND # Maximum # 7.000 | # # Range # 4.000 | # # Interquartile Range # | # -# Skewness # | # -# Kurtosis # | # +# Skewness # .304 | .752 # +# Kurtosis # .146 | 1.481 # #======================================================================#=========#==========# EOF -- 2.30.2