Added the skewness and kurtosis calculations to the EXAMINE command.
author John Darrington <john@darrington.wattle.id.au>
Thu, 25 Nov 2004 11:02:53 +0000 (11:02 +0000)
committer John Darrington <john@darrington.wattle.id.au>
Thu, 25 Nov 2004 11:02:53 +0000 (11:02 +0000)
Added an option to configure to build without the termcap library.

configure.ac
src/examine.q
src/factor_stats.c
src/factor_stats.h
tests/command/examine.sh

index 738ca16a5742b482681012028a69cdb9c5a6dfbd..dbdfa543e5368523d00ec1495dd2d924b793ebe8 100644 (file)
@@ -36,9 +36,17 @@ AC_CHECK_LIB(gnugetopt,getopt_long)
 AC_CHECK_FUNC(getopt_long,,
        AC_MSG_ERROR(`This application depends upon getopt_long'))
 
+AC_ARG_WITH(ncurses,
+[  --without-ncurses         don't compile in ncurses command line editing])
+
+dnl Probe for ncurses/termcap by default; skip only when --without-ncurses is given.
+if test "x$with_ncurses" != x"no"; then
 AC_CHECK_LIB(ncurses, tgetent, LIBS="-lncurses $LIBS" termcap=yes,
   AC_CHECK_LIB(termcap, tgetent, LIBS="-ltermcap $LIBS" termcap=yes,
                termcap=no))
+fi
+
+
 if test "$termcap" = yes; then
   AC_CHECK_HEADERS(termcap.h)
   AC_DEFINE(HAVE_LIBTERMCAP, 1, 
index f8d8ad36ea8821c281e8cf9d6e38a1117309d3d6..833980c2e48b93da350bdd1fa02e4653dd7736cb 100644 (file)
@@ -41,6 +41,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #include "hash.h"
 #include "casefile.h"
 #include "factor_stats.h"
+#include "moments.h"
+
 /* (headers) */
 #include "chart.h"
 
@@ -120,8 +122,6 @@ print_factors(void)
          */
 
                 
-         printf("Sum is %g; ",(*fs)->m[0].sum);
-         printf("N is %g; ",(*fs)->m[0].n);
          printf("Mean is %g\n",(*fs)->m[0].mean);
 
          fs++ ;
@@ -1408,10 +1408,41 @@ populate_descriptives(struct tab_table *tbl, int col, int row,
            TAB_LEFT | TAT_TITLE,
            _("Skewness"));
 
+
+  tab_float (tbl, col + 2,
+            row + 11,
+            TAB_CENTER,
+            m->skewness,
+            8,3);
+
+  /* stderr of skewness */
+  tab_float (tbl, col + 3,
+            row + 11,
+            TAB_CENTER,
+            calc_seskew(m->n),
+            8,3);
+
+
   tab_text (tbl, col, 
            row + 12,
            TAB_LEFT | TAT_TITLE,
            _("Kurtosis"));
+
+
+  tab_float (tbl, col + 2,
+            row + 12,
+            TAB_CENTER,
+            m->kurtosis,
+            8,3);
+
+  /* stderr of kurtosis */
+  tab_float (tbl, col + 3,
+            row + 12,
+            TAB_CENTER,
+            calc_sekurt(m->n),
+            8,3);
+
+
 }
 
 
index 6794043cec20f5151e712fe4b63c63b184a112e7..1b1de91adb4c4dab8e6ffa8d2cb7e9957af0d9a9 100644 (file)
@@ -24,6 +24,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #include "hash.h"
 #include "algorithm.h"
 #include "alloc.h"
+#include "moments.h"
 
 #include <stdlib.h>
 #include <math.h>
@@ -37,13 +38,14 @@ metrics_precalc(struct metrics *fs)
 {
   assert (fs) ;
 
-  fs->n = 0;
   fs->n_missing = 0;
-  fs->ssq = 0;
-  fs->sum = 0;
+
   fs->min = DBL_MAX;
   fs->max = -DBL_MAX;
 
+
+  fs->moments = moments1_create(MOMENT_KURTOSIS);
+
   fs->ordered_data = hsh_create(20,
                                (hsh_compare_func *) compare_values,
                                (hsh_hash_func *) hash_value,
@@ -70,9 +72,9 @@ metrics_calc(struct metrics *fs, const union value *val,
     }
 
   x = val->f;
-  fs->n    += weight;
-  fs->ssq  += x * x * weight;
-  fs->sum  += x * weight;
+
+  moments1_add(fs->moments, x, weight);
+
 
   if ( x < fs->min) fs->min = x;
   if ( x > fs->max) fs->max = x;
@@ -116,25 +118,27 @@ metrics_calc(struct metrics *fs, const union value *val,
 void
 metrics_postcalc(struct metrics *m)
 {
-  double sample_var; 
   double cc = 0.0;
   double tc ;
   int k1, k2 ;
   int i;
   int j = 1;  
 
-  m->mean = m->sum / m->n;
 
-  sample_var = ( m->ssq / m->n  - m->mean * m->mean );
+  moments1_calculate (m->moments, &m->n, &m->mean, &m->var, 
+                     &m->skewness, &m->kurtosis);
+
+  moments1_destroy (m->moments);
 
-  m->var  = m->n * sample_var / ( m->n - 1) ;
-  m->stddev = sqrt(m->var);
 
+  m->stddev = sqrt(m->var);
 
   /* FIXME: Check this is correct ???
      Shouldn't we use the sample variance ??? */
   m->stderr = sqrt (m->var / m->n) ;
 
+
+
   m->wvp = (struct weighted_value **) hsh_sort(m->ordered_data);
   m->n_data = hsh_count(m->ordered_data);
 
@@ -251,7 +255,7 @@ factor_statistics_free(struct factor_statistics *f)
 
 int 
 factor_statistics_compare(const struct factor_statistics *f0,
-                         const struct factor_statistics *f1, void *aux)
+                         const struct factor_statistics *f1, int width)
 {
 
   int cmp0;
@@ -259,7 +263,7 @@ factor_statistics_compare(const struct factor_statistics *f0,
   assert(f0);
   assert(f1);
 
-  cmp0 = compare_values(&f0->id[0], &f1->id[0], aux);
+  cmp0 = compare_values(&f0->id[0], &f1->id[0], width);
 
   if ( cmp0 != 0 ) 
     return cmp0;
@@ -271,20 +275,20 @@ factor_statistics_compare(const struct factor_statistics *f0,
   if ( ( f0->id[1].f != SYSMIS )  && (f1->id[1].f == SYSMIS) ) 
     return -1;
 
-  return compare_values(&f0->id[1], &f1->id[1], aux);
+  return compare_values(&f0->id[1], &f1->id[1], width);
   
 }
 
 unsigned int 
-factor_statistics_hash(const struct factor_statistics *f, void *aux)
+factor_statistics_hash(const struct factor_statistics *f, int width)
 {
   
   unsigned int h;
 
-  h = hash_value(&f->id[0], aux);
+  h = hash_value(&f->id[0], width);
   
   if ( f->id[1].f != SYSMIS )
-    h += hash_value(&f->id[1], aux);
+    h += hash_value(&f->id[1], width);
 
 
   return h;
index c193f883e47384008781b39bd41333f0969990c5..fcddb5f4720b6c64563f5db815a9a335c3192c81 100644 (file)
@@ -28,16 +28,15 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #include "hash.h"
 #include "val.h"
 
+
+struct moments1;
+
 struct metrics
 {
   double n;
 
   double n_missing;
   
-  double ssq;
-  
-  double sum;
-
   double min;
 
   double max;
@@ -50,6 +49,11 @@ struct metrics
 
   double stddev;
 
+  struct moments1 *moments;
+
+  double skewness;
+  double kurtosis;
+
   double trimmed_mean;
 
   /* A hash of data for this factor.
@@ -111,7 +115,7 @@ void weighted_value_free(struct weighted_value *wv);
 
 struct factor_statistics {
 
-  /* The value of the independent variable */
+  /* The values of the independent variables */
   union value id[2];
 
   /* The an array stats for this factor, one for each dependent var */
@@ -129,18 +133,15 @@ create_factor_statistics (int n, union value *id0, union value *id1);
 void factor_statistics_free(struct factor_statistics *f);
 
 
+/* Compare f0 and f1.
+   width is the width of the independent variable */
 int 
 factor_statistics_compare(const struct factor_statistics *f0,
-                         const struct factor_statistics *f1, void *aux);
+                         const struct factor_statistics *f1, int width);
 
                              
 
 unsigned int 
-factor_statistics_hash(const struct factor_statistics *f, void *aux);
-
-
-
-
-
+factor_statistics_hash(const struct factor_statistics *f, int width);
 
 #endif
index 13f7c888106f168752789b31ba22fdc61350255a..11830edbfba4227fac4e643b3de157bda41dfee0 100755 (executable)
@@ -159,8 +159,8 @@ Case#  QUALITY        W    BRAND
 #               Maximum                                    #  7.000  |          #
 #               Range                                      #  6.000  |          #
 #               Interquartile Range                        #         |          #
-#               Skewness                                   #         |          #
-#               Kurtosis                                   #         |          #
+#               Skewness                                   #   .059  |   .472   #
+#               Kurtosis                                   #  -.358  |   .918   #
 #==========================================================#=========#==========#
 
 2.4 EXAMINE.  Case Processing Summary
@@ -220,8 +220,8 @@ Case#  QUALITY        W    BRAND
 #                           Maximum                                    #  4.000  |          #
 #                           Range                                      #  3.000  |          #
 #                           Interquartile Range                        #         |          #
-#                           Skewness                                   #         |          #
-#                           Kurtosis                                   #         |          #
+#                           Skewness                                   #   .475  |   .752   #
+#                           Kurtosis                                   #  -1.546 |   1.481  #
 #               -------------------------------------------------------#---------+----------#
 #               Bloggs      Mean                                       #   3.50  |   .378   #
 #                           95% Confidence Interval for MeanLower Bound#  3.525  |          #
@@ -234,8 +234,8 @@ Case#  QUALITY        W    BRAND
 #                           Maximum                                    #  5.000  |          #
 #                           Range                                      #  3.000  |          #
 #                           Interquartile Range                        #         |          #
-#                           Skewness                                   #         |          #
-#                           Kurtosis                                   #         |          #
+#                           Skewness                                   #  -.468  |   .752   #
+#                           Kurtosis                                   #  -.831  |   1.481  #
 #               -------------------------------------------------------#---------+----------#
 #               Charlies    Mean                                       #   4.88  |   .441   #
 #                           95% Confidence Interval for MeanLower Bound#  4.904  |          #
@@ -248,8 +248,8 @@ Case#  QUALITY        W    BRAND
 #                           Maximum                                    #  7.000  |          #
 #                           Range                                      #  4.000  |          #
 #                           Interquartile Range                        #         |          #
-#                           Skewness                                   #         |          #
-#                           Kurtosis                                   #         |          #
+#                           Skewness                                   #   .304  |   .752   #
+#                           Kurtosis                                   #   .146  |   1.481  #
 #======================================================================#=========#==========#
 
 EOF