src/stats.c

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   3    Written by Ben Pfaff <blp@gnu.org>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  18    02111-1307, USA. */
  19
  20 #include <config.h>
  21 #include <math.h>
  22 #include "stats.h"
  23
  24 /* Returns the fourth power of its argument. */
  25 double
  26 hypercube (double x)
  27 {
  28   x *= x;
  29   return x * x;
  30 }
  31
  32 /* Returns the cube of its argument. */
  33 double
  34 cube (double x)
  35 {
  36   return x * x * x;
  37 }
  38
  39 /* Returns the square of its argument. */
  40 double
  41 sqr (double x)
  42 {
  43   return x * x;
  44 }
  45
  46 /*
  47  * kurtosis = [(n+1){n*sum(X**4) - 4*sum(X)*sum(X**3)
  48  *                   + 6*sum(X)**2*sum(X**2)/n - 3*sum(X)**4/n**2}]
  49  *           /[(n-1)(n-2)(n-3)*(variance)**2]
  50  *             -[3*{(n-1)**2}]
  51  *             /[(n-2)(n-3)]
  52  *
  53  * This and other formulas from _Biometry_, Sokal and Rohlf,
  54  * W. H. Freeman and Company, 1969.  See pages 117 and 136 especially.
  55  */
  56 double
  57 calc_kurt (const double d[4], double n, double variance)
  58 {
  59   return
  60     (((n + 1) * (n * d[3]
  61                  - 4.0 * d[0] * d[2]
  62                  + 6.0 * sqr (d[0]) * d[1] / n
  63                  - 3.0 * hypercube (d[0]) / sqr (n)))
  64      / ((n - 1.0) * (n - 2.0) * (n - 3.0) * sqr (variance))
  65      - (3.0 * sqr (n - 1.0))
  66      / ((n - 2.0) * (n - 3.)));
  67 }
  68
  69 /*
  70  * standard error of kurtosis = sqrt([24n((n-1)**2)]/[(n-3)(n-2)(n+3)(n+5)])
  71  */
  72 double
  73 calc_sekurt (double n)
  74 {
  75   return sqrt ((24.0 * n * sqr (n - 1.0))
  76                / ((n - 3.0) * (n - 2.0) * (n + 3.0) * (n + 5.0)));
  77 }
  78
  79 /*
  80  * skewness = [n*sum(X**3) - 3*sum(X)*sum(X**2) + 2*sum(X)**3/n]/
  81  *           /[(n-1)(n-2)*(variance)**3]
  82  */
  83 double
  84 calc_skew (const double d[3], double n, double stddev)
  85 {
  86   return
  87     ((n * d[2] - 3.0 * d[0] * d[1] + 2.0 * cube (d[0]) / n)
  88      / ((n - 1.0) * (n - 2.0) * cube (stddev)));
  89 }
  90
  91 /*
  92  * standard error of skewness = sqrt([6n(n-1)] / [(n-2)(n+1)(n+3)])
  93  */
  94 double
  95 calc_seskew (double n)
  96 {
  97   return
  98     sqrt ((6.0 * n * (n - 1.0))
  99           / ((n - 2.0) * (n + 1.0) * (n + 3.0)));
 100 }
 101
 102 /* Returns one-sided significance level corresponding to standard
 103    normal deviate X.  Algorithm from _SPSS Statistical Algorithms_,
 104    Appendix 1. */
 105 #if 0
 106 double
 107 normal_sig (double x)
 108 {
 109   const double a1 = .070523078;
 110   const double a2 = .0422820123;
 111   const double a3 = .0092705272;
 112   const double a4 = .0001520143;
 113   const double a5 = .0002765672;
 114   const double a6 = .0000430638;
 115
 116   const double z = fabs (x) <= 14.14 ? 0.7071067812 * fabs (x) : 10.;
 117   double r;
 118
 119   r = 1. + z * (a1 + z * (a2 + z * (a3 + z * (a4 + z * (a5 + z * a6)))));
 120   r *= r;       /* r ** 2 */
 121   r *= r;       /* r ** 4 */
 122   r *= r;       /* r ** 16 */
 123
 124   return .5 / r;
 125 }
 126 #else /* 1 */
 127 /* Taken from _BASIC Statistics: An Introduction to Problem Solving
 128    with Your Personal Computer_, Jerry W. O'Dell, TAB 1984, page 314-5. */
 129 double
 130 normal_sig (double z)
 131 {
 132   double h;
 133
 134   h = 1 + 0.0498673470 * z;
 135   z *= z;
 136   h += 0.0211410061 * z;
 137   z *= z;
 138   h += 0.0032776263 * z;
 139   z *= z;
 140   h += 0.0000380036 * z;
 141   z *= z;
 142   h += 0.0000488906 * z;
 143   z *= z;
 144   h += 0.0000053830 * z;
 145   return pow (h, -16.) / 2.;
 146 }
 147 #endif /* 1 */
 148
 149 /* Algorithm from _Turbo Pascal Programmer's Toolkit_, Rugg and
 150    Feldman, Que 1989.  Returns the significance level of chi-square
 151    value CHISQ with DF degrees of freedom, correct to at least 7
 152    decimal places.  */
 153 double
 154 chisq_sig (double x, int k)
 155 {
 156   if (x <= 0. || k < 1)
 157     return 1.0;
 158   else if (k == 1)
 159     return 2. * normal_sig (sqrt (x));
 160   else if (k <= 30)
 161     {
 162       double z, z_partial, term, denom, numerator, value;
 163
 164       z = 1.;
 165       z_partial = 1.;
 166       term = k;
 167       do
 168         {
 169           term += 2;
 170           z_partial *= x / term;
 171           if (z_partial >= 10000000.)
 172             return 0.;
 173           z += z_partial;
 174         }
 175       while (z_partial >= 1.e-7);
 176       denom = term = 2 - k % 2;
 177       while (term < k)
 178         {
 179           term += 2;
 180           denom *= term;
 181         }
 182       if (k % 2)
 183         {
 184           value = ((k + 1) / 2) * log (x) - x / 2.;
 185           numerator = exp (value) * sqrt (2. / x / PI);
 186         }
 187       else
 188         {
 189           value = k / 2. * log (x) - x / 2.;
 190           numerator = exp (value);
 191         }
 192       return 1. - numerator * z / denom;
 193     }
 194   else
 195     {
 196       double term, numer, norm_x;
 197
 198       term = 2. / 9. / k;
 199       numer = pow (x / k, 1. / 3.);
 200       norm_x = numer / sqrt (term);
 201       return 1.0 - normal_sig (norm_x);
 202     }
 203 }