pintos-os.org Git - pspp/blob - lib/tukey/ptukey.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2011 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16 */
  17
  18 /* This file is taken from the R project source code, and modified.
  19    The original copyright notice is reproduced below: */
  20
  21 /*
  22  *  Mathlib : A C Library of Special Functions
  23  *  Copyright (C) 1998       Ross Ihaka
  24  *  Copyright (C) 2000--2007 The R Development Core Team
  25  *
  26  *  This program is free software; you can redistribute it and/or modify
  27  *  it under the terms of the GNU General Public License as published by
  28  *  the Free Software Foundation; either version 2 of the License, or
  29  *  (at your option) any later version.
  30  *
  31  *  This program is distributed in the hope that it will be useful,
  32  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  33  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  34  *  GNU General Public License for more details.
  35  *
  36  *  You should have received a copy of the GNU General Public License
  37  *  along with this program; if not, a copy is available at
  38  *  http://www.r-project.org/Licenses/
  39  *
  40  *  SYNOPSIS
  41  *
  42  *    #include <Rmath.h>
  43  *    double ptukey(q, rr, cc, df, lower_tail, log_p);
  44  *
  45  *  DESCRIPTION
  46  *
  47  *    Computes the probability that the maximum of rr studentized
  48  *    ranges, each based on cc means and with df degrees of freedom
  49  *    for the standard error, is less than q.
  50  *
  51  *    The algorithm is based on that of the reference.
  52  *
  53  *  REFERENCE
  54  *
  55  *    Copenhaver, Margaret Diponzio & Holland, Burt S.
  56  *    Multiple comparisons of simple effects in
  57  *    the two-way analysis of variance with fixed effects.
  58  *    Journal of Statistical Computation and Simulation,
  59  *    Vol.30, pp.1-15, 1988.
  60  */
  61
  62
  63
  64 #include <config.h>
  65
  66 #include "libpspp/compiler.h"
  67 #include "tukey.h"
  68
  69 #include <gsl/gsl_sf_gamma.h>
  70 #include <gsl/gsl_cdf.h>
  71 #include <assert.h>
  72 #include <math.h>
  73
  74 #define R_D__0  (log_p ? ML_NEGINF : 0.)                /* 0 */
  75 #define R_D__1  (log_p ? 0. : 1.)                       /* 1 */
  76 #define R_DT_0  (lower_tail ? R_D__0 : R_D__1)          /* 0 */
  77 #define R_DT_1  (lower_tail ? R_D__1 : R_D__0)          /* 1 */
  78
  79 #define R_D_val(x)      (log_p  ? log(x) : (x))         /*  x  in pF(x,..) */
  80 #define R_D_Clog(p)     (log_p  ? log1p(-(p)) : (0.5 - (p) + 0.5)) /* [log](1-p) */
  81 #define R_DT_val(x)     (lower_tail ? R_D_val(x)  : R_D_Clog(x))
  82
  83
  84 #define ME_PRECISION    8
  85
  86
  87 static inline double
  88 pnorm(double x, double mu, double sigma, int lower_tail, int log_p)
  89 {
  90   assert (lower_tail == 1);
  91   assert (log_p == 0);
  92   assert (sigma == 1.0);
  93
  94   return gsl_cdf_gaussian_P (x - mu, sigma);
  95 }
  96
  97
  98 static double
  99 wprob (double w, double rr, double cc)
 100 {
 101   const double M_1_SQRT_2PI = 1 / sqrt (2 * M_PI);
 102
 103
 104 /*  wprob() :
 105
 106         This function calculates probability integral of Hartley's
 107         form of the range.
 108
 109         w     = value of range
 110         rr    = no. of rows or groups
 111         cc    = no. of columns or treatments
 112         ir    = error flag = 1 if pr_w probability > 1
 113         pr_w = returned probability integral from (0, w)
 114
 115         program will not terminate if ir is raised.
 116
 117         bb = upper limit of legendre integration
 118         iMax = maximum acceptable value of integral
 119         nleg = order of legendre quadrature
 120         ihalf = int ((nleg + 1) / 2)
 121         wlar = value of range above which wincr1 intervals are used to
 122                calculate second part of integral,
 123                else wincr2 intervals are used.
 124         C1, C2, C3 = values which are used as cutoffs for terminating
 125         or modifying a calculation.
 126
 127         M_1_SQRT_2PI = 1 / sqrt(2 * pi);  from abramowitz & stegun, p. 3.
 128         M_SQRT2 = sqrt(2)
 129         xleg = legendre 12-point nodes
 130         aleg = legendre 12-point coefficients
 131  */
 132 #define nleg    12
 133 #define ihalf   6
 134
 135   /* looks like this is suboptimal for double precision.
 136      (see how C1-C3 are used) <MM>
 137    */
 138   /* const double iMax  = 1.; not used if = 1 */
 139   static const double C1 = -30.;
 140   static const double C2 = -50.;
 141   static const double C3 = 60.;
 142   static const double bb = 8.;
 143   static const double wlar = 3.;
 144   static const double wincr1 = 2.;
 145   static const double wincr2 = 3.;
 146   static const double xleg[ihalf] = {
 147     0.981560634246719250690549090149,
 148     0.904117256370474856678465866119,
 149     0.769902674194304687036893833213,
 150     0.587317954286617447296702418941,
 151     0.367831498998180193752691536644,
 152     0.125233408511468915472441369464
 153   };
 154   static const double aleg[ihalf] = {
 155     0.047175336386511827194615961485,
 156     0.106939325995318430960254718194,
 157     0.160078328543346226334652529543,
 158     0.203167426723065921749064455810,
 159     0.233492536538354808760849898925,
 160     0.249147045813402785000562436043
 161   };
 162   double a, ac, pr_w, b, binc, blb, c, cc1,
 163     pminus, pplus, qexpo, qsqz, rinsum, wi, wincr, xx;
 164   long double bub, einsum, elsum;
 165   int j, jj;
 166
 167
 168   qsqz = w * 0.5;
 169
 170   /* if w >= 16 then the integral lower bound (occurs for c=20) */
 171   /* is 0.99999999999995 so return a value of 1. */
 172
 173   if (qsqz >= bb)
 174     return 1.0;
 175
 176   /* find (f(w/2) - 1) ^ cc */
 177   /* (first term in integral of hartley's form). */
 178
 179   pr_w = 2 * pnorm (qsqz, 0., 1., 1, 0) - 1.;   /* erf(qsqz / M_SQRT2) */
 180   /* if pr_w ^ cc < 2e-22 then set pr_w = 0 */
 181   if (pr_w >= exp (C2 / cc))
 182     pr_w = pow (pr_w, cc);
 183   else
 184     pr_w = 0.0;
 185
 186   /* if w is large then the second component of the */
 187   /* integral is small, so fewer intervals are needed. */
 188
 189   if (w > wlar)
 190     wincr = wincr1;
 191   else
 192     wincr = wincr2;
 193
 194   /* find the integral of second term of hartley's form */
 195   /* for the integral of the range for equal-length */
 196   /* intervals using legendre quadrature.  limits of */
 197   /* integration are from (w/2, 8).  two or three */
 198   /* equal-length intervals are used. */
 199
 200   /* blb and bub are lower and upper limits of integration. */
 201
 202   blb = qsqz;
 203   binc = (bb - qsqz) / wincr;
 204   bub = blb + binc;
 205   einsum = 0.0;
 206
 207   /* integrate over each interval */
 208
 209   cc1 = cc - 1.0;
 210   for (wi = 1; wi <= wincr; wi++)
 211     {
 212       elsum = 0.0;
 213       a = 0.5 * (bub + blb);
 214
 215       /* legendre quadrature with order = nleg */
 216
 217       b = 0.5 * (bub - blb);
 218
 219       for (jj = 1; jj <= nleg; jj++)
 220         {
 221           if (ihalf < jj)
 222             {
 223               j = (nleg - jj) + 1;
 224               xx = xleg[j - 1];
 225             }
 226           else
 227             {
 228               j = jj;
 229               xx = -xleg[j - 1];
 230             }
 231           c = b * xx;
 232           ac = a + c;
 233
 234           /* if exp(-qexpo/2) < 9e-14, */
 235           /* then doesn't contribute to integral */
 236
 237           qexpo = ac * ac;
 238           if (qexpo > C3)
 239             break;
 240
 241           pplus = 2 * pnorm (ac, 0., 1., 1, 0);
 242           pminus = 2 * pnorm (ac, w, 1., 1, 0);
 243
 244           /* if rinsum ^ (cc-1) < 9e-14, */
 245           /* then doesn't contribute to integral */
 246
 247           rinsum = (pplus * 0.5) - (pminus * 0.5);
 248           if (rinsum >= exp (C1 / cc1))
 249             {
 250               rinsum =
 251                 (aleg[j - 1] * exp (-(0.5 * qexpo))) * pow (rinsum, cc1);
 252               elsum += rinsum;
 253             }
 254         }
 255       elsum *= (((2.0 * b) * cc) * M_1_SQRT_2PI);
 256       einsum += elsum;
 257       blb = bub;
 258       bub += binc;
 259     }
 260
 261   /* if pr_w ^ rr < 9e-14, then return 0 */
 262   pr_w = einsum + pr_w;
 263   if (pr_w <= exp (C1 / rr))
 264     return 0.;
 265
 266   pr_w = pow (pr_w, rr);
 267   if (pr_w >= 1.)               /* 1 was iMax was eps */
 268     return 1.;
 269   return pr_w;
 270 }                               /* wprob() */
 271
 272 double
 273 ptukey (double q, double rr, double cc, double df, int lower_tail, int log_p)
 274 {
 275   const double ML_NEGINF = -1.0 / 0.0;
 276 /*  function ptukey() [was qprob() ]:
 277
 278         q = value of studentized range
 279         rr = no. of rows or groups
 280         cc = no. of columns or treatments
 281         df = degrees of freedom of error term
 282         ir[0] = error flag = 1 if wprob probability > 1
 283         ir[1] = error flag = 1 if qprob probability > 1
 284
 285         qprob = returned probability integral over [0, q]
 286
 287         The program will not terminate if ir[0] or ir[1] are raised.
 288
 289         All references in wprob to Abramowitz and Stegun
 290         are from the following reference:
 291
 292         Abramowitz, Milton and Stegun, Irene A.
 293         Handbook of Mathematical Functions.
 294         New York:  Dover publications, Inc. (1970).
 295
 296         All constants taken from this text are
 297         given to 25 significant digits.
 298
 299         nlegq = order of legendre quadrature
 300         ihalfq = int ((nlegq + 1) / 2)
 301         eps = max. allowable value of integral
 302         eps1 & eps2 = values below which there is
 303                       no contribution to integral.
 304
 305         d.f. <= dhaf:   integral is divided into ulen1 length intervals.  else
 306         d.f. <= dquar:  integral is divided into ulen2 length intervals.  else
 307         d.f. <= deigh:  integral is divided into ulen3 length intervals.  else
 308         d.f. <= dlarg:  integral is divided into ulen4 length intervals.
 309
 310         d.f. > dlarg:   the range is used to calculate integral.
 311
 312         M_LN2 = log(2)
 313
 314         xlegq = legendre 16-point nodes
 315         alegq = legendre 16-point coefficients
 316
 317         The coefficients and nodes for the legendre quadrature used in
 318         qprob and wprob were calculated using the algorithms found in:
 319
 320         Stroud, A. H. and Secrest, D.
 321         Gaussian Quadrature Formulas.
 322         Englewood Cliffs,
 323         New Jersey:  Prentice-Hall, Inc, 1966.
 324
 325         All values matched the tables (provided in same reference)
 326         to 30 significant digits.
 327
 328         f(x) = .5 + erf(x / sqrt(2)) / 2      for x > 0
 329
 330         f(x) = erfc( -x / sqrt(2)) / 2        for x < 0
 331
 332         where f(x) is standard normal c. d. f.
 333
 334         if degrees of freedom large, approximate integral
 335         with range distribution.
 336  */
 337 #define nlegq   16
 338 #define ihalfq  8
 339
 340 /*  const double eps = 1.0; not used if = 1 */
 341   static const double eps1 = -30.0;
 342   static const double eps2 = 1.0e-14;
 343   static const double dhaf = 100.0;
 344   static const double dquar = 800.0;
 345   static const double deigh = 5000.0;
 346   static const double dlarg = 25000.0;
 347   static const double ulen1 = 1.0;
 348   static const double ulen2 = 0.5;
 349   static const double ulen3 = 0.25;
 350   static const double ulen4 = 0.125;
 351   static const double xlegq[ihalfq] = {
 352     0.989400934991649932596154173450,
 353     0.944575023073232576077988415535,
 354     0.865631202387831743880467897712,
 355     0.755404408355003033895101194847,
 356     0.617876244402643748446671764049,
 357     0.458016777657227386342419442984,
 358     0.281603550779258913230460501460,
 359     0.950125098376374401853193354250e-1
 360   };
 361   static const double alegq[ihalfq] = {
 362     0.271524594117540948517805724560e-1,
 363     0.622535239386478928628438369944e-1,
 364     0.951585116824927848099251076022e-1,
 365     0.124628971255533872052476282192,
 366     0.149595988816576732081501730547,
 367     0.169156519395002538189312079030,
 368     0.182603415044923588866763667969,
 369     0.189450610455068496285396723208
 370   };
 371   double ans, f2, f21, f2lf, ff4, otsum, qsqz, rotsum, t1, twa1, ulen, wprb;
 372   int i, j, jj;
 373
 374   assert (! (isnan (q) || isnan (rr) || isnan (cc) || isnan (df)));
 375
 376   if (q <= 0)
 377     return R_DT_0;
 378
 379   /* df must be > 1 */
 380   /* there must be at least two values */
 381   assert (! (df < 2 || rr < 1 || cc < 2));
 382
 383   if (!isfinite (q))
 384     return R_DT_1;
 385
 386   if (df > dlarg)
 387     return R_DT_val (wprob (q, rr, cc));
 388
 389   /* calculate leading constant */
 390
 391   f2 = df * 0.5;
 392   /* lgammafn(u) = log(gamma(u)) */
 393   f2lf = ((f2 * log (df)) - (df * M_LN2)) - gsl_sf_lngamma (f2);
 394   f21 = f2 - 1.0;
 395
 396   /* integral is divided into unit, half-unit, quarter-unit, or */
 397   /* eighth-unit length intervals depending on the value of the */
 398   /* degrees of freedom. */
 399
 400   ff4 = df * 0.25;
 401   if (df <= dhaf)
 402     ulen = ulen1;
 403   else if (df <= dquar)
 404     ulen = ulen2;
 405   else if (df <= deigh)
 406     ulen = ulen3;
 407   else
 408     ulen = ulen4;
 409
 410   f2lf += log (ulen);
 411
 412   /* integrate over each subinterval */
 413
 414   ans = 0.0;
 415
 416   for (i = 1; i <= 50; i++)
 417     {
 418       otsum = 0.0;
 419
 420       /* legendre quadrature with order = nlegq */
 421       /* nodes (stored in xlegq) are symmetric around zero. */
 422
 423       twa1 = (2 * i - 1) * ulen;
 424
 425       for (jj = 1; jj <= nlegq; jj++)
 426         {
 427           if (ihalfq < jj)
 428             {
 429               j = jj - ihalfq - 1;
 430               t1 = (f2lf + (f21 * log (twa1 + (xlegq[j] * ulen))))
 431                 - (((xlegq[j] * ulen) + twa1) * ff4);
 432             }
 433           else
 434             {
 435               j = jj - 1;
 436               t1 = (f2lf + (f21 * log (twa1 - (xlegq[j] * ulen))))
 437                 + (((xlegq[j] * ulen) - twa1) * ff4);
 438
 439             }
 440
 441           /* if exp(t1) < 9e-14, then doesn't contribute to integral */
 442           if (t1 >= eps1)
 443             {
 444               if (ihalfq < jj)
 445                 {
 446                   qsqz = q * sqrt (((xlegq[j] * ulen) + twa1) * 0.5);
 447                 }
 448               else
 449                 {
 450                   qsqz = q * sqrt (((-(xlegq[j] * ulen)) + twa1) * 0.5);
 451                 }
 452
 453               /* call wprob to find integral of range portion */
 454
 455               wprb = wprob (qsqz, rr, cc);
 456               rotsum = (wprb * alegq[j]) * exp (t1);
 457               otsum += rotsum;
 458             }
 459           /* end legendre integral for interval i */
 460           /* L200: */
 461         }
 462
 463       /* if integral for interval i < 1e-14, then stop.
 464        * However, in order to avoid small area under left tail,
 465        * at least  1 / ulen  intervals are calculated.
 466        */
 467       if (i * ulen >= 1.0 && otsum <= eps2)
 468         break;
 469
 470       /* end of interval i */
 471       /* L330: */
 472
 473       ans += otsum;
 474     }
 475
 476   assert (otsum <= eps2); /* not converged */
 477
 478   if (ans > 1.)
 479     ans = 1.;
 480   return R_DT_val (ans);
 481 }
 482
 483
 484