lib/gsl-extras/betadistinv.c

   1 /* cdf/betadistinv.c
   2  *
   3  * Copyright (C) 2004 Jason H. Stover.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or (at
   8  * your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
  18  */
  19
  20 /*
  21  * Invert the Beta distribution.
  22  *
  23  * References:
  24  *
  25  * Roger W. Abernathy and Robert P. Smith. "Applying Series Expansion
  26  * to the Inverse Beta Distribution to Find Percentiles of the F-Distribution,"
  27  * ACM Transactions on Mathematical Software, volume 19, number 4, December 1993,
  28  * pages 474-480.
  29  *
  30  * G.W. Hill and A.W. Davis. "Generalized asymptotic expansions of a
  31  * Cornish-Fisher type," Annals of Mathematical Statistics, volume 39, number 8,
  32  * August 1968, pages 1264-1273.
  33  */
  34 #include <config.h>
  35 #include <math.h>
  36 #include <gsl/gsl_math.h>
  37 #include <gsl/gsl_errno.h>
  38 #include <gsl/gsl_sf_gamma.h>
  39 #include <gsl/gsl_cdf.h>
  40 #include <gsl/gsl_randist.h>
  41 #include "gsl-extras.h"
  42
  43 #define BETAINV_INIT_ERR .01
  44 #define BETADISTINV_N_TERMS 3
  45 #define BETADISTINV_MAXITER 20
  46
  47 static double
  48 s_bisect (double x, double y)
  49 {
  50   double result = GSL_MIN(x,y) + fabs(x - y) / 2.0;
  51   return result;
  52 }
  53 static double
  54 new_guess_P ( double old_guess, double x, double y,
  55               double prob, double a, double b)
  56 {
  57   double result;
  58   double p_hat;
  59   double end_point;
  60
  61   p_hat = gsl_cdf_beta_P(old_guess, a, b);
  62   if (p_hat < prob)
  63     {
  64       end_point = GSL_MAX(x,y);
  65     }
  66   else if ( p_hat > prob )
  67     {
  68       end_point = GSL_MIN(x,y);
  69     }
  70   else
  71     {
  72       end_point = old_guess;
  73     }
  74   result = s_bisect(old_guess, end_point);
  75
  76   return result;
  77 }
  78
  79 static double
  80 new_guess_Q ( double old_guess, double x, double y,
  81               double prob, double a, double b)
  82 {
  83   double result;
  84   double q_hat;
  85   double end_point;
  86
  87   q_hat = gsl_cdf_beta_Q(old_guess, a, b);
  88   if (q_hat >= prob)
  89     {
  90       end_point = GSL_MAX(x,y);
  91     }
  92   else if ( q_hat < prob )
  93     {
  94       end_point = GSL_MIN(x,y);
  95     }
  96   else
  97     {
  98       end_point = old_guess;
  99     }
 100   result = s_bisect(old_guess, end_point);
 101
 102   return result;
 103 }
 104
 105 /*
 106  * The get_corn_fish_* functions below return the first
 107  * three terms of the Cornish-Fisher expansion
 108  * without recursion. The recursive functions
 109  * make the code more legible when higher order coefficients
 110  * are used, but terms beyond the cubic do not
 111  * improve accuracy.
 112  */
 113   /*
 114    * Linear coefficient for the
 115    * Cornish-Fisher expansion.
 116    */
 117 static double
 118 get_corn_fish_lin (const double x, const double a, const double b)
 119 {
 120   double result;
 121
 122   result = gsl_ran_beta_pdf (x, a, b);
 123   if(result > 0)
 124     {
 125       result = 1.0 / result;
 126     }
 127   else
 128     {
 129       result = GSL_DBL_MAX;
 130     }
 131
 132   return result;
 133 }
 134   /*
 135    * Quadratic coefficient for the
 136    * Cornish-Fisher expansion.
 137    */
 138 static double
 139 get_corn_fish_quad (const double x, const double a, const double b)
 140 {
 141   double result;
 142   double gam_ab;
 143   double gam_a;
 144   double gam_b;
 145   double num;
 146   double den;
 147
 148   gam_ab =  gsl_sf_lngamma(a + b);
 149   gam_a = gsl_sf_lngamma (a);
 150   gam_b = gsl_sf_lngamma (b);
 151   num = exp(2 * (gam_a + gam_b - gam_ab)) * (1 - a + x * (b + a - 2));
 152   den = 2.0 * pow ( x, 2*a - 1 ) * pow ( 1 - x, 2 * b - 1 );
 153   if ( fabs(den) > 0.0)
 154     {
 155       result = num / den;
 156     }
 157   else
 158     {
 159       result = 0.0;
 160     }
 161
 162   return result;
 163 }
 164 /*
 165  * The cubic term for the Cornish-Fisher expansion.
 166  * Theoretically, use of this term should give a better approximation,
 167  * but in practice inclusion of the cubic term worsens the
 168  * iterative procedure in gsl_cdf_beta_Pinv and gsl_cdf_beta_Qinv
 169  * for extreme values of p, a or b.
 170  */
 171 #if 0
 172 static double
 173 get_corn_fish_cube (const double x, const double a, const double b)
 174 {
 175   double result;
 176   double am1 = a - 1.0;
 177   double am2 = a - 2.0;
 178   double apbm2 = a+b-2.0;
 179   double apbm3 = a+b-3.0;
 180   double apbm4 = a+b-4.0;
 181   double ab1ab2 = am1 * am2;
 182   double tmp;
 183   double num;
 184   double den;
 185
 186   num =  (am1 - x * apbm2) * (am1 - x * apbm2);
 187   tmp = ab1ab2 - x * (apbm2 * ( ab1ab2 * apbm4 + 1) + x * apbm2 * apbm3);
 188   num += tmp;
 189   tmp = gsl_ran_beta_pdf(x,a,b);
 190   den = 2.0 * x * x * (1 - x) * (1 - x) * pow(tmp,3.0);
 191   if ( fabs(den) > 0.0)
 192     {
 193       result = num / den;
 194     }
 195   else
 196     {
 197       result = 0.0;
 198     }
 199
 200   return result;
 201 }
 202 #endif
 203 /*
 204  * The Cornish-Fisher coefficients can be defined recursivley,
 205  * starting with the nth derivative of s_psi = -f'(x)/f(x),
 206  * where f is the beta density.
 207  *
 208  * The section below was commented out since
 209  * the recursive generation of the coeficients did
 210  * not improve the accuracy of the directly coded
 211  * the first three coefficients.
 212  */
 213 #if 0
 214 static double
 215 s_d_psi (double x, double a, double b, int n)
 216 {
 217   double result;
 218   double np1 = (double) n + 1;
 219   double asgn;
 220   double bsgn;
 221   double bm1 = b - 1.0;
 222   double am1 = a - 1.0;
 223   double mx = 1.0 - x;
 224
 225   asgn = (n % 2) ? 1.0:-1.0;
 226   bsgn = (n % 2) ? -1.0:1.0;
 227   result = gsl_sf_gamma(np1) * ((bsgn * bm1 / (pow(mx, np1)))
 228                                 + (asgn * am1 / (pow(x,np1))));
 229   return result;
 230 }
 231 /*
 232  * nth derivative of a coefficient with respect
 233  * to x.
 234  */
 235 static double
 236 get_d_coeff ( double x, double a,
 237               double b, double n, double k)
 238 {
 239   double result;
 240   double d_psi;
 241   double k_fac;
 242   double i_fac;
 243   double kmi_fac;
 244   double i;
 245
 246   if (n == 2)
 247     {
 248       result = s_d_psi(x, a, b, k);
 249     }
 250   else
 251     {
 252       result = 0.0;
 253       for (i = 0.0; i < (k+1); i++)
 254         {
 255           k_fac = gsl_sf_lngamma(k+1.0);
 256           i_fac = gsl_sf_lngamma(i+1.0);
 257           kmi_fac = gsl_sf_lngamma(k-i+1.0);
 258
 259           result += exp(k_fac - i_fac - kmi_fac)
 260             * get_d_coeff( x, a, b, 2.0, i)
 261             * get_d_coeff( x, a, b, (n - 1.0), (k - i));
 262         }
 263       result += get_d_coeff ( x, a, b, (n-1.0), (k+1.0));
 264     }
 265
 266   return result;
 267 }
 268 /*
 269  * Cornish-Fisher coefficient.
 270  */
 271 static double
 272 get_corn_fish (double c, double x,
 273                double a, double b, double n)
 274 {
 275   double result;
 276   double dc;
 277   double c_prev;
 278
 279   if(n == 1.0)
 280     {
 281       result = 1;
 282     }
 283   else if (n==2.0)
 284     {
 285       result = s_d_psi(x, a, b, 0);
 286     }
 287   else
 288     {
 289       dc = get_d_coeff(x, a, b, (n-1.0), 1.0);
 290       c_prev = get_corn_fish(c, x, a, b, (n-1.0));
 291       result = (n-1) * s_d_psi(x,a,b,0) * c_prev + dc;
 292     }
 293   return result;
 294 }
 295 #endif
 296
 297 double
 298 gslextras_cdf_beta_Pinv ( const double p, const double a, const double b)
 299 {
 300   double result;
 301   double state;
 302   double beta_result;
 303   double lower = 0.0;
 304   double upper = 1.0;
 305   double c1;
 306   double c2;
 307 #if 0
 308   double c3;
 309 #endif
 310   double frac1;
 311   double frac2;
 312   double frac3;
 313   double frac4;
 314   double p0;
 315   double p1;
 316   double p2;
 317   double tmp;
 318   double err;
 319   double abserr;
 320   double relerr;
 321   double min_err;
 322   int n_iter = 0;
 323
 324   if ( p < 0.0 )
 325     {
 326       GSLEXTRAS_CDF_ERROR("p < 0", GSL_EDOM);
 327     }
 328   if ( p > 1.0 )
 329     {
 330       GSLEXTRAS_CDF_ERROR("p > 1",GSL_EDOM);
 331     }
 332   if ( a < 0.0 )
 333     {
 334       GSLEXTRAS_CDF_ERROR ("a < 0", GSL_EDOM );
 335     }
 336   if ( b < 0.0 )
 337     {
 338       GSLEXTRAS_CDF_ERROR ( "b < 0", GSL_EDOM );
 339     }
 340   if ( p == 0.0 )
 341     {
 342       return 0.0;
 343     }
 344   if ( p == 1.0 )
 345     {
 346       return 1.0;
 347     }
 348
 349   if (p > (1.0 - GSL_DBL_EPSILON))
 350     {
 351       /*
 352        * When p is close to 1.0, the bisection
 353        * works better with gsl_cdf_Q.
 354        */
 355       state = gslextras_cdf_beta_Qinv ( p, a, b);
 356       result = 1.0 - state;
 357       return result;
 358     }
 359   if (p < GSL_DBL_EPSILON )
 360     {
 361       /*
 362        * Start at a small value and rise until
 363        * we are above the correct result. This
 364        * avoids overflow. When p is very close to
 365        * 0, an initial state value of a/(a+b) will
 366        * cause the interpolating polynomial
 367        * to overflow.
 368        */
 369       upper = GSL_DBL_MIN;
 370       beta_result = gsl_cdf_beta_P (upper, a, b);
 371       while (beta_result < p)
 372         {
 373           lower = upper;
 374           upper *= 4.0;
 375           beta_result = gsl_cdf_beta_P (upper, a, b);
 376         }
 377       state = (lower + upper) / 2.0;
 378     }
 379   else
 380     {
 381       /*
 382        * First guess is the expected value.
 383        */
 384       lower = 0.0;
 385       upper = 1.0;
 386       state = a/(a+b);
 387       beta_result = gsl_cdf_beta_P (state, a, b);
 388     }
 389   err = beta_result - p;
 390   abserr = fabs(err);
 391   relerr = abserr / p;
 392   while ( relerr > BETAINV_INIT_ERR && n_iter < 100)
 393     {
 394       tmp = new_guess_P ( state, lower, upper,
 395                           p, a, b);
 396       lower = ( tmp < state ) ? lower:state;
 397       upper = ( tmp < state ) ? state:upper;
 398       state = tmp;
 399       beta_result = gsl_cdf_beta_P (state, a, b);
 400       err = p - beta_result;
 401       abserr = fabs(err);
 402       relerr = abserr / p;
 403     }
 404
 405   result = state;
 406   min_err = relerr;
 407   /*
 408    * Use a second order Lagrange interpolating
 409    * polynomial to get closer before switching to
 410    * the iterative method.
 411    */
 412   p0 = gsl_cdf_beta_P (lower, a, b);
 413   p1 = gsl_cdf_beta_P (state, a, b);
 414   p2 = gsl_cdf_beta_P (upper, a, b);
 415   if( p0 < p1 && p1 < p2)
 416     {
 417       frac1 = (p - p2) / (p0 - p1);
 418       frac2 = (p - p1) / (p0 - p2);
 419       frac3 = (p - p0) / (p1 - p2);
 420       frac4 = (p - p0) * (p - p1) / ((p2 - p0) * (p2 - p1));
 421       state = frac1 * (frac2 * lower - frac3 * state)
 422         + frac4 * upper;
 423
 424       beta_result = gsl_cdf_beta_P( state, a, b);
 425       err = p - beta_result;
 426       abserr = fabs(err);
 427       relerr = abserr / p;
 428       if (relerr < min_err)
 429         {
 430           result = state;
 431           min_err = relerr;
 432         }
 433     }
 434
 435   n_iter = 0;
 436
 437   /*
 438    * Newton-type iteration using the terms from the
 439    * Cornish-Fisher expansion. If only the first term
 440    * of the expansion is used, this is Newton's method.
 441    */
 442   while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
 443     {
 444       n_iter++;
 445       c1 = get_corn_fish_lin (state, a, b);
 446       c2 = get_corn_fish_quad (state, a, b);
 447       /*
 448        * The cubic term does not help, and can can
 449        * harm the approximation for extreme values of
 450        * p, a, or b.
 451        */
 452 #if 0
 453       c3 = get_corn_fish_cube (state, a, b);
 454       state += err * (c1 + (err / 2.0 ) * (c2 + c3 * err / 3.0));
 455 #endif
 456       state += err * (c1 + (c2 * err / 2.0 ));
 457       /*
 458        * The section below which is commented out uses
 459        * a recursive function to get the coefficients.
 460        * The recursion makes coding higher-order terms
 461        * easier, but did not improve the result beyond
 462        * the use of three terms. Since explicitly coding
 463        * those three terms in the get_corn_fish_* functions
 464        * was not difficult, the recursion was abandoned.
 465        */
 466 #if 0
 467       coeff = 1.0;
 468       for(i = 1.0; i < BETADISTINV_N_TERMS; i += 1.0)
 469         {
 470           i_fac *= i;
 471           coeff = get_corn_fish (coeff, prior_state, a, b, i);
 472           state += coeff * pow(err, i) /
 473             (i_fac * pow (gsl_ran_beta_pdf(prior_state,a,b), i));
 474         }
 475 #endif
 476       beta_result = gsl_cdf_beta_P ( state, a, b );
 477       err = p - beta_result;
 478       abserr = fabs(err);
 479       relerr = abserr / p;
 480       if (relerr < min_err)
 481         {
 482           result = state;
 483           min_err = relerr;
 484         }
 485     }
 486
 487   return result;
 488 }
 489
 490 double
 491 gslextras_cdf_beta_Qinv (double q, double a, double b)
 492 {
 493   double result;
 494   double state;
 495   double beta_result;
 496   double lower = 0.0;
 497   double upper = 1.0;
 498   double c1;
 499   double c2;
 500 #if 0
 501   double c3;
 502 #endif
 503   double p0;
 504   double p1;
 505   double p2;
 506   double frac1;
 507   double frac2;
 508   double frac3;
 509   double frac4;
 510   double tmp;
 511   double err;
 512   double abserr;
 513   double relerr;
 514   double min_err;
 515   int n_iter = 0;
 516
 517   if ( q < 0.0 )
 518     {
 519       GSLEXTRAS_CDF_ERROR("q < 0", GSL_EDOM);
 520     }
 521   if ( q > 1.0 )
 522     {
 523       GSLEXTRAS_CDF_ERROR("q > 1",GSL_EDOM);
 524     }
 525   if ( a < 0.0 )
 526     {
 527       GSLEXTRAS_CDF_ERROR ("a < 0", GSL_EDOM );
 528     }
 529   if ( b < 0.0 )
 530     {
 531       GSLEXTRAS_CDF_ERROR ( "b < 0", GSL_EDOM );
 532     }
 533   if ( q == 0.0 )
 534     {
 535       return 1.0;
 536     }
 537   if ( q == 1.0 )
 538     {
 539       return 0.0;
 540     }
 541
 542   if ( q < GSL_DBL_EPSILON )
 543     {
 544       /*
 545        * When q is close to 0, the bisection
 546        * and interpolation done in the rest of
 547        * this routine will not give the correct
 548        * value within double precision, so
 549        * gsl_cdf_beta_Qinv is called instead.
 550        */
 551       state = gslextras_cdf_beta_Pinv ( q, a, b);
 552       result = 1.0 - state;
 553       return result;
 554     }
 555   if ( q > 1.0 - GSL_DBL_EPSILON )
 556     {
 557       /*
 558        * Make the initial guess close to 0.0.
 559        */
 560       upper = GSL_DBL_MIN;
 561       beta_result = gsl_cdf_beta_Q ( upper, a, b);
 562       while (beta_result > q )
 563         {
 564           lower = upper;
 565           upper *= 4.0;
 566           beta_result = gsl_cdf_beta_Q ( upper, a, b);
 567         }
 568       state = (upper + lower) / 2.0;
 569     }
 570   else
 571     {
 572       /* Bisection to get an initial approximation.
 573        * First guess is the expected value.
 574        */
 575       state = a/(a+b);
 576       lower = 0.0;
 577       upper = 1.0;
 578     }
 579   beta_result = gsl_cdf_beta_Q (state, a, b);
 580   err = beta_result - q;
 581   abserr = fabs(err);
 582   relerr = abserr / q;
 583   while ( relerr > BETAINV_INIT_ERR && n_iter < 100)
 584     {
 585       n_iter++;
 586       tmp = new_guess_Q ( state, lower, upper,
 587                           q, a, b);
 588       lower = ( tmp < state ) ? lower:state;
 589       upper = ( tmp < state ) ? state:upper;
 590       state = tmp;
 591       beta_result = gsl_cdf_beta_Q (state, a, b);
 592       err = q - beta_result;
 593       abserr = fabs(err);
 594       relerr = abserr / q;
 595     }
 596   result = state;
 597   min_err = relerr;
 598
 599   /*
 600    * Use a second order Lagrange interpolating
 601    * polynomial to get closer before switching to
 602    * the iterative method.
 603    */
 604   p0 = gsl_cdf_beta_Q (lower, a, b);
 605   p1 = gsl_cdf_beta_Q (state, a, b);
 606   p2 = gsl_cdf_beta_Q (upper, a, b);
 607   if(p0 > p1 && p1 > p2)
 608     {
 609       frac1 = (q - p2) / (p0 - p1);
 610       frac2 = (q - p1) / (p0 - p2);
 611       frac3 = (q - p0) / (p1 - p2);
 612       frac4 = (q - p0) * (q - p1) / ((p2 - p0) * (p2 - p1));
 613       state = frac1 * (frac2 * lower - frac3 * state)
 614         + frac4 * upper;
 615       beta_result = gsl_cdf_beta_Q( state, a, b);
 616       err = beta_result - q;
 617       abserr = fabs(err);
 618       relerr = abserr / q;
 619       if (relerr < min_err)
 620         {
 621           result = state;
 622           min_err = relerr;
 623         }
 624     }
 625
 626   /*
 627    * Iteration using the terms from the
 628    * Cornish-Fisher expansion. If only the first term
 629    * of the expansion is used, this is Newton's method.
 630    */
 631
 632   n_iter = 0;
 633   while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
 634     {
 635       n_iter++;
 636       c1 = get_corn_fish_lin (state, a, b);
 637       c2 = get_corn_fish_quad (state, a, b);
 638       /*
 639        * The cubic term does not help, and can harm
 640        * the approximation for extreme values of p, a and b.
 641        */
 642 #if 0
 643       c3 = get_corn_fish_cube (state, a, b);
 644       state += err * (c1 + (err / 2.0 ) * (c2 + c3 * err / 3.0));
 645 #endif
 646       state += err * (c1 + (c2 * err / 2.0 ));
 647       beta_result = gsl_cdf_beta_Q ( state, a, b );
 648       err = beta_result - q;
 649       abserr = fabs(err);
 650       relerr = abserr / q;
 651       if (relerr < min_err)
 652         {
 653           result = state;
 654           min_err = relerr;
 655         }
 656     }
 657
 658   return result;
 659 }