lib/gsl-extras/betadistinv.c

   1 /* cdf/betadistinv.c
   2  *
   3  * Copyright (C) 2004 Free Software Foundation, Inc.
   4  * Written by Jason H. Stover.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or (at
   9  * your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  19  */
  20
  21 /*
  22  * Invert the Beta distribution.
  23  *
  24  * References:
  25  *
  26  * Roger W. Abernathy and Robert P. Smith. "Applying Series Expansion
  27  * to the Inverse Beta Distribution to Find Percentiles of the F-Distribution,"
  28  * ACM Transactions on Mathematical Software, volume 19, number 4, December 1993,
  29  * pages 474-480.
  30  *
  31  * G.W. Hill and A.W. Davis. "Generalized asymptotic expansions of a
  32  * Cornish-Fisher type," Annals of Mathematical Statistics, volume 39, number 8,
  33  * August 1968, pages 1264-1273.
  34  */
  35
  36 #include <config.h>
  37 #include <math.h>
  38 #include <gsl/gsl_math.h>
  39 #include <gsl/gsl_errno.h>
  40 #include <gsl/gsl_sf_gamma.h>
  41 #include <gsl/gsl_cdf.h>
  42 #include <gsl/gsl_randist.h>
  43 #include "gsl-extras.h"
  44
  45 #define BETAINV_INIT_ERR .001
  46 #define BETADISTINV_N_TERMS 3
  47 #define BETADISTINV_MAXITER 20
  48
  49 static double
  50 s_bisect (double x, double y)
  51 {
  52   double result = GSL_MIN(x,y) + fabs(x - y) / 2.0;
  53   return result;
  54 }
  55 static double
  56 new_guess_P ( double old_guess, double x, double y,
  57               double prob, double a, double b)
  58 {
  59   double result;
  60   double p_hat;
  61   double end_point;
  62
  63   p_hat = gsl_cdf_beta_P(old_guess, a, b);
  64   if (p_hat < prob)
  65     {
  66       end_point = GSL_MAX(x,y);
  67     }
  68   else if ( p_hat > prob )
  69     {
  70       end_point = GSL_MIN(x,y);
  71     }
  72   else
  73     {
  74       end_point = old_guess;
  75     }
  76   result = s_bisect(old_guess, end_point);
  77
  78   return result;
  79 }
  80
  81 static double
  82 new_guess_Q ( double old_guess, double x, double y,
  83               double prob, double a, double b)
  84 {
  85   double result;
  86   double q_hat;
  87   double end_point;
  88
  89   q_hat = gsl_cdf_beta_Q(old_guess, a, b);
  90   if (q_hat >= prob)
  91     {
  92       end_point = GSL_MAX(x,y);
  93     }
  94   else if ( q_hat < prob )
  95     {
  96       end_point = GSL_MIN(x,y);
  97     }
  98   else
  99     {
 100       end_point = old_guess;
 101     }
 102   result = s_bisect(old_guess, end_point);
 103
 104   return result;
 105 }
 106
 107 /*
 108  * The get_corn_fish_* functions below return the first
 109  * three terms of the Cornish-Fisher expansion
 110  * without recursion. The recursive functions
 111  * make the code more legible when higher order coefficients
 112  * are used, but terms beyond the cubic do not
 113  * improve accuracy.
 114  */
 115   /*
 116    * Linear coefficient for the
 117    * Cornish-Fisher expansion.
 118    */
 119 static double
 120 get_corn_fish_lin (const double x, const double a, const double b)
 121 {
 122   double result;
 123
 124   result = gsl_ran_beta_pdf (x, a, b);
 125   if(result > 0)
 126     {
 127       result = 1.0 / result;
 128     }
 129   else
 130     {
 131       result = GSL_DBL_MAX;
 132     }
 133
 134   return result;
 135 }
 136   /*
 137    * Quadratic coefficient for the
 138    * Cornish-Fisher expansion.
 139    */
 140 static double
 141 get_corn_fish_quad (const double x, const double a, const double b)
 142 {
 143   double result;
 144   double gam_ab;
 145   double gam_a;
 146   double gam_b;
 147   double num;
 148   double den;
 149
 150   gam_ab =  gsl_sf_lngamma(a + b);
 151   gam_a = gsl_sf_lngamma (a);
 152   gam_b = gsl_sf_lngamma (b);
 153   num = exp(2 * (gam_a + gam_b - gam_ab)) * (1 - a + x * (b + a - 2));
 154   den = 2.0 * pow ( x, 2*a - 1 ) * pow ( 1 - x, 2 * b - 1 );
 155   if ( fabs(den) > 0.0)
 156     {
 157       result = num / den;
 158     }
 159   else
 160     {
 161       result = 0.0;
 162     }
 163
 164   return result;
 165 }
 166 /*
 167  * The cubic term for the Cornish-Fisher expansion.
 168  * Theoretically, use of this term should give a better approximation,
 169  * but in practice inclusion of the cubic term worsens the
 170  * iterative procedure in gsl_cdf_beta_Pinv and gsl_cdf_beta_Qinv
 171  * for extreme values of p, a or b.
 172  */
 173 #if 0
 174 static double
 175 get_corn_fish_cube (const double x, const double a, const double b)
 176 {
 177   double result;
 178   double am1 = a - 1.0;
 179   double am2 = a - 2.0;
 180   double apbm2 = a+b-2.0;
 181   double apbm3 = a+b-3.0;
 182   double apbm4 = a+b-4.0;
 183   double ab1ab2 = am1 * am2;
 184   double tmp;
 185   double num;
 186   double den;
 187
 188   num =  (am1 - x * apbm2) * (am1 - x * apbm2);
 189   tmp = ab1ab2 - x * (apbm2 * ( ab1ab2 * apbm4 + 1) + x * apbm2 * apbm3);
 190   num += tmp;
 191   tmp = gsl_ran_beta_pdf(x,a,b);
 192   den = 2.0 * x * x * (1 - x) * (1 - x) * pow(tmp,3.0);
 193   if ( fabs(den) > 0.0)
 194     {
 195       result = num / den;
 196     }
 197   else
 198     {
 199       result = 0.0;
 200     }
 201
 202   return result;
 203 }
 204 #endif
 205 /*
 206  * The Cornish-Fisher coefficients can be defined recursivley,
 207  * starting with the nth derivative of s_psi = -f'(x)/f(x),
 208  * where f is the beta density.
 209  *
 210  * The section below was commented out since
 211  * the recursive generation of the coeficients did
 212  * not improve the accuracy of the directly coded
 213  * the first three coefficients.
 214  */
 215 #if 0
 216 static double
 217 s_d_psi (double x, double a, double b, int n)
 218 {
 219   double result;
 220   double np1 = (double) n + 1;
 221   double asgn;
 222   double bsgn;
 223   double bm1 = b - 1.0;
 224   double am1 = a - 1.0;
 225   double mx = 1.0 - x;
 226
 227   asgn = (n % 2) ? 1.0:-1.0;
 228   bsgn = (n % 2) ? -1.0:1.0;
 229   result = gsl_sf_gamma(np1) * ((bsgn * bm1 / (pow(mx, np1)))
 230                                 + (asgn * am1 / (pow(x,np1))));
 231   return result;
 232 }
 233 /*
 234  * nth derivative of a coefficient with respect
 235  * to x.
 236  */
 237 static double
 238 get_d_coeff ( double x, double a,
 239               double b, double n, double k)
 240 {
 241   double result;
 242   double d_psi;
 243   double k_fac;
 244   double i_fac;
 245   double kmi_fac;
 246   double i;
 247
 248   if (n == 2)
 249     {
 250       result = s_d_psi(x, a, b, k);
 251     }
 252   else
 253     {
 254       result = 0.0;
 255       for (i = 0.0; i < (k+1); i++)
 256         {
 257           k_fac = gsl_sf_lngamma(k+1.0);
 258           i_fac = gsl_sf_lngamma(i+1.0);
 259           kmi_fac = gsl_sf_lngamma(k-i+1.0);
 260
 261           result += exp(k_fac - i_fac - kmi_fac)
 262             * get_d_coeff( x, a, b, 2.0, i)
 263             * get_d_coeff( x, a, b, (n - 1.0), (k - i));
 264         }
 265       result += get_d_coeff ( x, a, b, (n-1.0), (k+1.0));
 266     }
 267
 268   return result;
 269 }
 270 /*
 271  * Cornish-Fisher coefficient.
 272  */
 273 static double
 274 get_corn_fish (double c, double x,
 275                double a, double b, double n)
 276 {
 277   double result;
 278   double dc;
 279   double c_prev;
 280
 281   if(n == 1.0)
 282     {
 283       result = 1;
 284     }
 285   else if (n==2.0)
 286     {
 287       result = s_d_psi(x, a, b, 0);
 288     }
 289   else
 290     {
 291       dc = get_d_coeff(x, a, b, (n-1.0), 1.0);
 292       c_prev = get_corn_fish(c, x, a, b, (n-1.0));
 293       result = (n-1) * s_d_psi(x,a,b,0) * c_prev + dc;
 294     }
 295   return result;
 296 }
 297 #endif
 298
 299 double
 300 gslextras_cdf_beta_Pinv ( const double p, const double a, const double b)
 301 {
 302   double result;
 303   double state;
 304   double beta_result;
 305   double lower = 0.0;
 306   double upper = 1.0;
 307   double c1;
 308   double c2;
 309 #if 0
 310   double c3;
 311 #endif
 312   double frac1;
 313   double frac2;
 314   double frac3;
 315   double frac4;
 316   double p0;
 317   double p1;
 318   double p2;
 319   double tmp;
 320   double err;
 321   double abserr;
 322   double relerr;
 323   double min_err;
 324   int n_iter = 0;
 325
 326   if ( p < 0.0 )
 327     {
 328       GSLEXTRAS_CDF_ERROR("p < 0", GSL_EDOM);
 329     }
 330   if ( p > 1.0 )
 331     {
 332       GSLEXTRAS_CDF_ERROR("p > 1",GSL_EDOM);
 333     }
 334   if ( a < 0.0 )
 335     {
 336       GSLEXTRAS_CDF_ERROR ("a < 0", GSL_EDOM );
 337     }
 338   if ( b < 0.0 )
 339     {
 340       GSLEXTRAS_CDF_ERROR ( "b < 0", GSL_EDOM );
 341     }
 342   if ( p == 0.0 )
 343     {
 344       return 0.0;
 345     }
 346   if ( p == 1.0 )
 347     {
 348       return 1.0;
 349     }
 350
 351   if (p > (1.0 - GSL_DBL_EPSILON))
 352     {
 353       /*
 354        * When p is close to 1.0, the bisection
 355        * works better with gsl_cdf_Q.
 356        */
 357       state = gslextras_cdf_beta_Qinv ( p, a, b);
 358       result = 1.0 - state;
 359       return result;
 360     }
 361   if (p < GSL_DBL_EPSILON )
 362     {
 363       /*
 364        * Start at a small value and rise until
 365        * we are above the correct result. This
 366        * avoids overflow. When p is very close to
 367        * 0, an initial state value of a/(a+b) will
 368        * cause the interpolating polynomial
 369        * to overflow.
 370        */
 371       upper = GSL_DBL_MIN;
 372       beta_result = gsl_cdf_beta_P (upper, a, b);
 373       while (beta_result < p)
 374         {
 375           lower = upper;
 376           upper *= 4.0;
 377           beta_result = gsl_cdf_beta_P (upper, a, b);
 378         }
 379       state = (lower + upper) / 2.0;
 380     }
 381   else
 382     {
 383       /*
 384        * First guess is the expected value.
 385        */
 386       lower = 0.0;
 387       upper = 1.0;
 388       state = a/(a+b);
 389       beta_result = gsl_cdf_beta_P (state, a, b);
 390     }
 391   err = beta_result - p;
 392   abserr = fabs(err);
 393   relerr = abserr / p;
 394   while ( relerr > BETAINV_INIT_ERR)
 395     {
 396       tmp = new_guess_P ( state, lower, upper,
 397                           p, a, b);
 398       lower = ( tmp < state ) ? lower:state;
 399       upper = ( tmp < state ) ? state:upper;
 400       state = tmp;
 401       beta_result = gsl_cdf_beta_P (state, a, b);
 402       err = p - beta_result;
 403       abserr = fabs(err);
 404       relerr = abserr / p;
 405     }
 406
 407   result = state;
 408   min_err = relerr;
 409   /*
 410    * Use a second order Lagrange interpolating
 411    * polynomial to get closer before switching to
 412    * the iterative method.
 413    */
 414   p0 = gsl_cdf_beta_P (lower, a, b);
 415   p1 = gsl_cdf_beta_P (state, a, b);
 416   p2 = gsl_cdf_beta_P (upper, a, b);
 417   if( p0 < p1 && p1 < p2)
 418     {
 419       frac1 = (p - p2) / (p0 - p1);
 420       frac2 = (p - p1) / (p0 - p2);
 421       frac3 = (p - p0) / (p1 - p2);
 422       frac4 = (p - p0) * (p - p1) / ((p2 - p0) * (p2 - p1));
 423       state = frac1 * (frac2 * lower - frac3 * state)
 424         + frac4 * upper;
 425
 426       beta_result = gsl_cdf_beta_P( state, a, b);
 427       err = p - beta_result;
 428       abserr = fabs(err);
 429       relerr = abserr / p;
 430       if (relerr < min_err)
 431         {
 432           result = state;
 433           min_err = relerr;
 434         }
 435       else
 436         {
 437           /*
 438            * Lagrange polynomial failed to reduce the error.
 439            * This will happen with a very skewed beta density.
 440            * Undo previous steps.
 441            */
 442           state = result;
 443           beta_result = gsl_cdf_beta_P(state,a,b);
 444           err = p - beta_result;
 445           abserr = fabs(err);
 446           relerr = abserr / p;
 447         }
 448     }
 449
 450   n_iter = 0;
 451
 452   /*
 453    * Newton-type iteration using the terms from the
 454    * Cornish-Fisher expansion. If only the first term
 455    * of the expansion is used, this is Newton's method.
 456    */
 457   while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
 458     {
 459       n_iter++;
 460       c1 = get_corn_fish_lin (state, a, b);
 461       c2 = get_corn_fish_quad (state, a, b);
 462       /*
 463        * The cubic term does not help, and can can
 464        * harm the approximation for extreme values of
 465        * p, a, or b.
 466        */
 467 #if 0
 468       c3 = get_corn_fish_cube (state, a, b);
 469       state += err * (c1 + (err / 2.0 ) * (c2 + c3 * err / 3.0));
 470 #endif
 471       state += err * (c1 + (c2 * err / 2.0 ));
 472       /*
 473        * The section below which is commented out uses
 474        * a recursive function to get the coefficients.
 475        * The recursion makes coding higher-order terms
 476        * easier, but did not improve the result beyond
 477        * the use of three terms. Since explicitly coding
 478        * those three terms in the get_corn_fish_* functions
 479        * was not difficult, the recursion was abandoned.
 480        */
 481 #if 0
 482       coeff = 1.0;
 483       for(i = 1.0; i < BETADISTINV_N_TERMS; i += 1.0)
 484         {
 485           i_fac *= i;
 486           coeff = get_corn_fish (coeff, prior_state, a, b, i);
 487           state += coeff * pow(err, i) /
 488             (i_fac * pow (gsl_ran_beta_pdf(prior_state,a,b), i));
 489         }
 490 #endif
 491       beta_result = gsl_cdf_beta_P ( state, a, b );
 492       err = p - beta_result;
 493       abserr = fabs(err);
 494       relerr = abserr / p;
 495       if (relerr < min_err)
 496         {
 497           result = state;
 498           min_err = relerr;
 499         }
 500     }
 501
 502   return result;
 503 }
 504
 505 double
 506 gslextras_cdf_beta_Qinv (double q, double a, double b)
 507 {
 508   double result;
 509   double state;
 510   double beta_result;
 511   double lower = 0.0;
 512   double upper = 1.0;
 513   double c1;
 514   double c2;
 515 #if 0
 516   double c3;
 517 #endif
 518   double p0;
 519   double p1;
 520   double p2;
 521   double frac1;
 522   double frac2;
 523   double frac3;
 524   double frac4;
 525   double tmp;
 526   double err;
 527   double abserr;
 528   double relerr;
 529   double min_err;
 530   int n_iter = 0;
 531
 532   if ( q < 0.0 )
 533     {
 534       GSLEXTRAS_CDF_ERROR("q < 0", GSL_EDOM);
 535     }
 536   if ( q > 1.0 )
 537     {
 538       GSLEXTRAS_CDF_ERROR("q > 1",GSL_EDOM);
 539     }
 540   if ( a < 0.0 )
 541     {
 542       GSLEXTRAS_CDF_ERROR ("a < 0", GSL_EDOM );
 543     }
 544   if ( b < 0.0 )
 545     {
 546       GSLEXTRAS_CDF_ERROR ( "b < 0", GSL_EDOM );
 547     }
 548   if ( q == 0.0 )
 549     {
 550       return 1.0;
 551     }
 552   if ( q == 1.0 )
 553     {
 554       return 0.0;
 555     }
 556
 557   if ( q < GSL_DBL_EPSILON )
 558     {
 559       /*
 560        * When q is close to 0, the bisection
 561        * and interpolation done in the rest of
 562        * this routine will not give the correct
 563        * value within double precision, so
 564        * gsl_cdf_beta_Qinv is called instead.
 565        */
 566       state = gslextras_cdf_beta_Pinv ( q, a, b);
 567       result = 1.0 - state;
 568       return result;
 569     }
 570   if ( q > 1.0 - GSL_DBL_EPSILON )
 571     {
 572       /*
 573        * Make the initial guess close to 0.0.
 574        */
 575       upper = GSL_DBL_MIN;
 576       beta_result = gsl_cdf_beta_Q ( upper, a, b);
 577       while (beta_result > q )
 578         {
 579           lower = upper;
 580           upper *= 4.0;
 581           beta_result = gsl_cdf_beta_Q ( upper, a, b);
 582         }
 583       state = (upper + lower) / 2.0;
 584     }
 585   else
 586     {
 587       /* Bisection to get an initial approximation.
 588        * First guess is the expected value.
 589        */
 590       state = a/(a+b);
 591       lower = 0.0;
 592       upper = 1.0;
 593     }
 594   beta_result = gsl_cdf_beta_Q (state, a, b);
 595   err = beta_result - q;
 596   abserr = fabs(err);
 597   relerr = abserr / q;
 598   while ( relerr > BETAINV_INIT_ERR)
 599     {
 600       n_iter++;
 601       tmp = new_guess_Q ( state, lower, upper,
 602                           q, a, b);
 603       lower = ( tmp < state ) ? lower:state;
 604       upper = ( tmp < state ) ? state:upper;
 605       state = tmp;
 606       beta_result = gsl_cdf_beta_Q (state, a, b);
 607       err = q - beta_result;
 608       abserr = fabs(err);
 609       relerr = abserr / q;
 610     }
 611   result = state;
 612   min_err = relerr;
 613
 614   /*
 615    * Use a second order Lagrange interpolating
 616    * polynomial to get closer before switching to
 617    * the iterative method.
 618    */
 619   p0 = gsl_cdf_beta_Q (lower, a, b);
 620   p1 = gsl_cdf_beta_Q (state, a, b);
 621   p2 = gsl_cdf_beta_Q (upper, a, b);
 622   if(p0 > p1 && p1 > p2)
 623     {
 624       frac1 = (q - p2) / (p0 - p1);
 625       frac2 = (q - p1) / (p0 - p2);
 626       frac3 = (q - p0) / (p1 - p2);
 627       frac4 = (q - p0) * (q - p1) / ((p2 - p0) * (p2 - p1));
 628       state = frac1 * (frac2 * lower - frac3 * state)
 629         + frac4 * upper;
 630       beta_result = gsl_cdf_beta_Q( state, a, b);
 631       err = beta_result - q;
 632       abserr = fabs(err);
 633       relerr = abserr / q;
 634       if (relerr < min_err)
 635         {
 636           result = state;
 637           min_err = relerr;
 638         }
 639       else
 640         {
 641           /*
 642            * Lagrange polynomial failed to reduce the error.
 643            * This will happen with a very skewed beta density.
 644            * Undo previous steps.
 645            */
 646           state = result;
 647           beta_result = gsl_cdf_beta_P(state,a,b);
 648           err = q - beta_result;
 649           abserr = fabs(err);
 650           relerr = abserr / q;
 651         }
 652     }
 653
 654   /*
 655    * Iteration using the terms from the
 656    * Cornish-Fisher expansion. If only the first term
 657    * of the expansion is used, this is Newton's method.
 658    */
 659
 660   n_iter = 0;
 661   while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
 662     {
 663       n_iter++;
 664       c1 = get_corn_fish_lin (state, a, b);
 665       c2 = get_corn_fish_quad (state, a, b);
 666       /*
 667        * The cubic term does not help, and can harm
 668        * the approximation for extreme values of p, a and b.
 669        */
 670 #if 0
 671       c3 = get_corn_fish_cube (state, a, b);
 672       state += err * (c1 + (err / 2.0 ) * (c2 + c3 * err / 3.0));
 673 #endif
 674       state += err * (c1 + (c2 * err / 2.0 ));
 675       beta_result = gsl_cdf_beta_Q ( state, a, b );
 676       err = beta_result - q;
 677       abserr = fabs(err);
 678       relerr = abserr / q;
 679       if (relerr < min_err)
 680         {
 681           result = state;
 682           min_err = relerr;
 683         }
 684     }
 685
 686   return result;
 687 }