lib/gsl-extras/betadistinv.c

   1 /* cdf/betadistinv.c
   2  *
   3  * Copyright (C) 2004 Free Software Foundation, Inc.
   4  *
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or (at
   9  * your option) any later version.
  10  *
  11  * This program is distributed in the hope that it will be useful, but
  12  * WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  19  */
  20
  21 /*
  22  * Invert the Beta distribution.
  23  *
  24  * References:
  25  *
  26  * Roger W. Abernathy and Robert P. Smith. "Applying Series Expansion
  27  * to the Inverse Beta Distribution to Find Percentiles of the F-Distribution,"
  28  * ACM Transactions on Mathematical Software, volume 19, number 4, December 1993,
  29  * pages 474-480.
  30  *
  31  * G.W. Hill and A.W. Davis. "Generalized asymptotic expansions of a
  32  * Cornish-Fisher type," Annals of Mathematical Statistics, volume 39, number 8,
  33  * August 1968, pages 1264-1273.
  34  */
  35
  36 #include <math.h>
  37 #include <gsl/gsl_math.h>
  38 #include <gsl/gsl_errno.h>
  39 #include <gsl/gsl_sf_gamma.h>
  40 #include <gsl/gsl_cdf.h>
  41 #include <gsl/gsl_randist.h>
  42 #include "gsl-extras.h"
  43
  44 #define BETAINV_INIT_ERR .001
  45 #define BETADISTINV_N_TERMS 3
  46 #define BETADISTINV_MAXITER 20
  47
  48 static double
  49 s_bisect (double x, double y)
  50 {
  51   double result = GSL_MIN(x,y) + fabs(x - y) / 2.0;
  52   return result;
  53 }
  54 static double
  55 new_guess_P ( double old_guess, double x, double y,
  56               double prob, double a, double b)
  57 {
  58   double result;
  59   double p_hat;
  60   double end_point;
  61
  62   p_hat = gsl_cdf_beta_P(old_guess, a, b);
  63   if (p_hat < prob)
  64     {
  65       end_point = GSL_MAX(x,y);
  66     }
  67   else if ( p_hat > prob )
  68     {
  69       end_point = GSL_MIN(x,y);
  70     }
  71   else
  72     {
  73       end_point = old_guess;
  74     }
  75   result = s_bisect(old_guess, end_point);
  76
  77   return result;
  78 }
  79
  80 static double
  81 new_guess_Q ( double old_guess, double x, double y,
  82               double prob, double a, double b)
  83 {
  84   double result;
  85   double q_hat;
  86   double end_point;
  87
  88   q_hat = gsl_cdf_beta_Q(old_guess, a, b);
  89   if (q_hat >= prob)
  90     {
  91       end_point = GSL_MAX(x,y);
  92     }
  93   else if ( q_hat < prob )
  94     {
  95       end_point = GSL_MIN(x,y);
  96     }
  97   else
  98     {
  99       end_point = old_guess;
 100     }
 101   result = s_bisect(old_guess, end_point);
 102
 103   return result;
 104 }
 105
 106 /*
 107  * The get_corn_fish_* functions below return the first
 108  * three terms of the Cornish-Fisher expansion
 109  * without recursion. The recursive functions
 110  * make the code more legible when higher order coefficients
 111  * are used, but terms beyond the cubic do not
 112  * improve accuracy.
 113  */
 114   /*
 115    * Linear coefficient for the
 116    * Cornish-Fisher expansion.
 117    */
 118 static double
 119 get_corn_fish_lin (const double x, const double a, const double b)
 120 {
 121   double result;
 122
 123   result = gsl_ran_beta_pdf (x, a, b);
 124   if(result > 0)
 125     {
 126       result = 1.0 / result;
 127     }
 128   else
 129     {
 130       result = GSL_DBL_MAX;
 131     }
 132
 133   return result;
 134 }
 135   /*
 136    * Quadratic coefficient for the
 137    * Cornish-Fisher expansion.
 138    */
 139 static double
 140 get_corn_fish_quad (const double x, const double a, const double b)
 141 {
 142   double result;
 143   double gam_ab;
 144   double gam_a;
 145   double gam_b;
 146   double num;
 147   double den;
 148
 149   gam_ab =  gsl_sf_lngamma(a + b);
 150   gam_a = gsl_sf_lngamma (a);
 151   gam_b = gsl_sf_lngamma (b);
 152   num = exp(2 * (gam_a + gam_b - gam_ab)) * (1 - a + x * (b + a - 2));
 153   den = 2.0 * pow ( x, 2*a - 1 ) * pow ( 1 - x, 2 * b - 1 );
 154   if ( fabs(den) > 0.0)
 155     {
 156       result = num / den;
 157     }
 158   else
 159     {
 160       result = 0.0;
 161     }
 162
 163   return result;
 164 }
 165 /*
 166  * The cubic term for the Cornish-Fisher expansion.
 167  * Theoretically, use of this term should give a better approximation,
 168  * but in practice inclusion of the cubic term worsens the
 169  * iterative procedure in gsl_cdf_beta_Pinv and gsl_cdf_beta_Qinv
 170  * for extreme values of p, a or b.
 171  */
 172 #if 0
 173 static double
 174 get_corn_fish_cube (const double x, const double a, const double b)
 175 {
 176   double result;
 177   double am1 = a - 1.0;
 178   double am2 = a - 2.0;
 179   double apbm2 = a+b-2.0;
 180   double apbm3 = a+b-3.0;
 181   double apbm4 = a+b-4.0;
 182   double ab1ab2 = am1 * am2;
 183   double tmp;
 184   double num;
 185   double den;
 186
 187   num =  (am1 - x * apbm2) * (am1 - x * apbm2);
 188   tmp = ab1ab2 - x * (apbm2 * ( ab1ab2 * apbm4 + 1) + x * apbm2 * apbm3);
 189   num += tmp;
 190   tmp = gsl_ran_beta_pdf(x,a,b);
 191   den = 2.0 * x * x * (1 - x) * (1 - x) * pow(tmp,3.0);
 192   if ( fabs(den) > 0.0)
 193     {
 194       result = num / den;
 195     }
 196   else
 197     {
 198       result = 0.0;
 199     }
 200
 201   return result;
 202 }
 203 #endif
 204 /*
 205  * The Cornish-Fisher coefficients can be defined recursivley,
 206  * starting with the nth derivative of s_psi = -f'(x)/f(x),
 207  * where f is the beta density.
 208  *
 209  * The section below was commented out since
 210  * the recursive generation of the coeficients did
 211  * not improve the accuracy of the directly coded
 212  * the first three coefficients.
 213  */
 214 #if 0
 215 static double
 216 s_d_psi (double x, double a, double b, int n)
 217 {
 218   double result;
 219   double np1 = (double) n + 1;
 220   double asgn;
 221   double bsgn;
 222   double bm1 = b - 1.0;
 223   double am1 = a - 1.0;
 224   double mx = 1.0 - x;
 225
 226   asgn = (n % 2) ? 1.0:-1.0;
 227   bsgn = (n % 2) ? -1.0:1.0;
 228   result = gsl_sf_gamma(np1) * ((bsgn * bm1 / (pow(mx, np1)))
 229                                 + (asgn * am1 / (pow(x,np1))));
 230   return result;
 231 }
 232 /*
 233  * nth derivative of a coefficient with respect
 234  * to x.
 235  */
 236 static double
 237 get_d_coeff ( double x, double a,
 238               double b, double n, double k)
 239 {
 240   double result;
 241   double d_psi;
 242   double k_fac;
 243   double i_fac;
 244   double kmi_fac;
 245   double i;
 246
 247   if (n == 2)
 248     {
 249       result = s_d_psi(x, a, b, k);
 250     }
 251   else
 252     {
 253       result = 0.0;
 254       for (i = 0.0; i < (k+1); i++)
 255         {
 256           k_fac = gsl_sf_lngamma(k+1.0);
 257           i_fac = gsl_sf_lngamma(i+1.0);
 258           kmi_fac = gsl_sf_lngamma(k-i+1.0);
 259
 260           result += exp(k_fac - i_fac - kmi_fac)
 261             * get_d_coeff( x, a, b, 2.0, i)
 262             * get_d_coeff( x, a, b, (n - 1.0), (k - i));
 263         }
 264       result += get_d_coeff ( x, a, b, (n-1.0), (k+1.0));
 265     }
 266
 267   return result;
 268 }
 269 /*
 270  * Cornish-Fisher coefficient.
 271  */
 272 static double
 273 get_corn_fish (double c, double x,
 274                double a, double b, double n)
 275 {
 276   double result;
 277   double dc;
 278   double c_prev;
 279
 280   if(n == 1.0)
 281     {
 282       result = 1;
 283     }
 284   else if (n==2.0)
 285     {
 286       result = s_d_psi(x, a, b, 0);
 287     }
 288   else
 289     {
 290       dc = get_d_coeff(x, a, b, (n-1.0), 1.0);
 291       c_prev = get_corn_fish(c, x, a, b, (n-1.0));
 292       result = (n-1) * s_d_psi(x,a,b,0) * c_prev + dc;
 293     }
 294   return result;
 295 }
 296 #endif
 297
 298 double
 299 gslextras_cdf_beta_Pinv ( const double p, const double a, const double b)
 300 {
 301   double result;
 302   double state;
 303   double beta_result;
 304   double lower = 0.0;
 305   double upper = 1.0;
 306   double c1;
 307   double c2;
 308 #if 0
 309   double c3;
 310 #endif
 311   double frac1;
 312   double frac2;
 313   double frac3;
 314   double frac4;
 315   double p0;
 316   double p1;
 317   double p2;
 318   double tmp;
 319   double err;
 320   double abserr;
 321   double relerr;
 322   double min_err;
 323   int n_iter = 0;
 324
 325   if ( p < 0.0 )
 326     {
 327       GSLEXTRAS_CDF_ERROR("p < 0", GSL_EDOM);
 328     }
 329   if ( p > 1.0 )
 330     {
 331       GSLEXTRAS_CDF_ERROR("p > 1",GSL_EDOM);
 332     }
 333   if ( a < 0.0 )
 334     {
 335       GSLEXTRAS_CDF_ERROR ("a < 0", GSL_EDOM );
 336     }
 337   if ( b < 0.0 )
 338     {
 339       GSLEXTRAS_CDF_ERROR ( "b < 0", GSL_EDOM );
 340     }
 341   if ( p == 0.0 )
 342     {
 343       return 0.0;
 344     }
 345   if ( p == 1.0 )
 346     {
 347       return 1.0;
 348     }
 349
 350   if (p > (1.0 - GSL_DBL_EPSILON))
 351     {
 352       /*
 353        * When p is close to 1.0, the bisection
 354        * works better with gsl_cdf_Q.
 355        */
 356       state = gslextras_cdf_beta_Qinv ( p, a, b);
 357       result = 1.0 - state;
 358       return result;
 359     }
 360   if (p < GSL_DBL_EPSILON )
 361     {
 362       /*
 363        * Start at a small value and rise until
 364        * we are above the correct result. This
 365        * avoids overflow. When p is very close to
 366        * 0, an initial state value of a/(a+b) will
 367        * cause the interpolating polynomial
 368        * to overflow.
 369        */
 370       upper = GSL_DBL_MIN;
 371       beta_result = gsl_cdf_beta_P (upper, a, b);
 372       while (beta_result < p)
 373         {
 374           lower = upper;
 375           upper *= 4.0;
 376           beta_result = gsl_cdf_beta_P (upper, a, b);
 377         }
 378       state = (lower + upper) / 2.0;
 379     }
 380   else
 381     {
 382       /*
 383        * First guess is the expected value.
 384        */
 385       lower = 0.0;
 386       upper = 1.0;
 387       state = a/(a+b);
 388       beta_result = gsl_cdf_beta_P (state, a, b);
 389     }
 390   err = beta_result - p;
 391   abserr = fabs(err);
 392   relerr = abserr / p;
 393   while ( relerr > BETAINV_INIT_ERR)
 394     {
 395       tmp = new_guess_P ( state, lower, upper,
 396                           p, a, b);
 397       lower = ( tmp < state ) ? lower:state;
 398       upper = ( tmp < state ) ? state:upper;
 399       state = tmp;
 400       beta_result = gsl_cdf_beta_P (state, a, b);
 401       err = p - beta_result;
 402       abserr = fabs(err);
 403       relerr = abserr / p;
 404     }
 405
 406   result = state;
 407   min_err = relerr;
 408   /*
 409    * Use a second order Lagrange interpolating
 410    * polynomial to get closer before switching to
 411    * the iterative method.
 412    */
 413   p0 = gsl_cdf_beta_P (lower, a, b);
 414   p1 = gsl_cdf_beta_P (state, a, b);
 415   p2 = gsl_cdf_beta_P (upper, a, b);
 416   if( p0 < p1 && p1 < p2)
 417     {
 418       frac1 = (p - p2) / (p0 - p1);
 419       frac2 = (p - p1) / (p0 - p2);
 420       frac3 = (p - p0) / (p1 - p2);
 421       frac4 = (p - p0) * (p - p1) / ((p2 - p0) * (p2 - p1));
 422       state = frac1 * (frac2 * lower - frac3 * state)
 423         + frac4 * upper;
 424
 425       beta_result = gsl_cdf_beta_P( state, a, b);
 426       err = p - beta_result;
 427       abserr = fabs(err);
 428       relerr = abserr / p;
 429       if (relerr < min_err)
 430         {
 431           result = state;
 432           min_err = relerr;
 433         }
 434       else
 435         {
 436           /*
 437            * Lagrange polynomial failed to reduce the error.
 438            * This will happen with a very skewed beta density.
 439            * Undo previous steps.
 440            */
 441           state = result;
 442           beta_result = gsl_cdf_beta_P(state,a,b);
 443           err = p - beta_result;
 444           abserr = fabs(err);
 445           relerr = abserr / p;
 446         }
 447     }
 448
 449   n_iter = 0;
 450
 451   /*
 452    * Newton-type iteration using the terms from the
 453    * Cornish-Fisher expansion. If only the first term
 454    * of the expansion is used, this is Newton's method.
 455    */
 456   while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
 457     {
 458       n_iter++;
 459       c1 = get_corn_fish_lin (state, a, b);
 460       c2 = get_corn_fish_quad (state, a, b);
 461       /*
 462        * The cubic term does not help, and can can
 463        * harm the approximation for extreme values of
 464        * p, a, or b.
 465        */
 466 #if 0
 467       c3 = get_corn_fish_cube (state, a, b);
 468       state += err * (c1 + (err / 2.0 ) * (c2 + c3 * err / 3.0));
 469 #endif
 470       state += err * (c1 + (c2 * err / 2.0 ));
 471       /*
 472        * The section below which is commented out uses
 473        * a recursive function to get the coefficients.
 474        * The recursion makes coding higher-order terms
 475        * easier, but did not improve the result beyond
 476        * the use of three terms. Since explicitly coding
 477        * those three terms in the get_corn_fish_* functions
 478        * was not difficult, the recursion was abandoned.
 479        */
 480 #if 0
 481       coeff = 1.0;
 482       for(i = 1.0; i < BETADISTINV_N_TERMS; i += 1.0)
 483         {
 484           i_fac *= i;
 485           coeff = get_corn_fish (coeff, prior_state, a, b, i);
 486           state += coeff * pow(err, i) /
 487             (i_fac * pow (gsl_ran_beta_pdf(prior_state,a,b), i));
 488         }
 489 #endif
 490       beta_result = gsl_cdf_beta_P ( state, a, b );
 491       err = p - beta_result;
 492       abserr = fabs(err);
 493       relerr = abserr / p;
 494       if (relerr < min_err)
 495         {
 496           result = state;
 497           min_err = relerr;
 498         }
 499     }
 500
 501   return result;
 502 }
 503
 504 double
 505 gslextras_cdf_beta_Qinv (double q, double a, double b)
 506 {
 507   double result;
 508   double state;
 509   double beta_result;
 510   double lower = 0.0;
 511   double upper = 1.0;
 512   double c1;
 513   double c2;
 514 #if 0
 515   double c3;
 516 #endif
 517   double p0;
 518   double p1;
 519   double p2;
 520   double frac1;
 521   double frac2;
 522   double frac3;
 523   double frac4;
 524   double tmp;
 525   double err;
 526   double abserr;
 527   double relerr;
 528   double min_err;
 529   int n_iter = 0;
 530
 531   if ( q < 0.0 )
 532     {
 533       GSLEXTRAS_CDF_ERROR("q < 0", GSL_EDOM);
 534     }
 535   if ( q > 1.0 )
 536     {
 537       GSLEXTRAS_CDF_ERROR("q > 1",GSL_EDOM);
 538     }
 539   if ( a < 0.0 )
 540     {
 541       GSLEXTRAS_CDF_ERROR ("a < 0", GSL_EDOM );
 542     }
 543   if ( b < 0.0 )
 544     {
 545       GSLEXTRAS_CDF_ERROR ( "b < 0", GSL_EDOM );
 546     }
 547   if ( q == 0.0 )
 548     {
 549       return 1.0;
 550     }
 551   if ( q == 1.0 )
 552     {
 553       return 0.0;
 554     }
 555
 556   if ( q < GSL_DBL_EPSILON )
 557     {
 558       /*
 559        * When q is close to 0, the bisection
 560        * and interpolation done in the rest of
 561        * this routine will not give the correct
 562        * value within double precision, so
 563        * gsl_cdf_beta_Qinv is called instead.
 564        */
 565       state = gslextras_cdf_beta_Pinv ( q, a, b);
 566       result = 1.0 - state;
 567       return result;
 568     }
 569   if ( q > 1.0 - GSL_DBL_EPSILON )
 570     {
 571       /*
 572        * Make the initial guess close to 0.0.
 573        */
 574       upper = GSL_DBL_MIN;
 575       beta_result = gsl_cdf_beta_Q ( upper, a, b);
 576       while (beta_result > q )
 577         {
 578           lower = upper;
 579           upper *= 4.0;
 580           beta_result = gsl_cdf_beta_Q ( upper, a, b);
 581         }
 582       state = (upper + lower) / 2.0;
 583     }
 584   else
 585     {
 586       /* Bisection to get an initial approximation.
 587        * First guess is the expected value.
 588        */
 589       state = a/(a+b);
 590       lower = 0.0;
 591       upper = 1.0;
 592     }
 593   beta_result = gsl_cdf_beta_Q (state, a, b);
 594   err = beta_result - q;
 595   abserr = fabs(err);
 596   relerr = abserr / q;
 597   while ( relerr > BETAINV_INIT_ERR)
 598     {
 599       n_iter++;
 600       tmp = new_guess_Q ( state, lower, upper,
 601                           q, a, b);
 602       lower = ( tmp < state ) ? lower:state;
 603       upper = ( tmp < state ) ? state:upper;
 604       state = tmp;
 605       beta_result = gsl_cdf_beta_Q (state, a, b);
 606       err = q - beta_result;
 607       abserr = fabs(err);
 608       relerr = abserr / q;
 609     }
 610   result = state;
 611   min_err = relerr;
 612
 613   /*
 614    * Use a second order Lagrange interpolating
 615    * polynomial to get closer before switching to
 616    * the iterative method.
 617    */
 618   p0 = gsl_cdf_beta_Q (lower, a, b);
 619   p1 = gsl_cdf_beta_Q (state, a, b);
 620   p2 = gsl_cdf_beta_Q (upper, a, b);
 621   if(p0 > p1 && p1 > p2)
 622     {
 623       frac1 = (q - p2) / (p0 - p1);
 624       frac2 = (q - p1) / (p0 - p2);
 625       frac3 = (q - p0) / (p1 - p2);
 626       frac4 = (q - p0) * (q - p1) / ((p2 - p0) * (p2 - p1));
 627       state = frac1 * (frac2 * lower - frac3 * state)
 628         + frac4 * upper;
 629       beta_result = gsl_cdf_beta_Q( state, a, b);
 630       err = beta_result - q;
 631       abserr = fabs(err);
 632       relerr = abserr / q;
 633       if (relerr < min_err)
 634         {
 635           result = state;
 636           min_err = relerr;
 637         }
 638       else
 639         {
 640           /*
 641            * Lagrange polynomial failed to reduce the error.
 642            * This will happen with a very skewed beta density.
 643            * Undo previous steps.
 644            */
 645           state = result;
 646           beta_result = gsl_cdf_beta_P(state,a,b);
 647           err = q - beta_result;
 648           abserr = fabs(err);
 649           relerr = abserr / q;
 650         }
 651     }
 652
 653   /*
 654    * Iteration using the terms from the
 655    * Cornish-Fisher expansion. If only the first term
 656    * of the expansion is used, this is Newton's method.
 657    */
 658
 659   n_iter = 0;
 660   while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
 661     {
 662       n_iter++;
 663       c1 = get_corn_fish_lin (state, a, b);
 664       c2 = get_corn_fish_quad (state, a, b);
 665       /*
 666        * The cubic term does not help, and can harm
 667        * the approximation for extreme values of p, a and b.
 668        */
 669 #if 0
 670       c3 = get_corn_fish_cube (state, a, b);
 671       state += err * (c1 + (err / 2.0 ) * (c2 + c3 * err / 3.0));
 672 #endif
 673       state += err * (c1 + (c2 * err / 2.0 ));
 674       beta_result = gsl_cdf_beta_Q ( state, a, b );
 675       err = beta_result - q;
 676       abserr = fabs(err);
 677       relerr = abserr / q;
 678       if (relerr < min_err)
 679         {
 680           result = state;
 681           min_err = relerr;
 682         }
 683     }
 684
 685   return result;
 686 }