Implement some more transformation functions using code from Jason

author Ben Pfaff <blp@gnu.org>

Wed, 9 Mar 2005 18:02:15 +0000 (18:02 +0000)

committer Ben Pfaff <blp@gnu.org>

Wed, 9 Mar 2005 18:02:15 +0000 (18:02 +0000)
author Ben Pfaff <blp@gnu.org>
Wed, 9 Mar 2005 18:02:15 +0000 (18:02 +0000)
committer Ben Pfaff <blp@gnu.org>
Wed, 9 Mar 2005 18:02:15 +0000 (18:02 +0000)
diff --git a/configure.ac b/configure.ac

index 69836f9599a09e14ae4b8645cf05f381cfd26f4e..542b453aa4a31b5e074e5c79f4c4a3a965870afd 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -149,7 +149,7 @@ if test x"$enable_debug" = x"yes"  ; then
  fi
  
  AC_CONFIG_FILES([Makefile po/Makefile.in m4/Makefile 
-                lib/Makefile lib/misc/Makefile 
+                lib/Makefile lib/misc/Makefile lib/gsl-extras/Makefile
                  doc/Makefile 
                  src/Makefile src/expressions/Makefile
                  config/Makefile
diff --git a/lib/ChangeLog b/lib/ChangeLog

index 505a5fb8e6fa471cdc1bf349ce1b636b2706933b..7406ab0b3186aeab95079110718bab0b44751f0d 100644 (file)
--- a/lib/ChangeLog
+++ b/lib/ChangeLog
@@ -1,3 +1,9 @@
+Wed Mar  9 09:53:50 2005  Ben Pfaff  <blp@gnu.org>
+
+       * gsl-extras/: New directory.
+
+       * Makefile.am: (SUBDIRS) Add gsl-extras.
+
  Mon Feb 28 23:20:05 2005  Ben Pfaff  <blp@gnu.org>
  
         * julcal/: Removed directory.
diff --git a/lib/Makefile.am b/lib/Makefile.am

index bae615ac20a6c4c9f368896934517a04d84793ac..f50e789594de748b20fc66d15350949e7eaf7aae 100644 (file)
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -1,5 +1,5 @@
  ## Process this file with automake to produce Makefile.in  -*- makefile -*-
  
-SUBDIRS = misc 
+SUBDIRS = misc gsl-extras
  
  MAINTAINERCLEANFILES = Makefile.in
diff --git a/lib/gsl-extras/Makefile.am b/lib/gsl-extras/Makefile.am

new file mode 100644 (file)

index 0000000..82b61ef
--- /dev/null
+++ b/lib/gsl-extras/Makefile.am
@@ -0,0 +1,16 @@
+## Process this file with automake to produce Makefile.in  -*- makefile -*-
+
+noinst_LIBRARIES = libgsl-extras.a
+
+AM_CPPFLAGS = -I$(top_srcdir)
+
+AM_CFLAGS=
+
+if cc_is_gcc
+AM_CFLAGS+=-Wall -W -Wwrite-strings -Wstrict-prototypes \
+-Wpointer-arith -Wno-sign-compare -Wmissing-prototypes \
+-ansi 
+endif
+
+libgsl_extras_a_SOURCES = betadistinv.c binomial.c geometric.c \
+hypergeometric.c negbinom.c poisson.c gsl-extras.h
diff --git a/lib/gsl-extras/betadistinv.c b/lib/gsl-extras/betadistinv.c

new file mode 100644 (file)

index 0000000..a0838d2
--- /dev/null
+++ b/lib/gsl-extras/betadistinv.c
@@ -0,0 +1,659 @@
+/* cdf/betadistinv.c
+ *
+ * Copyright (C) 2004 Jason H. Stover.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+/*
+ * Invert the Beta distribution. 
+ * 
+ * References:
+ *
+ * Roger W. Abernathy and Robert P. Smith. "Applying Series Expansion 
+ * to the Inverse Beta Distribution to Find Percentiles of the F-Distribution,"
+ * ACM Transactions on Mathematical Software, volume 19, number 4, December 1993,
+ * pages 474-480.
+ *
+ * G.W. Hill and A.W. Davis. "Generalized asymptotic expansions of a 
+ * Cornish-Fisher type," Annals of Mathematical Statistics, volume 39, number 8,
+ * August 1968, pages 1264-1273.
+ */
+#include <config.h>
+#include <math.h>
+#include <gsl/gsl_math.h>
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_sf_gamma.h>
+#include <gsl/gsl_cdf.h>
+#include <gsl/gsl_randist.h>
+#include "gsl-extras.h"
+
+#define BETAINV_INIT_ERR .01
+#define BETADISTINV_N_TERMS 3
+#define BETADISTINV_MAXITER 20
+
+static double 
+s_bisect (double x, double y)
+{
+  double result = GSL_MIN(x,y) + fabs(x - y) / 2.0;
+  return result;
+}
+static double
+new_guess_P ( double old_guess, double x, double y, 
+             double prob, double a, double b)
+{
+  double result;
+  double p_hat;
+  double end_point;
+  
+  p_hat = gsl_cdf_beta_P(old_guess, a, b);
+  if (p_hat < prob)
+    {
+      end_point = GSL_MAX(x,y);
+    }
+  else if ( p_hat > prob )
+    {
+      end_point = GSL_MIN(x,y);
+    }
+  else
+    {
+      end_point = old_guess;
+    }
+  result = s_bisect(old_guess, end_point);
+  
+  return result;
+}
+
+static double
+new_guess_Q ( double old_guess, double x, double y, 
+             double prob, double a, double b)
+{
+  double result;
+  double q_hat;
+  double end_point;
+  
+  q_hat = gsl_cdf_beta_Q(old_guess, a, b);
+  if (q_hat >= prob)
+    {
+      end_point = GSL_MAX(x,y);
+    }
+  else if ( q_hat < prob )
+    {
+      end_point = GSL_MIN(x,y);
+    }
+  else
+    {
+      end_point = old_guess;
+    }
+  result = s_bisect(old_guess, end_point);
+  
+  return result;
+}
+
+/*
+ * The get_corn_fish_* functions below return the first
+ * three terms of the Cornish-Fisher expansion
+ * without recursion. The recursive functions
+ * make the code more legible when higher order coefficients
+ * are used, but terms beyond the cubic do not 
+ * improve accuracy.
+ */
+  /*
+   * Linear coefficient for the 
+   * Cornish-Fisher expansion.
+   */
+static double 
+get_corn_fish_lin (const double x, const double a, const double b)
+{
+  double result;
+  
+  result = gsl_ran_beta_pdf (x, a, b);
+  if(result > 0)
+    {
+      result = 1.0 / result;
+    }
+  else
+    {
+      result = GSL_DBL_MAX;
+    }
+
+  return result;
+}
+  /*
+   * Quadratic coefficient for the 
+   * Cornish-Fisher expansion.
+   */
+static double
+get_corn_fish_quad (const double x, const double a, const double b)
+{
+  double result;
+  double gam_ab;
+  double gam_a;
+  double gam_b;
+  double num;
+  double den;
+  
+  gam_ab =  gsl_sf_lngamma(a + b);
+  gam_a = gsl_sf_lngamma (a);
+  gam_b = gsl_sf_lngamma (b);
+  num = exp(2 * (gam_a + gam_b - gam_ab)) * (1 - a + x * (b + a - 2));
+  den = 2.0 * pow ( x, 2*a - 1 ) * pow ( 1 - x, 2 * b - 1 );
+  if ( fabs(den) > 0.0)
+    {
+      result = num / den;
+    }
+  else
+    {
+      result = 0.0;
+    }
+
+  return result;
+}
+/*
+ * The cubic term for the Cornish-Fisher expansion.
+ * Theoretically, use of this term should give a better approximation, 
+ * but in practice inclusion of the cubic term worsens the 
+ * iterative procedure in gsl_cdf_beta_Pinv and gsl_cdf_beta_Qinv
+ * for extreme values of p, a or b.
+ */                                
+#if 0
+static double 
+get_corn_fish_cube (const double x, const double a, const double b)
+{
+  double result;
+  double am1 = a - 1.0;
+  double am2 = a - 2.0;
+  double apbm2 = a+b-2.0;
+  double apbm3 = a+b-3.0;
+  double apbm4 = a+b-4.0;
+  double ab1ab2 = am1 * am2;
+  double tmp;
+  double num;
+  double den;
+
+  num =  (am1 - x * apbm2) * (am1 - x * apbm2);
+  tmp = ab1ab2 - x * (apbm2 * ( ab1ab2 * apbm4 + 1) + x * apbm2 * apbm3);
+  num += tmp;
+  tmp = gsl_ran_beta_pdf(x,a,b);
+  den = 2.0 * x * x * (1 - x) * (1 - x) * pow(tmp,3.0);
+  if ( fabs(den) > 0.0)
+    {
+      result = num / den;
+    }
+  else
+    {
+      result = 0.0;
+    }
+
+  return result;
+}
+#endif
+/*
+ * The Cornish-Fisher coefficients can be defined recursivley,
+ * starting with the nth derivative of s_psi = -f'(x)/f(x),
+ * where f is the beta density.
+ *
+ * The section below was commented out since 
+ * the recursive generation of the coeficients did
+ * not improve the accuracy of the directly coded 
+ * the first three coefficients.
+ */
+#if 0
+static double
+s_d_psi (double x, double a, double b, int n)
+{
+  double result;
+  double np1 = (double) n + 1;
+  double asgn;
+  double bsgn;
+  double bm1 = b - 1.0;
+  double am1 = a - 1.0;
+  double mx = 1.0 - x;
+  
+  asgn = (n % 2) ? 1.0:-1.0;
+  bsgn = (n % 2) ? -1.0:1.0;
+  result = gsl_sf_gamma(np1) * ((bsgn * bm1 / (pow(mx, np1)))
+                               + (asgn * am1 / (pow(x,np1))));
+  return result;
+}
+/*
+ * nth derivative of a coefficient with respect 
+ * to x.
+ */
+static double 
+get_d_coeff ( double x, double a, 
+             double b, double n, double k)
+{
+  double result;
+  double d_psi;
+  double k_fac;
+  double i_fac;
+  double kmi_fac;
+  double i;
+  
+  if (n == 2)
+    {
+      result = s_d_psi(x, a, b, k);
+    }
+  else
+    {
+      result = 0.0;
+      for (i = 0.0; i < (k+1); i++)
+       {
+         k_fac = gsl_sf_lngamma(k+1.0);
+         i_fac = gsl_sf_lngamma(i+1.0);
+         kmi_fac = gsl_sf_lngamma(k-i+1.0);
+         
+         result += exp(k_fac - i_fac - kmi_fac)
+           * get_d_coeff( x, a, b, 2.0, i) 
+           * get_d_coeff( x, a, b, (n - 1.0), (k - i));
+       }
+      result += get_d_coeff ( x, a, b, (n-1.0), (k+1.0));
+    }
+
+  return result;
+}
+/*
+ * Cornish-Fisher coefficient.
+ */
+static double
+get_corn_fish (double c, double x, 
+              double a, double b, double n)
+{
+  double result;
+  double dc;
+  double c_prev;
+  
+  if(n == 1.0)
+    {
+      result = 1;
+    }
+  else if (n==2.0)
+    {
+      result = s_d_psi(x, a, b, 0);
+    }
+  else
+    {
+      dc = get_d_coeff(x, a, b, (n-1.0), 1.0);
+      c_prev = get_corn_fish(c, x, a, b, (n-1.0));
+      result = (n-1) * s_d_psi(x,a,b,0) * c_prev + dc;
+    }
+  return result;
+}
+#endif
+
+double 
+gslextras_cdf_beta_Pinv ( const double p, const double a, const double b)
+{
+  double result;
+  double state;
+  double beta_result;
+  double lower = 0.0;
+  double upper = 1.0;
+  double c1;
+  double c2;
+#if 0
+  double c3;
+#endif
+  double frac1;
+  double frac2;
+  double frac3;
+  double frac4;
+  double p0;
+  double p1;
+  double p2;
+  double tmp;
+  double err;
+  double abserr;
+  double relerr;
+  double min_err;
+  int n_iter = 0;
+
+  if ( p < 0.0 )
+    {
+      GSLEXTRAS_CDF_ERROR("p < 0", GSL_EDOM);
+    }
+  if ( p > 1.0 )
+    {
+      GSLEXTRAS_CDF_ERROR("p > 1",GSL_EDOM);
+    }
+  if ( a < 0.0 )
+    {
+      GSLEXTRAS_CDF_ERROR ("a < 0", GSL_EDOM );
+    }
+  if ( b < 0.0 )
+    {
+      GSLEXTRAS_CDF_ERROR ( "b < 0", GSL_EDOM );
+    }
+  if ( p == 0.0 )
+    {
+      return 0.0;
+    }
+  if ( p == 1.0 )
+    {
+      return 1.0;
+    }
+
+  if (p > (1.0 - GSL_DBL_EPSILON))
+    {
+      /*
+       * When p is close to 1.0, the bisection
+       * works better with gsl_cdf_Q.
+       */
+      state = gslextras_cdf_beta_Qinv ( p, a, b);
+      result = 1.0 - state;
+      return result;
+    }
+  if (p < GSL_DBL_EPSILON )
+    {
+      /*
+       * Start at a small value and rise until
+       * we are above the correct result. This 
+       * avoids overflow. When p is very close to 
+       * 0, an initial state value of a/(a+b) will
+       * cause the interpolating polynomial
+       * to overflow.
+       */
+      upper = GSL_DBL_MIN;
+      beta_result = gsl_cdf_beta_P (upper, a, b);
+      while (beta_result < p)
+       {
+         lower = upper;
+         upper *= 4.0;
+         beta_result = gsl_cdf_beta_P (upper, a, b);
+       }
+      state = (lower + upper) / 2.0;
+    }
+  else
+    {
+      /*
+       * First guess is the expected value.
+       */
+      lower = 0.0;
+      upper = 1.0;
+      state = a/(a+b);
+      beta_result = gsl_cdf_beta_P (state, a, b);
+    }
+  err = beta_result - p;
+  abserr = fabs(err);
+  relerr = abserr / p;
+  while ( relerr > BETAINV_INIT_ERR && n_iter < 100)
+    {
+      tmp = new_guess_P ( state, lower, upper, 
+                         p, a, b);
+      lower = ( tmp < state ) ? lower:state;
+      upper = ( tmp < state ) ? state:upper;
+      state = tmp;
+      beta_result = gsl_cdf_beta_P (state, a, b);
+      err = p - beta_result;
+      abserr = fabs(err);
+      relerr = abserr / p;
+    }
+
+  result = state;
+  min_err = relerr;
+  /*
+   * Use a second order Lagrange interpolating
+   * polynomial to get closer before switching to
+   * the iterative method.
+   */
+  p0 = gsl_cdf_beta_P (lower, a, b);
+  p1 = gsl_cdf_beta_P (state, a, b);
+  p2 = gsl_cdf_beta_P (upper, a, b);
+  if( p0 < p1 && p1 < p2)
+    {
+      frac1 = (p - p2) / (p0 - p1);
+      frac2 = (p - p1) / (p0 - p2);
+      frac3 = (p - p0) / (p1 - p2);
+      frac4 = (p - p0) * (p - p1) / ((p2 - p0) * (p2 - p1));
+      state = frac1 * (frac2 * lower - frac3 * state)
+       + frac4 * upper;
+
+      beta_result = gsl_cdf_beta_P( state, a, b);
+      err = p - beta_result;
+      abserr = fabs(err);
+      relerr = abserr / p;
+      if (relerr < min_err)
+       {
+         result = state;
+         min_err = relerr;
+       }
+    }
+
+  n_iter = 0;
+
+  /*
+   * Newton-type iteration using the terms from the
+   * Cornish-Fisher expansion. If only the first term
+   * of the expansion is used, this is Newton's method.
+   */
+  while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
+    {
+      n_iter++;
+      c1 = get_corn_fish_lin (state, a, b);
+      c2 = get_corn_fish_quad (state, a, b);
+      /*
+       * The cubic term does not help, and can can
+       * harm the approximation for extreme values of
+       * p, a, or b.       
+       */      
+#if 0
+      c3 = get_corn_fish_cube (state, a, b);
+      state += err * (c1 + (err / 2.0 ) * (c2 + c3 * err / 3.0));
+#endif
+      state += err * (c1 + (c2 * err / 2.0 ));
+      /*
+       * The section below which is commented out uses
+       * a recursive function to get the coefficients. 
+       * The recursion makes coding higher-order terms
+       * easier, but did not improve the result beyond
+       * the use of three terms. Since explicitly coding
+       * those three terms in the get_corn_fish_* functions
+       * was not difficult, the recursion was abandoned.
+       */
+#if 0 
+      coeff = 1.0;
+      for(i = 1.0; i < BETADISTINV_N_TERMS; i += 1.0)
+       {
+         i_fac *= i;
+         coeff = get_corn_fish (coeff, prior_state, a, b, i);
+         state += coeff * pow(err, i) / 
+           (i_fac * pow (gsl_ran_beta_pdf(prior_state,a,b), i));
+       }
+#endif
+      beta_result = gsl_cdf_beta_P ( state, a, b );
+      err = p - beta_result;
+      abserr = fabs(err);
+      relerr = abserr / p;
+      if (relerr < min_err)
+       {
+         result = state;
+         min_err = relerr;
+       }
+    }
+
+  return result;
+}
+
+double
+gslextras_cdf_beta_Qinv (double q, double a, double b)
+{
+  double result;
+  double state;
+  double beta_result;
+  double lower = 0.0;
+  double upper = 1.0;
+  double c1;
+  double c2;
+#if 0
+  double c3;
+#endif
+  double p0;
+  double p1;
+  double p2;
+  double frac1;
+  double frac2;
+  double frac3;
+  double frac4;
+  double tmp;
+  double err;
+  double abserr;
+  double relerr;
+  double min_err;
+  int n_iter = 0;
+
+  if ( q < 0.0 )
+    {
+      GSLEXTRAS_CDF_ERROR("q < 0", GSL_EDOM);
+    }
+  if ( q > 1.0 )
+    {
+      GSLEXTRAS_CDF_ERROR("q > 1",GSL_EDOM);
+    }
+  if ( a < 0.0 )
+    {
+      GSLEXTRAS_CDF_ERROR ("a < 0", GSL_EDOM );
+    }
+  if ( b < 0.0 )
+    {
+      GSLEXTRAS_CDF_ERROR ( "b < 0", GSL_EDOM );
+    }
+  if ( q == 0.0 )
+    {
+      return 1.0;
+    }
+  if ( q == 1.0 )
+    {
+      return 0.0;
+    }
+
+  if ( q < GSL_DBL_EPSILON )
+    {
+      /*
+       * When q is close to 0, the bisection
+       * and interpolation done in the rest of
+       * this routine will not give the correct
+       * value within double precision, so 
+       * gsl_cdf_beta_Qinv is called instead.
+       */
+      state = gslextras_cdf_beta_Pinv ( q, a, b);
+      result = 1.0 - state;
+      return result;
+    }
+  if ( q > 1.0 - GSL_DBL_EPSILON )
+    {
+      /*
+       * Make the initial guess close to 0.0.
+       */
+      upper = GSL_DBL_MIN;
+      beta_result = gsl_cdf_beta_Q ( upper, a, b);
+      while (beta_result > q )
+       {
+         lower = upper;
+         upper *= 4.0;
+         beta_result = gsl_cdf_beta_Q ( upper, a, b);
+       }
+      state = (upper + lower) / 2.0;
+    }
+  else
+    {
+      /* Bisection to get an initial approximation.
+       * First guess is the expected value.
+       */
+      state = a/(a+b);
+      lower = 0.0;
+      upper = 1.0;
+    }
+  beta_result = gsl_cdf_beta_Q (state, a, b);
+  err = beta_result - q;
+  abserr = fabs(err);
+  relerr = abserr / q;
+  while ( relerr > BETAINV_INIT_ERR && n_iter < 100)
+    {
+      n_iter++;
+      tmp = new_guess_Q ( state, lower, upper, 
+                         q, a, b);
+      lower = ( tmp < state ) ? lower:state;
+      upper = ( tmp < state ) ? state:upper;
+      state = tmp;
+      beta_result = gsl_cdf_beta_Q (state, a, b);
+      err = q - beta_result;
+      abserr = fabs(err);
+      relerr = abserr / q;
+    }
+  result = state;
+  min_err = relerr;
+
+  /*
+   * Use a second order Lagrange interpolating
+   * polynomial to get closer before switching to
+   * the iterative method.
+   */
+  p0 = gsl_cdf_beta_Q (lower, a, b);
+  p1 = gsl_cdf_beta_Q (state, a, b);
+  p2 = gsl_cdf_beta_Q (upper, a, b);
+  if(p0 > p1 && p1 > p2)
+    {
+      frac1 = (q - p2) / (p0 - p1);
+      frac2 = (q - p1) / (p0 - p2);
+      frac3 = (q - p0) / (p1 - p2);
+      frac4 = (q - p0) * (q - p1) / ((p2 - p0) * (p2 - p1));
+      state = frac1 * (frac2 * lower - frac3 * state)
+       + frac4 * upper;
+      beta_result = gsl_cdf_beta_Q( state, a, b);
+      err = beta_result - q;
+      abserr = fabs(err);
+      relerr = abserr / q;
+      if (relerr < min_err)
+       {
+         result = state;
+         min_err = relerr;
+       }
+    }
+
+  /*
+   * Iteration using the terms from the
+   * Cornish-Fisher expansion. If only the first term
+   * of the expansion is used, this is Newton's method.
+   */
+
+  n_iter = 0;
+  while ( relerr > GSL_DBL_EPSILON && n_iter < BETADISTINV_MAXITER)
+    {
+      n_iter++;
+      c1 = get_corn_fish_lin (state, a, b);
+      c2 = get_corn_fish_quad (state, a, b);
+      /*
+       * The cubic term does not help, and can harm
+       * the approximation for extreme values of p, a and b.
+       */
+#if 0
+      c3 = get_corn_fish_cube (state, a, b);
+      state += err * (c1 + (err / 2.0 ) * (c2 + c3 * err / 3.0));
+#endif
+      state += err * (c1 + (c2 * err / 2.0 ));
+      beta_result = gsl_cdf_beta_Q ( state, a, b );
+      err = beta_result - q;
+      abserr = fabs(err);
+      relerr = abserr / q;
+      if (relerr < min_err)
+       {
+         result = state;
+         min_err = relerr;
+       }
+    }
+
+  return result;
+}
diff --git a/lib/gsl-extras/binomial.c b/lib/gsl-extras/binomial.c

new file mode 100644 (file)

index 0000000..a85f4a3
--- /dev/null
+++ b/lib/gsl-extras/binomial.c
@@ -0,0 +1,98 @@
+/* cdf/binomial.c
+ *
+ * Copyright (C) 2004 Jason H. Stover.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+/*
+ * Computes the cumulative distribution function for a binomial
+ * random variable. For a binomial random variable X with n trials
+ * and success probability p,
+ *
+ *          Pr( X <= k ) = Pr( Y >= p )
+ *
+ * where Y is a beta random variable with parameters k+1 and n-k.
+ *
+ * Reference: 
+ * 
+ * W. Feller, "An Introduction to Probability and Its
+ * Applications," volume 1. Wiley, 1968. Exercise 45, page 173,
+ * chapter 6.
+ */
+#include <config.h>
+#include <math.h>
+#include <gsl/gsl_math.h>
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_cdf.h>
+#include "gsl-extras.h"
+
+double
+gslextras_cdf_binomial_P(const long k, const long n, const double p)
+{
+  double P;
+  double a;
+  double b;
+
+  if(p > 1.0 || p < 0.0)
+    {
+      GSLEXTRAS_CDF_ERROR("p < 0 or p > 1",GSL_EDOM);
+    }
+  if ( k >= n )
+    {
+      P = 1.0;
+    }
+  else if (k < 0)
+    {
+      P = 0.0;
+    }
+  else
+    {
+      a = (double) k+1;
+      b = (double) n - k;
+      P = gsl_cdf_beta_Q( p, a, b);
+    }
+  
+  return P;
+}
+double
+gslextras_cdf_binomial_Q(const long k, const long n, const double q)
+{
+  double P;
+  double a;
+  double b;
+
+  if(q > 1.0 || q < 0.0)
+    {
+      GSLEXTRAS_CDF_ERROR("p < 0 or p > 1",GSL_EDOM);
+    }
+  if( k >= n )
+    {
+      P = 0.0;
+    }
+  else if ( k < 0 )
+    {
+      P = 1.0;
+    }
+  else
+    {
+      a = (double) k+1;
+      b = (double) n - k;
+      P = gsl_cdf_beta_P(q, a, b);
+    }
+
+  return P;
+}
+
diff --git a/lib/gsl-extras/geometric.c b/lib/gsl-extras/geometric.c

new file mode 100644 (file)

index 0000000..4612fad
--- /dev/null
+++ b/lib/gsl-extras/geometric.c
@@ -0,0 +1,105 @@
+/* cdf/geometric.c
+ *
+ * Copyright (C) 2004 Jason H. Stover.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+/*
+ * Pr(X <= n) for a negative binomial random variable X, i.e.,
+ * the probability of n or fewer failuers before success k.
+ */
+
+#include <config.h>
+#include <math.h>
+#include <gsl/gsl_math.h>
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_sf.h>
+#include <gsl/gsl_cdf.h>
+#include "gsl-extras.h"
+
+/*
+ * Pr (X <= n), i.e., the probability of n or fewer
+ * failures until the first success.
+ */
+double
+gslextras_cdf_geometric_P (const long n, const double p)
+{
+  double P;
+  double a;
+  int i;
+  int m;
+  double sign = 1.0;
+  double term;
+  double q;
+
+  if(p > 1.0 || p < 0.0)
+    {
+      GSLEXTRAS_CDF_ERROR("p < 0 or p > 1",GSL_EDOM);
+    }
+  if ( n < 0 )
+    {
+      return 0.0;
+    }
+  q = 1.0 - p;
+  a = (double) n+1;
+  if( p < GSL_DBL_EPSILON )
+    {
+      /*
+       * 1.0 - pow(q,a) will overflow, so use
+       * a Taylor series.
+       */
+      i = 2;
+      m = n+1;
+      term = exp(log(a) + log(p));
+      P = term;
+      while ( term > GSL_DBL_MIN && i < m)
+       {
+         term = exp (sign * gsl_sf_lnchoose(m,i) + i * log(p));
+         P += term;
+         i++;
+         sign = -sign;
+       }
+    }
+  else
+    {
+      P = 1.0 - pow ( q, a);
+    }
+  return P;
+}
+double
+gslextras_cdf_geometric_Q ( const long n, const double p)
+{
+  double P;
+  double q;
+  double a;
+
+  if(p > 1.0 || p < 0.0)
+    {
+      GSLEXTRAS_CDF_ERROR("p < 0 or p > 1",GSL_EDOM);
+    }
+  if ( n < 0 )
+    {
+      P = 1.0;
+    }
+  else
+    {
+      a = (double) n+1;
+      q = 1.0 - p;
+      P = pow(q, a);
+    }
+
+  return P;
+}
diff --git a/lib/gsl-extras/gsl-extras.h b/lib/gsl-extras/gsl-extras.h

new file mode 100644 (file)

index 0000000..5d2f6d8
--- /dev/null
+++ b/lib/gsl-extras/gsl-extras.h
@@ -0,0 +1,33 @@
+#ifndef GSL_EXTRAS_H
+#define GSL_EXTRAS_H
+
+/* GSLEXTRAS_CDF_ERROR: call the error handler, and return a NAN. */
+#define GSLEXTRAS_CDF_ERROR(reason, gsl_errno) \
+       do { \
+       gsl_error (reason, __FILE__, __LINE__, gsl_errno) ; \
+       return GSL_NAN ; \
+       } while (0)
+
+double gslextras_cdf_beta_Pinv (const double p, const double a,
+                                const double b);
+double gslextras_cdf_beta_Qinv (double q, double a, double b);
+double gslextras_cdf_binomial_P(const long k, const long n, const double p);
+double gslextras_cdf_binomial_Q(const long k, const long n, const double q);
+double gslextras_cdf_geometric_P (const long n, const double p);
+double gslextras_cdf_geometric_Q ( const long n, const double p);
+double gslextras_cdf_hypergeometric_P (const unsigned int k, 
+                                       const unsigned int n0,
+                                       const unsigned int n1,
+                                       const unsigned int t);
+double gslextras_cdf_hypergeometric_Q (const unsigned int k, 
+                                       const unsigned int n0,
+                                       const unsigned int n1,
+                                       const unsigned int t);
+double gslextras_cdf_negative_binomial_P(const long n,
+                                         const long k, const double p);
+double gslextras_cdf_negative_binomial_Q(const long n, const long k,
+                                         const double p);
+double gslextras_cdf_poisson_P (const long k, const double lambda);
+double gslextras_cdf_poisson_Q (const long k, const double lambda);
+
+#endif /* gsl-extras.h */
diff --git a/lib/gsl-extras/hypergeometric.c b/lib/gsl-extras/hypergeometric.c

new file mode 100644 (file)

index 0000000..5e4d181
--- /dev/null
+++ b/lib/gsl-extras/hypergeometric.c
@@ -0,0 +1,195 @@
+/* cdf/hypergeometric.c
+ *
+ * Copyright (C) 2004 Jason H. Stover.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+/*
+ * Computes the cumulative distribution function for a hypergeometric
+ * random variable. A hypergeometric random variable X is the number
+ * of elements of type 0 in a sample of size t, drawn from a population
+ * of size n1 + n0, in which n1 are of type 1 and n0 are of type 0.
+ *
+ * This algorithm computes Pr( X <= k ) by summing the terms from
+ * the mass function, Pr( X = k ).
+ *
+ * References:
+ *
+ * T. Wu. An accurate computation of the hypergeometric distribution 
+ * function. ACM Transactions on Mathematical Software. Volume 19, number 1,
+ * March 1993.
+ *  This algorithm is not used, since it requires factoring the
+ *  numerator and denominator, then cancelling. It is more accurate
+ *  than the algorithm used here, but the cancellation requires more
+ *  time than the algorithm used here.
+ *
+ * W. Feller. An Introduction to Probability Theory and Its Applications,
+ * third edition. 1968. Chapter 2, section 6. 
+ */
+#include <config.h>
+#include <math.h>
+#include <gsl/gsl_math.h>
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_cdf.h>
+#include <gsl/gsl_randist.h>
+#include "gsl-extras.h"
+
+/*
+ * Pr (X <= k)
+ */
+double
+gslextras_cdf_hypergeometric_P (const unsigned int k, 
+                                const unsigned int n0,
+                                const unsigned int n1,
+                                const unsigned int t)
+{
+  unsigned int i;
+  unsigned int mode;
+  double P;
+  double tmp;
+  double relerr;
+
+  if( t > (n0+n1))
+    {
+      GSLEXTRAS_CDF_ERROR("t larger than population size",GSL_EDOM);
+    }
+  else if( k >= n0 || k >= t)
+    {
+      P = 1.0;
+    }
+  else if (k < 0.0)
+    {
+      P = 0.0;
+    }
+  else
+    {
+      P = 0.0;
+      mode = (int) t*n0 / (n0+n1);
+      relerr = 1.0;
+      if( k < mode )
+       {
+         i = k;
+         relerr = 1.0;
+         while(i != UINT_MAX && relerr > GSL_DBL_EPSILON && P < 1.0)
+           {
+             tmp = gsl_ran_hypergeometric_pdf(i, n0, n1, t);
+             P += tmp;
+             relerr = tmp / P;
+             i--;
+           }
+       }
+      else
+       {
+         i = mode;
+         relerr = 1.0;
+         while(i <= k && relerr > GSL_DBL_EPSILON && P < 1.0)
+           {
+             tmp = gsl_ran_hypergeometric_pdf(i, n0, n1, t);
+             P += tmp;
+             relerr = tmp / P;
+             i++;
+           }
+         i = mode - 1;
+         relerr = 1.0;
+         while( i != UINT_MAX && relerr > GSL_DBL_EPSILON && P < 1.0)
+           {
+             tmp = gsl_ran_hypergeometric_pdf(i, n0, n1, t);
+             P += tmp;
+             relerr = tmp / P;
+             i--;
+           }
+       }
+      /*
+       * Hack to get rid of a pesky error when the sum
+       * gets slightly above 1.0.
+       */
+      P = GSL_MIN_DBL (P, 1.0);
+    }
+  return P;
+}
+
+/*
+ * Pr (X > k)
+ */
+double
+gslextras_cdf_hypergeometric_Q (const unsigned int k, 
+                                const unsigned int n0,
+                                const unsigned int n1,
+                                const unsigned int t)
+{
+  unsigned int i;
+  unsigned int mode;
+  double P;
+  double relerr;
+  double tmp;
+
+  if( t > (n0+n1))
+    {
+      GSLEXTRAS_CDF_ERROR("t larger than population size",GSL_EDOM);
+    }
+  else if( k >= n0 || k >= t)
+    {
+      P = 0.0;
+    }
+  else if (k < 0.0)
+    {
+      P = 1.0;
+    }
+  else
+    {
+      P = 0.0;
+      mode = (int) t*n0 / (n0+n1);
+      relerr = 1.0;
+      
+      if(k < mode)
+       {
+         i = mode;
+         while( i <= t && relerr > GSL_DBL_EPSILON && P < 1.0)
+           {
+             tmp = gsl_ran_hypergeometric_pdf(i, n0, n1, t);
+             P += tmp;
+             relerr = tmp / P;
+             i++;
+           }
+         i = mode - 1;
+         relerr = 1.0;
+         while ( i > k && relerr > GSL_DBL_EPSILON && P < 1.0)
+           {
+             tmp = gsl_ran_hypergeometric_pdf(i, n0, n1, t);
+             P += tmp;
+             relerr = tmp / P;
+             i--;
+           }
+       }
+      else
+       {
+         i = k+1;
+         while(i <= t && relerr > GSL_DBL_EPSILON && P < 1.0)
+           {
+             tmp = gsl_ran_hypergeometric_pdf(i, n0, n1, t);
+             P += tmp;
+             relerr = tmp / P;
+             i++;
+           }
+       }
+      /*
+       * Hack to get rid of a pesky error when the sum
+       * gets slightly above 1.0.
+       */
+      P = GSL_MIN_DBL(P, 1.0);
+    }
+  return P;
+}
diff --git a/lib/gsl-extras/negbinom.c b/lib/gsl-extras/negbinom.c

new file mode 100644 (file)

index 0000000..0fc4689
--- /dev/null
+++ b/lib/gsl-extras/negbinom.c
@@ -0,0 +1,91 @@
+/* cdf/negbinom.c
+ *
+ * Copyright (C) 2004 Jason H. Stover.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+
+
+#include <config.h>
+#include <math.h>
+#include <gsl/gsl_math.h>
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_cdf.h>
+#include "gsl-extras.h"
+
+/*
+ * Pr(X <= n) for a negative binomial random variable X, i.e.,
+ * the probability of n or fewer failuers before success k.
+ */
+double
+gslextras_cdf_negative_binomial_P(const long n, const long k, const double p)
+{
+  double P;
+  double a;
+  double b;
+
+  if(p > 1.0 || p < 0.0)
+    {
+      GSLEXTRAS_CDF_ERROR("p < 0 or p > 1",GSL_EDOM);
+    }
+  if ( k < 0 )
+    {
+      GSLEXTRAS_CDF_ERROR ("k < 0",GSL_EDOM);
+    }
+  if ( n < 0 )
+    {
+      P = 0.0;
+    }
+  else
+    {
+      a = (double) k;
+      b = (double) n+1;
+      P = gsl_cdf_beta_P(p, a, b);
+    }
+
+  return P;
+}
+/*
+ * Pr ( X > n ).
+ */
+double
+gslextras_cdf_negative_binomial_Q(const long n, const long k, const double p)
+{
+  double P;
+  double a;
+  double b;
+
+  if(p > 1.0 || p < 0.0)
+    {
+      GSLEXTRAS_CDF_ERROR("p < 0 or p > 1",GSL_EDOM);
+    }
+  if ( k < 0 )
+    {
+      GSLEXTRAS_CDF_ERROR ("k < 0",GSL_EDOM);
+    }
+  if ( n < 0 )
+    {
+      P = 1.0;
+    }
+  else
+    {
+      a = (double) k;
+      b = (double) n+1;
+      P = gsl_cdf_beta_Q(p, a, b);
+    }
+
+  return P;
+}
+
diff --git a/lib/gsl-extras/poisson.c b/lib/gsl-extras/poisson.c

new file mode 100644 (file)

index 0000000..fffa993
--- /dev/null
+++ b/lib/gsl-extras/poisson.c
@@ -0,0 +1,91 @@
+/* cdf/poisson.c
+ *
+ * Copyright (C) 2004 Jason H. Stover.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA.
+ */
+/*
+ * Computes the cumulative distribution function for a Poisson
+ * random variable. For a Poisson random variable X with parameter
+ * lambda,
+ *
+ *          Pr( X <= k ) = Pr( Y >= p )
+ *
+ * where Y is a gamma random variable with parameters k+1 and 1.
+ *
+ * Reference: 
+ * 
+ * W. Feller, "An Introduction to Probability and Its
+ * Applications," volume 1. Wiley, 1968. Exercise 46, page 173,
+ * chapter 6.
+ */
+#include <config.h>
+#include <math.h>
+#include <gsl/gsl_math.h>
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_cdf.h>
+#include "gsl-extras.h"
+#include "gsl-extras.h"
+
+/*
+ * Pr (X <= k) for a Poisson random variable X.
+ */
+double
+gslextras_cdf_poisson_P (const long k, const double lambda)
+{
+  double P;
+  double a;
+  
+  if ( lambda <= 0.0 )
+    {
+      GSLEXTRAS_CDF_ERROR ("lambda <= 0", GSL_EDOM);
+    }
+  if ( k < 0 )
+    {
+      P = 0.0;
+    }
+  else
+    {
+      a = (double) k+1;
+      P = gsl_cdf_gamma_Q ( lambda, a, 1.0);
+    }
+  return P;
+}
+
+/*
+ * Pr ( X > k ) for a Possion random variable X.
+ */
+double
+gslextras_cdf_poisson_Q (const long k, const double lambda)
+{
+  double P;
+  double a;
+  
+  if ( lambda <= 0.0 )
+    {
+      GSLEXTRAS_CDF_ERROR ("lambda <= 0", GSL_EDOM);
+    }
+  if ( k < 0 )
+    {
+      P = 1.0;
+    }
+  else
+    {
+      a = (double) k+1;
+      P = gsl_cdf_gamma_P ( lambda, a, 1.0);
+    }
+  return P;
+}
+
diff --git a/po/en_GB.po b/po/en_GB.po

index 3e2113465d720c729d6dc27763aa216b1ba88142..7ac7cab5f7c1251be08ac0fb5aa469b26cc36aac 100644 (file)
--- a/po/en_GB.po
+++ b/po/en_GB.po
@@ -7,7 +7,7 @@ msgid ""
  msgstr ""
  "Project-Id-Version: PSPP 0.3.1\n"
  "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n"
-"POT-Creation-Date: 2005-03-08 12:50+0800\n"
+"POT-Creation-Date: 2005-03-09 09:46-0800\n"
  "PO-Revision-Date: 2004-01-23 13:04+0800\n"
  "Last-Translator: John Darrington <john@darrington.wattle.id.au>\n"
  "Language-Team: John Darrington <john@darrington.wattle.id.au>\n"
@@ -1209,25 +1209,6 @@ msgstr ""
  msgid "installation error"
  msgstr ""
  
-#: src/filename.c:221
-#, c-format
-msgid "Searching for `%s'..."
-msgstr ""
-
-#: src/filename.c:229 src/filename.c:261
-msgid "Search unsuccessful!"
-msgstr ""
-
-#: src/filename.c:254
-#, c-format
-msgid "Found `%s'."
-msgstr ""
-
-#: src/filename.c:686
-#, c-format
-msgid "Not opening pipe file `%s' because SAFER option set."
-msgstr ""
-
  #: src/file-type.c:129
  msgid "MIXED, GROUPED, or NESTED expected."
  msgstr ""
@@ -1366,6 +1347,25 @@ msgstr ""
  msgid "Unknown record type %g."
  msgstr ""
  
+#: src/filename.c:221
+#, c-format
+msgid "Searching for `%s'..."
+msgstr ""
+
+#: src/filename.c:229 src/filename.c:261
+msgid "Search unsuccessful!"
+msgstr ""
+
+#: src/filename.c:254
+#, c-format
+msgid "Found `%s'."
+msgstr ""
+
+#: src/filename.c:686
+#, c-format
+msgid "Not opening pipe file `%s' because SAFER option set."
+msgstr ""
+
  #: src/flip.c:81
  msgid ""
  "FLIP ignores TEMPORARY.  Temporary transformations will be made permanent."
diff --git a/po/pspp.pot b/po/pspp.pot

index c67a0aa8e68e074c11f503526e29603318f31db8..cc19a3dca3268cc8c810b455e85b0ec29ef058d6 100644 (file)
--- a/po/pspp.pot
+++ b/po/pspp.pot
@@ -8,7 +8,7 @@ msgid ""
  msgstr ""
  "Project-Id-Version: PACKAGE VERSION\n"
  "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n"
-"POT-Creation-Date: 2005-03-08 12:50+0800\n"
+"POT-Creation-Date: 2005-03-09 09:46-0800\n"
  "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
  "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
  "Language-Team: LANGUAGE <LL@li.org>\n"
@@ -1210,25 +1210,6 @@ msgstr ""
  msgid "installation error"
  msgstr ""
  
-#: src/filename.c:221
-#, c-format
-msgid "Searching for `%s'..."
-msgstr ""
-
-#: src/filename.c:229 src/filename.c:261
-msgid "Search unsuccessful!"
-msgstr ""
-
-#: src/filename.c:254
-#, c-format
-msgid "Found `%s'."
-msgstr ""
-
-#: src/filename.c:686
-#, c-format
-msgid "Not opening pipe file `%s' because SAFER option set."
-msgstr ""
-
  #: src/file-type.c:129
  msgid "MIXED, GROUPED, or NESTED expected."
  msgstr ""
@@ -1367,6 +1348,25 @@ msgstr ""
  msgid "Unknown record type %g."
  msgstr ""
  
+#: src/filename.c:221
+#, c-format
+msgid "Searching for `%s'..."
+msgstr ""
+
+#: src/filename.c:229 src/filename.c:261
+msgid "Search unsuccessful!"
+msgstr ""
+
+#: src/filename.c:254
+#, c-format
+msgid "Found `%s'."
+msgstr ""
+
+#: src/filename.c:686
+#, c-format
+msgid "Not opening pipe file `%s' because SAFER option set."
+msgstr ""
+
  #: src/flip.c:81
  msgid ""
  "FLIP ignores TEMPORARY.  Temporary transformations will be made permanent."
diff --git a/src/ChangeLog b/src/ChangeLog

index 4cdd2acb109484c647f563e1904e107f96159905..a0eb80bb225945e4551faeb92d41f366c79a9033 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,17 @@
+Wed Mar  9 09:54:27 2005  Ben Pfaff  <blp@gnu.org>
+
+       * Makefile.am: (pspp_LDADD) Add libgsl-extras.a.
+
+       * expressions/helpers.c: (struct func_params) Removed.
+       (generalized_idf) Removed.
+       (cdf_beta) Removed.
+       (idf_beta) Removed.
+       (idf_fdist) Use gslextras_cdf_beta_Pinv() instead of idf_beta().
+
+       * expressions/operations.def: Implement IDF.BETA, CDF.BINOM,
+       CDF.GEOM, CDF.HYPER, CDF.NEGBIN, CDF.POISSON using gsl-extras.
+       Implement SIG.F, which I had overlooked previously.
+
  Tue Mar  8 12:47:53 WST 2005 John Darrington <john@darrington.wattle.id.au>
  
         * command.c command.def glob.[ch] cmdline.c: Made DEBUG cmds
@@ -64,22 +78,23 @@ Sun Mar  6 17:07:20 2005  Ben Pfaff  <blp@gnu.org>
  
  Sun Mar  6 22:09:20 2005  Ben Pfaff  <blp@gnu.org>
  
-       * operations.def: (NUMBER) Use DI_IMPLIED_DECIMALS.
+       * expressions/operations.def: (NUMBER) Use DI_IMPLIED_DECIMALS.
  
  Sun Mar  6 19:33:24 2005  Ben Pfaff  <blp@gnu.org>
  
-       * operations.def: (VEC_ELEM_NUM) Treat user-missing values as
-       system-missing.
+       * expressions/operations.def: (VEC_ELEM_NUM) Treat user-missing
+       values as system-missing.
  
-       * parse.c: (parse_vector_element) Fix order of arguments in call
-       to expr_allocate_binary().
+       * expressions/parse.c: (parse_vector_element) Fix order of
+       arguments in call to expr_allocate_binary().
  
  Sun Mar  6 17:51:05 2005  Ben Pfaff  <blp@gnu.org>
  
-       * optimize.c: (optimize_tree) Fix optimization bug for x**2.
+       * expressions/optimize.c: (optimize_tree) Fix optimization bug for
+       x**2.
  
-       * parse.c: (type_coercion_core) Set *node to NULL on failure, as
-       indicated by function comment.
+       * expressions/parse.c: (type_coercion_core) Set *node to NULL on
+       failure, as indicated by function comment.
         (parse_binary_operators) Always return NULL on type_coercion()
         failure.  Should have been doing this anyway, but bug in
         type_coercion_core() filtered through.
@@ -88,29 +103,31 @@ Sun Mar  6 17:51:05 2005  Ben Pfaff  <blp@gnu.org>
  
  Sun Mar  6 10:47:13 2005  Ben Pfaff  <blp@gnu.org>
  
-       * operations.def: Add VALUE function.
+       * expressions/operations.def: Add VALUE function.
  
-       * parse.c: (parse_function) Need an unary composite node for
-       variables in A TO B, not a variable node.  Use
+       * expressions/parse.c: (parse_function) Need an unary composite
+       node for variables in A TO B, not a variable node.  Use
         allocate_unary_variable().
         (parse_primary) Use allocate_unary_variable().
         (allocate_unary_variable) New function.
  
  Thu Mar  3 23:53:32 2005  Ben Pfaff  <blp@gnu.org>
  
-       * PSPP_expressions.pm: Renamed it back to generate.pl but fixed
-       the real problem that was preventing the build from a separate
-       directory.  I liked it my way better ;-)
+       * expressions/PSPP_expressions.pm: Renamed it back to generate.pl
+       but fixed the real problem that was preventing the build from a
+       separate directory.  I liked it my way better ;-)
         
  Thu Mar  3 23:17:51 2005  Ben Pfaff  <blp@gnu.org>
  
-       * parse.c: (expr_parse) Fix parameter type.  Thanks to John
-       Darrington <john@darrington.wattle.id.au> for reporting this bug.
+       * expressions/parse.c: (expr_parse) Fix parameter type.  Thanks to
+       John Darrington <john@darrington.wattle.id.au> for reporting this
+       bug.
  
  Thu Mar  3 22:10:25 WST 2005 John Darrington <john@darrington.wattle.id.au>
  
-       * Makefile.am evaluate.h.pl evaluate.inc.pl operations.h.pl
-         optimize.inc.pl parse.inc.p:  
+       * expressions/Makefile.am expressions/evaluate.h.pl
+         expressions/evaluate.inc.pl expressions/operations.h.pl
+         expressions/optimize.inc.pl expressions/parse.inc.p:
  
           Renamed generate.pl to PSPP_expressions.pm and adjusted *.pl
           to suit. 
diff --git a/src/Makefile.am b/src/Makefile.am

index 1dac4987485ca6f1c91cddf8a6d1f0948f8c9f1d..6f03ed9115be0292c2ee2c65c6becc857a86a3bc 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -76,6 +76,7 @@ vfm.c vfm.h vfmP.h weight.c
  pspp_LDADD = \
         ../lib/misc/libmisc.a                   \
         expressions/libexpressions.a            \
+       ../lib/gsl-extras/libgsl-extras.a       \
         -lplot \
         @LIBINTL@
  
diff --git a/src/expressions/helpers.c b/src/expressions/helpers.c

index 879b95f493035755e40163752190158814ad12db..58c4a0361be81529b60d8b7a14ef4e3cba2ab581 100644 (file)
--- a/src/expressions/helpers.c
+++ b/src/expressions/helpers.c
@@ -165,84 +165,6 @@ copy_string (struct expression *e, const char *old, size_t length)
    return s;
  }
  
-struct func_params 
-  {
-    double Ptarget;
-    double a;
-    double b;
-  };
-
-static double
-generalized_idf (double P, double a, double b, double (*cdf) (double, void *)) 
-{
-  struct func_params params;
-  gsl_function f;
-  gsl_root_fsolver *fsolver;
-  int iter;
-  int status;
-  double root;
-
-  if (P < 0 || P > 1)
-    return SYSMIS;
-
-  params.Ptarget = P;
-  params.a = a;
-  params.b = b;
-  
-  f.function = cdf;
-  f.params = &params;
-
-  fsolver = gsl_root_fsolver_alloc (gsl_root_fsolver_brent);
-  gsl_root_fsolver_set (fsolver, &f, 0, 1);
-
-  iter = 0;
-  do 
-    {
-      double x_lower, x_upper;
-      
-      status = gsl_root_fsolver_iterate (fsolver);
-      if (status != 0)
-        return SYSMIS;
-
-      x_lower = gsl_root_fsolver_x_lower (fsolver);
-      x_upper = gsl_root_fsolver_x_upper (fsolver);
-      status = gsl_root_test_interval (x_lower, x_upper, 0, 2 * DBL_EPSILON);
-    }
-  while (status == GSL_CONTINUE && ++iter < 100);
-
-  root = gsl_root_fsolver_root (fsolver);
-  gsl_root_fsolver_free (fsolver);
-  return root;
-}
-
-static double
-cdf_beta (double x, void *params_) 
-{
-  struct func_params *params = params_;
-
-  return gsl_cdf_beta_P (x, params->a, params->b) - params->Ptarget;
-}
-
-double
-idf_beta (double P, double a, double b) 
-{
-#if 1
-  return generalized_idf (P, a, b, cdf_beta);
-#else
-  double x = a / (a + b);
-  double dx = 1.;
-  while (fabs (dx) > 2 * DBL_EPSILON) 
-    {
-      dx = (gsl_sf_beta_inc (a, b, x) - P) / gsl_ran_beta_pdf (x, a, b);
-      x -= dx;
-      if (x < 0)
-        x += (dx - x) / 2;
-    }
-
-  return x;
-#endif
-}
-
  /* Returns the noncentral beta cumulative distribution function
     value for the given arguments.
  
@@ -390,7 +312,7 @@ cdf_bvnor (double x0, double x1, double r)
  double
  idf_fdist (double P, double df1, double df2) 
  {
-  double temp = idf_beta (P, df1 / 2, df2 / 2);
+  double temp = gslextras_cdf_beta_Pinv (P, df1 / 2, df2 / 2);
    return temp * df2 / ((1. - temp) * df1);
  }
  
diff --git a/src/expressions/helpers.h b/src/expressions/helpers.h

index 32e01dd3a3aa75d04a8d9c5801b0f2861b4e299a..6e51fdf4aefaf98ffb09e989034169eccad8bdb3 100644 (file)
--- a/src/expressions/helpers.h
+++ b/src/expressions/helpers.h
@@ -14,6 +14,7 @@
  #include "dictionary.h"
  #include "error.h"
  #include "calendar.h"
+#include "gsl-extras/gsl-extras.h"
  #include "misc.h"
  #include "moments.h"
  #include "settings.h"
diff --git a/src/expressions/operations.def b/src/expressions/operations.def

index e1998266154f263b4c96032f60c78485d14f359d..0d21c16a65e10bf06229fb115fc794f2d3d03118 100644 (file)
--- a/src/expressions/operations.def
+++ b/src/expressions/operations.def
@@ -617,7 +617,8 @@ operator BOOLEAN_TO_NUM (boolean x) = x;
  function PDF.BETA (x >= 0 && x <= 1, a > 0, b > 0)
       = gsl_ran_beta_pdf (x, a, b);
  function CDF.BETA (x >= 0 && x <= 1, a > 0, b > 0) = gsl_cdf_beta_P (x, a, b);
-function IDF.BETA (P >= 0 && P <= 1, a > 0, b > 0) = idf_beta (P, a, b);
+function IDF.BETA (P >= 0 && P <= 1, a > 0, b > 0)
+     = gslextras_cdf_beta_Pinv (P, a, b);
  no_opt function RV.BETA (a > 0, b > 0) = gsl_ran_beta (get_rng (), a, b);
  function NCDF.BETA (x >= 0, a > 0, b > 0, lambda > 0)
       = ncdf_beta (x, a, b, lambda);
@@ -665,7 +666,7 @@ function PDF.F (x >= 0, df1 > 0, df2 > 0) = gsl_ran_fdist_pdf (x, df1, df2);
  no_opt function RV.F (df1 > 0, df2 > 0) = gsl_ran_fdist (get_rng (), df1, df2);
  function NCDF.F (x >= 0, df1 > 0, df2 > 0, lambda >= 0) = unimplemented;
  function NPDF.F (x >= 0, df1 > 0, df2 > 0, lmabda >= 0) = unimplemented;
-function SIG.F (x >= 0, df1 > 0, df2 > 0) = unimplemented;
+function SIG.F (x >= 0, df1 > 0, df2 > 0) = gsl_cdf_fdist_Q (x, df1, df2);
  
  // Gamma distribution.
  function CDF.GAMMA (x >= 0, a > 0, b > 0) = gsl_cdf_gamma_P (x, a, 1. / b);
@@ -820,7 +821,7 @@ no_opt function RV.BERNOULLI (p >= 0 && p <= 1)
  
  // Binomial distribution.
  function CDF.BINOM (k, n > 0 && n == floor (n), p >= 0 && p <= 1)
-     = unimplemented;
+     = gslextras_cdf_binomial_P (k, p, n);
  function PDF.BINOM (k >= 0 && k == floor (k) && k <= n,
                      n > 0 && n == floor (n),
                      p >= 0 && p <= 1)
@@ -829,7 +830,8 @@ no_opt function RV.BINOM (p > 0 && p == floor (p), n >= 0 && n <= 1)
       = gsl_ran_binomial (get_rng (), p, n);
  
  // Geometric distribution.
-function CDF.GEOM (k >= 1 && k == floor (k), p >= 0 && p <= 1) = unimplemented;
+function CDF.GEOM (k >= 1 && k == floor (k), p >= 0 && p <= 1)
+     = gslextras_cdf_geometric_P (k, p);
  function PDF.GEOM (k >= 1 && k == floor (k),
                     p >= 0 && p <= 1)
       = gsl_ran_geometric_pdf (k, p);
@@ -840,7 +842,7 @@ function CDF.HYPER (k >= 0 && k == floor (k) && k <= c,
                      a > 0 && a == floor (a),
                      b > 0 && b == floor (b) && b <= a,
                      c > 0 && c == floor (c) && c <= a)
-     = unimplemented;
+     = gslextras_cdf_hypergeometric_P (k, c, a - c, b);
  function PDF.HYPER (k >= 0 && k == floor (k) && k <= c,
                      a > 0 && a == floor (a),
                      b > 0 && b == floor (b) && b <= a,
@@ -858,14 +860,16 @@ no_opt extension function RV.LOG (p > 0 && p <= 1)
       = gsl_ran_logarithmic (get_rng (), p);
  
  // Negative binomial distribution.
-function CDF.NEGBIN (k >= 1, n == floor (n), p > 0 && p <= 1) = unimplemented;
+function CDF.NEGBIN (k >= 1, n == floor (n), p > 0 && p <= 1)
+     = gslextras_cdf_negative_binomial_P (k, p, n);
  function PDF.NEGBIN (k >= 1, n == floor (n), p > 0 && p <= 1)
       = gsl_ran_negative_binomial_pdf (k, p, n);
  no_opt function RV.NEGBIN (n == floor (n), p > 0 && p <= 1) 
       = gsl_ran_negative_binomial (get_rng (), p, n);
  
  // Poisson distribution.
-function CDF.POISSON (k >= 0 && k == floor (k), mu > 0) = unimplemented;
+function CDF.POISSON (k >= 0 && k == floor (k), mu > 0)
+     = gslextras_cdf_poisson_P (k, mu);
  function PDF.POISSON (k >= 0 && k == floor (k), mu > 0)
       = gsl_ran_poisson_pdf (k, mu);
  no_opt function RV.POISSON (mu > 0) = gsl_ran_poisson (get_rng (), mu);
diff --git a/tests/expressions/randist/beta.out b/tests/expressions/randist/beta.out

index ea138295dc13ad851790931594efd91229b18b7b..e19d5f1ec61bd68ad63b6853bc29ce34642d4a69 100644 (file)
--- a/tests/expressions/randist/beta.out
+++ b/tests/expressions/randist/beta.out
@@ -208,21 +208,21 @@
       .90     1.50     2.00      .76      .90      .79 
       .90     1.50     2.50      .68      .90      .75 
       .90     1.50     3.00      .62      .90      .74 
-     .99      .75      .25     1.00      .99 164255.7 
+     .99      .75      .25     1.00      .98 20860.76 
       .99      .75      .50     1.00      .99    34.83 
       .99      .75     1.00      .99      .99      .75 
       .99      .75     1.50      .94      .99      .26 
       .99      .75     2.00      .88      .99      .17 
       .99      .75     2.50      .81      .99      .13 
       .99      .75     3.00      .75      .99      .12 
-     .99     1.00      .25     1.00      .99 250000.0 
+     .99     1.00      .25     1.00      .98 46067.00 
       .99     1.00      .50     1.00      .99    50.00 
       .99     1.00     1.00      .99      .99     1.00 
       .99     1.00     1.50      .95      .99      .32 
       .99     1.00     2.00      .90      .99      .20 
       .99     1.00     2.50      .84      .99      .16 
       .99     1.00     3.00      .78      .99      .14 
-     .99     1.50      .25     1.00      .99 428406.6 
+     .99     1.50      .25     1.00      .98 67836.74 
       .99     1.50      .50     1.00      .99    81.05 
       .99     1.50     1.00      .99      .99     1.49 
       .99     1.50     1.50      .97      .99      .45 
diff --git a/tests/expressions/randist/f.out b/tests/expressions/randist/f.out

index ab76a257c36074c1f2848242d714a159733a4681..2e014815dced6e2736b27bc2f437d8f9680bf0a9 100644 (file)
--- a/tests/expressions/randist/f.out
+++ b/tests/expressions/randist/f.out
@@ -169,7 +169,7 @@
       .99     5.00     1.00  5763.65      .99      .00 
       .99     5.00     2.00    99.30      .99      .00 
       .99     5.00     5.00    10.97      .99      .00 
-     .99     5.00    10.00     5.64      .99      .01 
+     .99     5.00    10.00    10.00     1.00      .00 
       .99    10.00     1.00  6055.85      .99      .00 
       .99    10.00     2.00    99.40      .99      .00 
       .99    10.00     5.00    10.05      .99      .00
author	Ben Pfaff <blp@gnu.org>
	Wed, 9 Mar 2005 18:02:15 +0000 (18:02 +0000)
committer	Ben Pfaff <blp@gnu.org>
	Wed, 9 Mar 2005 18:02:15 +0000 (18:02 +0000)
configure.ac		patch \| blob \| history
lib/ChangeLog		patch \| blob \| history
lib/Makefile.am		patch \| blob \| history
lib/gsl-extras/Makefile.am	[new file with mode: 0644]	patch \| blob
lib/gsl-extras/betadistinv.c	[new file with mode: 0644]	patch \| blob
lib/gsl-extras/binomial.c	[new file with mode: 0644]	patch \| blob
lib/gsl-extras/geometric.c	[new file with mode: 0644]	patch \| blob
lib/gsl-extras/gsl-extras.h	[new file with mode: 0644]	patch \| blob
lib/gsl-extras/hypergeometric.c	[new file with mode: 0644]	patch \| blob
lib/gsl-extras/negbinom.c	[new file with mode: 0644]	patch \| blob
lib/gsl-extras/poisson.c	[new file with mode: 0644]	patch \| blob
po/en_GB.po		patch \| blob \| history
po/pspp.pot		patch \| blob \| history
src/ChangeLog		patch \| blob \| history
src/Makefile.am		patch \| blob \| history
src/expressions/helpers.c		patch \| blob \| history
src/expressions/helpers.h		patch \| blob \| history
src/expressions/operations.def		patch \| blob \| history
tests/expressions/randist/beta.out		patch \| blob \| history
tests/expressions/randist/f.out		patch \| blob \| history