src/expressions/helpers.c

   1 #include <config.h>
   2 #include "helpers.h"
   3 #include <gsl/gsl_roots.h>
   4 #include <gsl/gsl_sf.h>
   5 #include "pool.h"
   6 #include "private.h"
   7
   8 const struct fixed_string empty_string = {NULL, 0};
   9
  10 static void
  11 expr_error (void *aux UNUSED, const char *format, ...)
  12 {
  13   struct error e;
  14   va_list args;
  15
  16   /* FIXME: we can do better about saying where the error
  17      occurred. */
  18   e.class = SE;
  19   err_location (&e.where);
  20   e.title = NULL;
  21
  22   va_start (args, format);
  23   err_vmsg (&e, format, args);
  24   va_end (args);
  25 }
  26
  27 double
  28 expr_ymd_to_ofs (double year, double month, double day)
  29 {
  30   int y = year;
  31   int m = month;
  32   int d = day;
  33
  34   if (y != year || m != month || d != day)
  35     {
  36       msg (SE, _("One of the arguments to a DATE function is not an integer.  "
  37                  "The result will be system-missing."));
  38       return SYSMIS;
  39     }
  40
  41   return calendar_gregorian_to_offset (y, m, d, expr_error, NULL);
  42 }
  43
  44 double
  45 expr_ymd_to_date (double year, double month, double day)
  46 {
  47   double ofs = expr_ymd_to_ofs (year, month, day);
  48   return ofs != SYSMIS ? ofs * DAY_S : SYSMIS;
  49 }
  50
  51 double
  52 expr_wkyr_to_date (double week, double year)
  53 {
  54   int w = week;
  55
  56   if (w != week)
  57     {
  58       msg (SE, _("The week argument to DATE.WKYR is not an integer.  "
  59                  "The result will be system-missing."));
  60       return SYSMIS;
  61     }
  62   else if (w < 1 || w > 53)
  63     {
  64       msg (SE, _("The week argument to DATE.WKYR is outside the acceptable "
  65                  "range of 1 to 53.  "
  66                  "The result will be system-missing."));
  67       return SYSMIS;
  68     }
  69   else
  70     {
  71       double yr_1_1 = expr_ymd_to_ofs (year, 1, 1);
  72       if (yr_1_1 != SYSMIS)
  73         return DAY_S * (yr_1_1 + WEEK_DAY * (w - 1));
  74       else
  75         return SYSMIS;
  76     }
  77 }
  78
  79 double
  80 expr_yrday_to_date (double year, double yday)
  81 {
  82   int yd = yday;
  83
  84   if (yd != yday)
  85     {
  86       msg (SE, _("The day argument to DATE.YRDAY is not an integer.  "
  87                  "The result will be system-missing."));
  88       return SYSMIS;
  89     }
  90   else if (yd < 1 || yd > 366)
  91     {
  92       msg (SE, _("The day argument to DATE.YRDAY is outside the acceptable "
  93                  "range of 1 to 366.  "
  94                  "The result will be system-missing."));
  95       return SYSMIS;
  96     }
  97   else
  98     {
  99       double yr_1_1 = expr_ymd_to_ofs (year, 1, 1);
 100       if (yr_1_1 != SYSMIS)
 101         return DAY_S * (yr_1_1 + yd - 1.);
 102       else
 103         return SYSMIS;
 104     }
 105 }
 106
 107 double
 108 expr_yrmoda (double year, double month, double day)
 109 {
 110   if (year >= 0 && year <= 99)
 111     year += 1900;
 112   else if (year != (int) year && year > 47516)
 113     {
 114       msg (SE, _("The year argument to YRMODA is greater than 47516.  "
 115                  "The result will be system-missing."));
 116       return SYSMIS;
 117     }
 118
 119   return expr_ymd_to_ofs (year, month, day);
 120 }
 121
 122 int
 123 compare_string (const struct fixed_string *a, const struct fixed_string *b)
 124 {
 125   size_t i;
 126
 127   for (i = 0; i < a->length && i < b->length; i++)
 128     if (a->string[i] != b->string[i])
 129       return a->string[i] < b->string[i] ? -1 : 1;
 130   for (; i < a->length; i++)
 131     if (a->string[i] != ' ')
 132       return 1;
 133   for (; i < b->length; i++)
 134     if (b->string[i] != ' ')
 135       return -1;
 136   return 0;
 137 }
 138
 139 size_t
 140 count_valid (double *d, size_t d_cnt)
 141 {
 142   size_t valid_cnt;
 143   size_t i;
 144
 145   valid_cnt = 0;
 146   for (i = 0; i < d_cnt; i++)
 147     valid_cnt += is_valid (d[i]);
 148   return valid_cnt;
 149 }
 150
 151 struct fixed_string
 152 alloc_string (struct expression *e, size_t length)
 153 {
 154   struct fixed_string s;
 155   s.length = length;
 156   s.string = pool_alloc (e->eval_pool, length);
 157   return s;
 158 }
 159
 160 struct fixed_string
 161 copy_string (struct expression *e, const char *old, size_t length)
 162 {
 163   struct fixed_string s = alloc_string (e, length);
 164   memcpy (s.string, old, length);
 165   return s;
 166 }
 167
 168 struct func_params
 169   {
 170     double Ptarget;
 171     double a;
 172     double b;
 173   };
 174
 175 static double
 176 generalized_idf (double P, double a, double b, double (*cdf) (double, void *))
 177 {
 178   struct func_params params;
 179   gsl_function f;
 180   gsl_root_fsolver *fsolver;
 181   int iter;
 182   int status;
 183   double root;
 184
 185   if (P < 0 || P > 1)
 186     return SYSMIS;
 187
 188   params.Ptarget = P;
 189   params.a = a;
 190   params.b = b;
 191
 192   f.function = cdf;
 193   f.params = &params;
 194
 195   fsolver = gsl_root_fsolver_alloc (gsl_root_fsolver_brent);
 196   gsl_root_fsolver_set (fsolver, &f, 0, 1);
 197
 198   iter = 0;
 199   do
 200     {
 201       double x_lower, x_upper;
 202
 203       status = gsl_root_fsolver_iterate (fsolver);
 204       if (status != 0)
 205         return SYSMIS;
 206
 207       x_lower = gsl_root_fsolver_x_lower (fsolver);
 208       x_upper = gsl_root_fsolver_x_upper (fsolver);
 209       status = gsl_root_test_interval (x_lower, x_upper, 0, 2 * DBL_EPSILON);
 210     }
 211   while (status == GSL_CONTINUE && ++iter < 100);
 212
 213   root = gsl_root_fsolver_root (fsolver);
 214   gsl_root_fsolver_free (fsolver);
 215   return root;
 216 }
 217
 218 static double
 219 cdf_beta (double x, void *params_)
 220 {
 221   struct func_params *params = params_;
 222
 223   return gsl_cdf_beta_P (x, params->a, params->b) - params->Ptarget;
 224 }
 225
 226 double
 227 idf_beta (double P, double a, double b)
 228 {
 229 #if 1
 230   return generalized_idf (P, a, b, cdf_beta);
 231 #else
 232   double x = a / (a + b);
 233   double dx = 1.;
 234   while (fabs (dx) > 2 * DBL_EPSILON)
 235     {
 236       dx = (gsl_sf_beta_inc (a, b, x) - P) / gsl_ran_beta_pdf (x, a, b);
 237       x -= dx;
 238       if (x < 0)
 239         x += (dx - x) / 2;
 240     }
 241
 242   return x;
 243 #endif
 244 }
 245
 246 /* Returns the noncentral beta cumulative distribution function
 247    value for the given arguments.
 248
 249    FIXME: The accuracy of this function is not entirely
 250    satisfactory.  We only match the example values given in AS
 251    310 to the first 5 significant digits. */
 252 double
 253 ncdf_beta (double x, double a, double b, double lambda)
 254 {
 255   double c;
 256
 257   if (x <= 0. || x >= 1. || a <= 0. || b <= 0. || lambda <= 0.)
 258     return SYSMIS;
 259
 260   c = lambda / 2.;
 261   if (lambda < 54.)
 262     {
 263       /* Algorithm AS 226. */
 264       double x0, a0, beta, temp, gx, q, ax, sumq, sum;
 265       double err_max = 2 * DBL_EPSILON;
 266       double err_bound;
 267       int iter_max = 100;
 268       int iter;
 269
 270       x0 = floor (c - 5.0 * sqrt (c));
 271       if (x0 < 0.)
 272         x0 = 0.;
 273       a0 = a + x0;
 274       beta = (gsl_sf_lngamma (a0)
 275               + gsl_sf_lngamma (b)
 276               - gsl_sf_lngamma (a0 + b));
 277       temp = gsl_sf_beta_inc (a0, b, x);
 278       gx = exp (a0 * log (x) + b * log (1. - x) - beta - log (a0));
 279       if (a0 >= a)
 280         q = exp (-c + x0 * log (c)) - gsl_sf_lngamma (x0 + 1.);
 281       else
 282         q = exp (-c);
 283       ax = q * temp;
 284       sumq = 1. - q;
 285       sum = ax;
 286
 287       iter = 0;
 288       do
 289         {
 290           iter++;
 291           temp -= gx;
 292           gx = x * (a + b + iter - 1.) * gx / (a + iter);
 293           q *= c / iter;
 294           sumq -= q;
 295           ax = temp * q;
 296           sum += ax;
 297
 298           err_bound = (temp - gx) * sumq;
 299         }
 300       while (iter < iter_max && err_bound > err_max);
 301
 302       return sum;
 303     }
 304   else
 305     {
 306       /* Algorithm AS 310. */
 307       double m, m_sqrt;
 308       int iter, iter_lower, iter_upper, iter1, iter2, j;
 309       double t, q, r, psum, beta, s1, gx, fx, temp, ftemp, t0, s0, sum, s;
 310       double err_bound;
 311       double err_max = 2 * DBL_EPSILON;
 312
 313       iter = 0;
 314
 315       m = floor (c + .5);
 316       m_sqrt = sqrt (m);
 317       iter_lower = m - 5. * m_sqrt;
 318       iter_upper = m + 5. * m_sqrt;
 319
 320       t = -c + m * log (c) - gsl_sf_lngamma (m + 1.);
 321       q = exp (t);
 322       r = q;
 323       psum = q;
 324       beta = (gsl_sf_lngamma (a + m)
 325               + gsl_sf_lngamma (b)
 326               - gsl_sf_lngamma (a + m + b));
 327       s1 = (a + m) * log (x) + b * log (1. - x) - log (a + m) - beta;
 328       fx = gx = exp (s1);
 329       ftemp = temp = gsl_sf_beta_inc (a + m, b, x);
 330       iter++;
 331       sum = q * temp;
 332       iter1 = m;
 333
 334       while (iter1 >= iter_lower && q >= err_max)
 335         {
 336           q = q * iter1 / c;
 337           iter++;
 338           gx = (a + iter1) / (x * (a + b + iter1 - 1.)) * gx;
 339           iter1--;
 340           temp += gx;
 341           psum += q;
 342           sum += q * temp;
 343         }
 344
 345       t0 = (gsl_sf_lngamma (a + b)
 346             - gsl_sf_lngamma (a + 1.)
 347             - gsl_sf_lngamma (b));
 348       s0 = a * log (x) + b * log (1. - x);
 349
 350       s = 0.;
 351       for (j = 0; j < iter1; j++)
 352         {
 353           double t1;
 354           s += exp (t0 + s0 + j * log (x));
 355           t1 = log (a + b + j) - log (a + 1. + j) + t0;
 356           t0 = t1;
 357         }
 358
 359       err_bound = (1. - gsl_sf_gamma_inc_P (iter1, c)) * (temp + s);
 360       q = r;
 361       temp = ftemp;
 362       gx = fx;
 363       iter2 = m;
 364       for (;;)
 365         {
 366           double ebd = err_bound + (1. - psum) * temp;
 367           if (ebd < err_max || iter >= iter_upper)
 368             break;
 369
 370           iter2++;
 371           iter++;
 372           q = q * c / iter2;
 373           psum += q;
 374           temp -= gx;
 375           gx = x * (a + b + iter2 - 1.) / (a + iter2) * gx;
 376           sum += q * temp;
 377         }
 378
 379       return sum;
 380     }
 381 }
 382
 383 double
 384 cdf_bvnor (double x0, double x1, double r)
 385 {
 386   double z = x0 * x0 - 2. * r * x0 * x1 + x1 * x1;
 387   return exp (-z / (2. * (1 - r * r))) * (2. * M_PI * sqrt (1 - r * r));
 388 }
 389
 390 double
 391 idf_fdist (double P, double df1, double df2)
 392 {
 393   double temp = idf_beta (P, df1 / 2, df2 / 2);
 394   return temp * df2 / ((1. - temp) * df1);
 395 }
 396
 397 /*
 398  *  Mathlib : A C Library of Special Functions
 399  *  Copyright (C) 1998 Ross Ihaka
 400  *  Copyright (C) 2000 The R Development Core Team
 401  *
 402  *  This program is free software; you can redistribute it and/or
 403  *  modify
 404  *  it under the terms of the GNU General Public License as
 405  *  published by
 406  *  the Free Software Foundation; either version 2 of the
 407  *  License, or
 408  *  (at your option) any later version.
 409  *
 410  *  This program is distributed in the hope that it will be
 411  *  useful,
 412  *  but WITHOUT ANY WARRANTY; without even the implied warranty
 413  *  of
 414  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 415  *  GNU General Public License for more details.
 416  *
 417  *  You should have received a copy of the GNU General Public
 418  *  License
 419  *  along with this program; if not, write to the Free Software
 420  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 421  *  02111-1307 USA.
 422  */
 423
 424 /* Returns the density of the noncentral beta distribution with
 425    noncentrality parameter LAMBDA. */
 426 double
 427 npdf_beta (double x, double a, double b, double lambda)
 428 {
 429   if (lambda < 0. || a <= 0. || b <= 0.)
 430     return SYSMIS;
 431   else if (lambda == 0.)
 432     return gsl_ran_beta_pdf (x, a, b);
 433   else
 434     {
 435       double max_error = 2 * DBL_EPSILON;
 436       int max_iter = 200;
 437       double term = gsl_ran_beta_pdf (x, a, b);
 438       double lambda2 = 0.5 * lambda;
 439       double weight = exp (-lambda2);
 440       double sum = weight * term;
 441       double psum = weight;
 442       int k;
 443       for (k = 1; k <= max_iter && 1 - psum < max_error; k++) {
 444         weight *= lambda2 / k;
 445         term *= x * (a + b) / a;
 446         sum += weight * term;
 447         psum += weight;
 448         a += 1;
 449       }
 450       return sum;
 451     }
 452 }