pintos-os.org Git - pspp/blob - src/language/expressions/helpers.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2008 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18 #include "helpers.h"
  19 #include <gsl/gsl_roots.h>
  20 #include <gsl/gsl_sf.h>
  21 #include <libpspp/assertion.h>
  22 #include <libpspp/pool.h>
  23 #include "private.h"
  24
  25 const struct substring empty_string = {NULL, 0};
  26
  27 static void
  28 expr_error (void *aux UNUSED, const char *format, ...)
  29 {
  30   struct msg m;
  31   va_list args;
  32
  33   m.category = MSG_C_SYNTAX;
  34   m.severity = MSG_S_ERROR;
  35   va_start (args, format);
  36   m.text = xvasprintf (format, args);
  37   va_end (args);
  38
  39   msg_emit (&m);
  40 }
  41
  42 double
  43 expr_ymd_to_ofs (double year, double month, double day)
  44 {
  45   int y = year;
  46   int m = month;
  47   int d = day;
  48
  49   if (y != year || m != month || d != day)
  50     {
  51       msg (SE, _("One of the arguments to a DATE function is not an integer.  "
  52                  "The result will be system-missing."));
  53       return SYSMIS;
  54     }
  55
  56   return calendar_gregorian_to_offset (y, m, d, expr_error, NULL);
  57 }
  58
  59 double
  60 expr_ymd_to_date (double year, double month, double day)
  61 {
  62   double ofs = expr_ymd_to_ofs (year, month, day);
  63   return ofs != SYSMIS ? ofs * DAY_S : SYSMIS;
  64 }
  65
  66 double
  67 expr_wkyr_to_date (double week, double year)
  68 {
  69   int w = week;
  70
  71   if (w != week)
  72     {
  73       msg (SE, _("The week argument to DATE.WKYR is not an integer.  "
  74                  "The result will be system-missing."));
  75       return SYSMIS;
  76     }
  77   else if (w < 1 || w > 53)
  78     {
  79       msg (SE, _("The week argument to DATE.WKYR is outside the acceptable "
  80                  "range of 1 to 53.  "
  81                  "The result will be system-missing."));
  82       return SYSMIS;
  83     }
  84   else
  85     {
  86       double yr_1_1 = expr_ymd_to_ofs (year, 1, 1);
  87       if (yr_1_1 != SYSMIS)
  88         return DAY_S * (yr_1_1 + WEEK_DAY * (w - 1));
  89       else
  90         return SYSMIS;
  91     }
  92 }
  93
  94 double
  95 expr_yrday_to_date (double year, double yday)
  96 {
  97   int yd = yday;
  98
  99   if (yd != yday)
 100     {
 101       msg (SE, _("The day argument to DATE.YRDAY is not an integer.  "
 102                  "The result will be system-missing."));
 103       return SYSMIS;
 104     }
 105   else if (yd < 1 || yd > 366)
 106     {
 107       msg (SE, _("The day argument to DATE.YRDAY is outside the acceptable "
 108                  "range of 1 to 366.  "
 109                  "The result will be system-missing."));
 110       return SYSMIS;
 111     }
 112   else
 113     {
 114       double yr_1_1 = expr_ymd_to_ofs (year, 1, 1);
 115       if (yr_1_1 != SYSMIS)
 116         return DAY_S * (yr_1_1 + yd - 1.);
 117       else
 118         return SYSMIS;
 119     }
 120 }
 121
 122 double
 123 expr_yrmoda (double year, double month, double day)
 124 {
 125   if (year >= 0 && year <= 99)
 126     year += 1900;
 127   else if (year != (int) year && year > 47516)
 128     {
 129       msg (SE, _("The year argument to YRMODA is greater than 47516.  "
 130                  "The result will be system-missing."));
 131       return SYSMIS;
 132     }
 133
 134   return expr_ymd_to_ofs (year, month, day);
 135 }
 136 \f
 137 /* A date unit. */
 138 enum date_unit
 139   {
 140     DATE_YEARS,
 141     DATE_QUARTERS,
 142     DATE_MONTHS,
 143     DATE_WEEKS,
 144     DATE_DAYS,
 145     DATE_HOURS,
 146     DATE_MINUTES,
 147     DATE_SECONDS
 148   };
 149
 150 /* Stores in *UNIT the unit whose name is NAME.
 151    Return success. */
 152 static enum date_unit
 153 recognize_unit (struct substring name, enum date_unit *unit)
 154 {
 155   struct unit_name
 156     {
 157       enum date_unit unit;
 158       const struct substring name;
 159     };
 160   static const struct unit_name unit_names[] =
 161     {
 162       { DATE_YEARS, SS_LITERAL_INITIALIZER ("years") },
 163       { DATE_QUARTERS, SS_LITERAL_INITIALIZER ("quarters") },
 164       { DATE_MONTHS, SS_LITERAL_INITIALIZER ("months") },
 165       { DATE_WEEKS, SS_LITERAL_INITIALIZER ("weeks") },
 166       { DATE_DAYS, SS_LITERAL_INITIALIZER ("days") },
 167       { DATE_HOURS, SS_LITERAL_INITIALIZER ("hours") },
 168       { DATE_MINUTES, SS_LITERAL_INITIALIZER ("minutes") },
 169       { DATE_SECONDS, SS_LITERAL_INITIALIZER ("seconds") },
 170     };
 171   const int unit_name_cnt = sizeof unit_names / sizeof *unit_names;
 172
 173   const struct unit_name *un;
 174
 175   for (un = unit_names; un < &unit_names[unit_name_cnt]; un++)
 176     if (ss_equals_case (un->name, name))
 177       {
 178         *unit = un->unit;
 179         return true;
 180       }
 181
 182   /* TRANSLATORS: Don't translate the the actual unit names `weeks', `days' etc
 183         They must remain in their original English. */
 184   msg (SE, _("Unrecognized date unit `%.*s'.  "
 185              "Valid date units are `years', `quarters', `months', "
 186              "`weeks', `days', `hours', `minutes', and `seconds'."),
 187        (int) ss_length (name), ss_data (name));
 188   return false;
 189 }
 190
 191 /* Returns the number of whole years from DATE1 to DATE2,
 192    where a year is defined as the same or later month, day, and
 193    time of day. */
 194 static int
 195 year_diff (double date1, double date2)
 196 {
 197   int y1, m1, d1, yd1;
 198   int y2, m2, d2, yd2;
 199   int diff;
 200
 201   assert (date2 >= date1);
 202   calendar_offset_to_gregorian (date1 / DAY_S, &y1, &m1, &d1, &yd1);
 203   calendar_offset_to_gregorian (date2 / DAY_S, &y2, &m2, &d2, &yd2);
 204
 205   diff = y2 - y1;
 206   if (diff > 0)
 207     {
 208       int yd1 = 32 * m1 + d1;
 209       int yd2 = 32 * m2 + d2;
 210       if (yd2 < yd1
 211           || (yd2 == yd1 && fmod (date2, DAY_S) < fmod (date1, DAY_S)))
 212         diff--;
 213     }
 214   return diff;
 215 }
 216
 217 /* Returns the number of whole months from DATE1 to DATE2,
 218    where a month is defined as the same or later day and time of
 219    day. */
 220 static int
 221 month_diff (double date1, double date2)
 222 {
 223   int y1, m1, d1, yd1;
 224   int y2, m2, d2, yd2;
 225   int diff;
 226
 227   assert (date2 >= date1);
 228   calendar_offset_to_gregorian (date1 / DAY_S, &y1, &m1, &d1, &yd1);
 229   calendar_offset_to_gregorian (date2 / DAY_S, &y2, &m2, &d2, &yd2);
 230
 231   diff = ((y2 * 12) + m2) - ((y1 * 12) + m1);
 232   if (diff > 0
 233       && (d2 < d1
 234           || (d2 == d1 && fmod (date2, DAY_S) < fmod (date1, DAY_S))))
 235     diff--;
 236   return diff;
 237 }
 238
 239 /* Returns the number of whole quarter from DATE1 to DATE2,
 240    where a quarter is defined as three months. */
 241 static int
 242 quarter_diff (double date1, double date2)
 243 {
 244   return month_diff (date1, date2) / 3;
 245 }
 246
 247 /* Returns the number of seconds in the given UNIT. */
 248 static int
 249 date_unit_duration (enum date_unit unit)
 250 {
 251   switch (unit)
 252     {
 253     case DATE_WEEKS:
 254       return WEEK_S;
 255
 256     case DATE_DAYS:
 257       return DAY_S;
 258
 259     case DATE_HOURS:
 260       return H_S;
 261
 262     case DATE_MINUTES:
 263       return MIN_S;
 264
 265     case DATE_SECONDS:
 266       return 1;
 267
 268     default:
 269       NOT_REACHED ();
 270     }
 271 }
 272
 273 /* Returns the span from DATE1 to DATE2 in terms of UNIT_NAME. */
 274 double
 275 expr_date_difference (double date1, double date2, struct substring unit_name)
 276 {
 277   enum date_unit unit;
 278
 279   if (!recognize_unit (unit_name, &unit))
 280     return SYSMIS;
 281
 282   switch (unit)
 283     {
 284     case DATE_YEARS:
 285       return (date2 >= date1
 286               ? year_diff (date1, date2)
 287               : -year_diff (date2, date1));
 288
 289     case DATE_QUARTERS:
 290       return (date2 >= date1
 291               ? quarter_diff (date1, date2)
 292               : -quarter_diff (date2, date1));
 293
 294     case DATE_MONTHS:
 295       return (date2 >= date1
 296               ? month_diff (date1, date2)
 297               : -month_diff (date2, date1));
 298
 299     case DATE_WEEKS:
 300     case DATE_DAYS:
 301     case DATE_HOURS:
 302     case DATE_MINUTES:
 303     case DATE_SECONDS:
 304       return trunc ((date2 - date1) / date_unit_duration (unit));
 305     }
 306
 307   NOT_REACHED ();
 308 }
 309
 310 /* How to deal with days out of range for a given month. */
 311 enum date_sum_method
 312   {
 313     SUM_ROLLOVER,       /* Roll them over to the next month. */
 314     SUM_CLOSEST         /* Use the last day of the month. */
 315   };
 316
 317 /* Stores in *METHOD the method whose name is NAME.
 318    Return success. */
 319 static bool
 320 recognize_method (struct substring method_name, enum date_sum_method *method)
 321 {
 322   if (ss_equals_case (method_name, ss_cstr ("closest")))
 323     {
 324       *method = SUM_CLOSEST;
 325       return true;
 326     }
 327   else if (ss_equals_case (method_name, ss_cstr ("rollover")))
 328     {
 329       *method = SUM_ROLLOVER;
 330       return true;
 331     }
 332   else
 333     {
 334       msg (SE, _("Invalid DATESUM method.  "
 335                  "Valid choices are `closest' and `rollover'."));
 336       return false;
 337     }
 338 }
 339
 340 /* Returns DATE advanced by the given number of MONTHS, with
 341    day-of-month overflow resolved using METHOD. */
 342 static double
 343 add_months (double date, int months, enum date_sum_method method)
 344 {
 345   int y, m, d, yd;
 346   double output;
 347
 348   calendar_offset_to_gregorian (date / DAY_S, &y, &m, &d, &yd);
 349   y += months / 12;
 350   m += months % 12;
 351   if (m < 1)
 352     {
 353       m += 12;
 354       y--;
 355     }
 356   else if (m > 12)
 357     {
 358       m -= 12;
 359       y++;
 360     }
 361   assert (m >= 1 && m <= 12);
 362
 363   if (method == SUM_CLOSEST && d > calendar_days_in_month (y, m))
 364     d = calendar_days_in_month (y, m);
 365
 366   output = calendar_gregorian_to_offset (y, m, d, expr_error, NULL);
 367   if (output != SYSMIS)
 368     output = (output * DAY_S) + fmod (date, DAY_S);
 369   return output;
 370 }
 371
 372 /* Returns DATE advanced by the given QUANTITY of units given in
 373    UNIT_NAME, with day-of-month overflow resolved using
 374    METHOD_NAME. */
 375 double
 376 expr_date_sum (double date, double quantity, struct substring unit_name,
 377                struct substring method_name)
 378 {
 379   enum date_unit unit;
 380   enum date_sum_method method;
 381
 382   if (!recognize_unit (unit_name, &unit)
 383       || !recognize_method (method_name, &method))
 384     return SYSMIS;
 385
 386   switch (unit)
 387     {
 388     case DATE_YEARS:
 389       return add_months (date, trunc (quantity) * 12, method);
 390
 391     case DATE_QUARTERS:
 392       return add_months (date, trunc (quantity) * 3, method);
 393
 394     case DATE_MONTHS:
 395       return add_months (date, trunc (quantity), method);
 396
 397     case DATE_WEEKS:
 398     case DATE_DAYS:
 399     case DATE_HOURS:
 400     case DATE_MINUTES:
 401     case DATE_SECONDS:
 402       return date + quantity * date_unit_duration (unit);
 403     }
 404
 405   NOT_REACHED ();
 406 }
 407
 408 int
 409 compare_string_3way (const struct substring *a, const struct substring *b)
 410 {
 411   size_t i;
 412
 413   for (i = 0; i < a->length && i < b->length; i++)
 414     if (a->string[i] != b->string[i])
 415       return a->string[i] < b->string[i] ? -1 : 1;
 416   for (; i < a->length; i++)
 417     if (a->string[i] != ' ')
 418       return 1;
 419   for (; i < b->length; i++)
 420     if (b->string[i] != ' ')
 421       return -1;
 422   return 0;
 423 }
 424
 425 size_t
 426 count_valid (double *d, size_t d_cnt)
 427 {
 428   size_t valid_cnt;
 429   size_t i;
 430
 431   valid_cnt = 0;
 432   for (i = 0; i < d_cnt; i++)
 433     valid_cnt += is_valid (d[i]);
 434   return valid_cnt;
 435 }
 436
 437 struct substring
 438 alloc_string (struct expression *e, size_t length)
 439 {
 440   struct substring s;
 441   s.length = length;
 442   s.string = pool_alloc (e->eval_pool, length);
 443   return s;
 444 }
 445
 446 struct substring
 447 copy_string (struct expression *e, const char *old, size_t length)
 448 {
 449   struct substring s = alloc_string (e, length);
 450   memcpy (s.string, old, length);
 451   return s;
 452 }
 453
 454 /* Returns the noncentral beta cumulative distribution function
 455    value for the given arguments.
 456
 457    FIXME: The accuracy of this function is not entirely
 458    satisfactory.  We only match the example values given in AS
 459    310 to the first 5 significant digits. */
 460 double
 461 ncdf_beta (double x, double a, double b, double lambda)
 462 {
 463   double c;
 464
 465   if (x <= 0. || x >= 1. || a <= 0. || b <= 0. || lambda <= 0.)
 466     return SYSMIS;
 467
 468   c = lambda / 2.;
 469   if (lambda < 54.)
 470     {
 471       /* Algorithm AS 226. */
 472       double x0, a0, beta, temp, gx, q, ax, sumq, sum;
 473       double err_max = 2 * DBL_EPSILON;
 474       double err_bound;
 475       int iter_max = 100;
 476       int iter;
 477
 478       x0 = floor (c - 5.0 * sqrt (c));
 479       if (x0 < 0.)
 480         x0 = 0.;
 481       a0 = a + x0;
 482       beta = (gsl_sf_lngamma (a0)
 483               + gsl_sf_lngamma (b)
 484               - gsl_sf_lngamma (a0 + b));
 485       temp = gsl_sf_beta_inc (a0, b, x);
 486       gx = exp (a0 * log (x) + b * log (1. - x) - beta - log (a0));
 487       if (a0 >= a)
 488         q = exp (-c + x0 * log (c)) - gsl_sf_lngamma (x0 + 1.);
 489       else
 490         q = exp (-c);
 491       ax = q * temp;
 492       sumq = 1. - q;
 493       sum = ax;
 494
 495       iter = 0;
 496       do
 497         {
 498           iter++;
 499           temp -= gx;
 500           gx = x * (a + b + iter - 1.) * gx / (a + iter);
 501           q *= c / iter;
 502           sumq -= q;
 503           ax = temp * q;
 504           sum += ax;
 505
 506           err_bound = (temp - gx) * sumq;
 507         }
 508       while (iter < iter_max && err_bound > err_max);
 509
 510       return sum;
 511     }
 512   else
 513     {
 514       /* Algorithm AS 310. */
 515       double m, m_sqrt;
 516       int iter, iter_lower, iter_upper, iter1, iter2, j;
 517       double t, q, r, psum, beta, s1, gx, fx, temp, ftemp, t0, s0, sum, s;
 518       double err_bound;
 519       double err_max = 2 * DBL_EPSILON;
 520
 521       iter = 0;
 522
 523       m = floor (c + .5);
 524       m_sqrt = sqrt (m);
 525       iter_lower = m - 5. * m_sqrt;
 526       iter_upper = m + 5. * m_sqrt;
 527
 528       t = -c + m * log (c) - gsl_sf_lngamma (m + 1.);
 529       q = exp (t);
 530       r = q;
 531       psum = q;
 532       beta = (gsl_sf_lngamma (a + m)
 533               + gsl_sf_lngamma (b)
 534               - gsl_sf_lngamma (a + m + b));
 535       s1 = (a + m) * log (x) + b * log (1. - x) - log (a + m) - beta;
 536       fx = gx = exp (s1);
 537       ftemp = temp = gsl_sf_beta_inc (a + m, b, x);
 538       iter++;
 539       sum = q * temp;
 540       iter1 = m;
 541
 542       while (iter1 >= iter_lower && q >= err_max)
 543         {
 544           q = q * iter1 / c;
 545           iter++;
 546           gx = (a + iter1) / (x * (a + b + iter1 - 1.)) * gx;
 547           iter1--;
 548           temp += gx;
 549           psum += q;
 550           sum += q * temp;
 551         }
 552
 553       t0 = (gsl_sf_lngamma (a + b)
 554             - gsl_sf_lngamma (a + 1.)
 555             - gsl_sf_lngamma (b));
 556       s0 = a * log (x) + b * log (1. - x);
 557
 558       s = 0.;
 559       for (j = 0; j < iter1; j++)
 560         {
 561           double t1;
 562           s += exp (t0 + s0 + j * log (x));
 563           t1 = log (a + b + j) - log (a + 1. + j) + t0;
 564           t0 = t1;
 565         }
 566
 567       err_bound = (1. - gsl_sf_gamma_inc_P (iter1, c)) * (temp + s);
 568       q = r;
 569       temp = ftemp;
 570       gx = fx;
 571       iter2 = m;
 572       for (;;)
 573         {
 574           double ebd = err_bound + (1. - psum) * temp;
 575           if (ebd < err_max || iter >= iter_upper)
 576             break;
 577
 578           iter2++;
 579           iter++;
 580           q = q * c / iter2;
 581           psum += q;
 582           temp -= gx;
 583           gx = x * (a + b + iter2 - 1.) / (a + iter2) * gx;
 584           sum += q * temp;
 585         }
 586
 587       return sum;
 588     }
 589 }
 590
 591 double
 592 cdf_bvnor (double x0, double x1, double r)
 593 {
 594   double z = pow2 (x0) - 2. * r * x0 * x1 + pow2 (x1);
 595   return exp (-z / (2. * (1 - r * r))) * (2. * M_PI * sqrt (1 - r * r));
 596 }
 597
 598 double
 599 idf_fdist (double P, double df1, double df2)
 600 {
 601   double temp = gsl_cdf_beta_Pinv (P, df1 / 2, df2 / 2);
 602   return temp * df2 / ((1. - temp) * df1);
 603 }
 604
 605 /*
 606  *  Mathlib : A C Library of Special Functions
 607  *  Copyright (C) 1998 Ross Ihaka
 608  *  Copyright (C) 2000 The R Development Core Team
 609  *
 610  *  This program is free software; you can redistribute it and/or
 611  *  modify
 612  *  it under the terms of the GNU General Public License as
 613  *  published by
 614  *  the Free Software Foundation; either version 2 of the
 615  *  License, or
 616  *  (at your option) any later version.
 617  *
 618  *  This program is distributed in the hope that it will be
 619  *  useful,
 620  *  but WITHOUT ANY WARRANTY; without even the implied warranty
 621  *  of
 622  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 623  *  GNU General Public License for more details.
 624  *
 625  *  You should have received a copy of the GNU General Public
 626  *  License
 627  *  along with this program; if not, write to the Free Software
 628  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 629  *  02110-1301 USA.
 630  */
 631
 632 /* Returns the density of the noncentral beta distribution with
 633    noncentrality parameter LAMBDA. */
 634 double
 635 npdf_beta (double x, double a, double b, double lambda)
 636 {
 637   if (lambda < 0. || a <= 0. || b <= 0.)
 638     return SYSMIS;
 639   else if (lambda == 0.)
 640     return gsl_ran_beta_pdf (x, a, b);
 641   else
 642     {
 643       double max_error = 2 * DBL_EPSILON;
 644       int max_iter = 200;
 645       double term = gsl_ran_beta_pdf (x, a, b);
 646       double lambda2 = 0.5 * lambda;
 647       double weight = exp (-lambda2);
 648       double sum = weight * term;
 649       double psum = weight;
 650       int k;
 651       for (k = 1; k <= max_iter && 1 - psum < max_error; k++) {
 652         weight *= lambda2 / k;
 653         term *= x * (a + b) / a;
 654         sum += weight * term;
 655         psum += weight;
 656         a += 1;
 657       }
 658       return sum;
 659     }
 660 }