X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fexpressions%2Fhelpers.c;h=d91365de8523c4ee53b3edd792d9f59ab87438b3;hb=ff0dcae1d9a5aa5ae1b80164c49df264be1576b6;hp=7c3371f5db97eb9daf3455c943014a035d178408;hpb=81579d9e9f994fb2908f50af41c3eb033d216e58;p=pspp diff --git a/src/language/expressions/helpers.c b/src/language/expressions/helpers.c index 7c3371f5db..d91365de85 100644 --- a/src/language/expressions/helpers.c +++ b/src/language/expressions/helpers.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2008, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2008, 2010, 2011, 2015, 2016 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,109 +25,46 @@ #include "libpspp/assertion.h" #include "libpspp/pool.h" -const struct substring empty_string = {NULL, 0}; - -double -expr_ymd_to_ofs (double year, double month, double day) -{ - int y = year; - int m = month; - int d = day; - char *error; - double ofs; - - if (y != year || m != month || d != day) - { - msg (SE, _("One of the arguments to a DATE function is not an integer. " - "The result will be system-missing.")); - return SYSMIS; - } - - ofs = calendar_gregorian_to_offset (y, m, d, &error); - if (error != NULL) - { - msg (SE, "%s", error); - free (error); - } - return ofs; -} +#include "gl/minmax.h" -double -expr_ymd_to_date (double year, double month, double day) -{ - double ofs = expr_ymd_to_ofs (year, month, day); - return ofs != SYSMIS ? ofs * DAY_S : SYSMIS; -} +const struct substring empty_string = {NULL, 0}; double -expr_wkyr_to_date (double week, double year) +expr_ymd_to_ofs (int y, int m, int d, + const struct expression *e, const struct expr_node *node, + int ya, int ma, int da) { - int w = week; - - if (w != week) - { - msg (SE, _("The week argument to DATE.WKYR is not an integer. " - "The result will be system-missing.")); - return SYSMIS; - } - else if (w < 1 || w > 53) - { - msg (SE, _("The week argument to DATE.WKYR is outside the acceptable " - "range of 1 to 53. " - "The result will be system-missing.")); - return SYSMIS; - } + int *error = calendar_gregorian_adjust (&y, &m, &d, + settings_get_fmt_settings ()); + if (!error) + return calendar_raw_gregorian_to_offset (y, m, d); else { - double yr_1_1 = expr_ymd_to_ofs (year, 1, 1); - if (yr_1_1 != SYSMIS) - return DAY_S * (yr_1_1 + WEEK_DAY * (w - 1)); - else - return SYSMIS; - } -} - -double -expr_yrday_to_date (double year, double yday) -{ - int yd = yday; - - if (yd != yday) - { - msg (SE, _("The day argument to DATE.YRDAY is not an integer. " - "The result will be system-missing.")); + msg_at (SE, expr_location (e, node), + _("Invalid arguments to %s function."), + operations[node->type].name); + + if (error == &y && ya > 0) + msg_at (SN, expr_location (e, y < 1582 ? node->args[ya - 1] : node), + _("Date %04d-%d-%d is before the earliest supported date " + "1582-10-15."), y, m, d); + else if (error == &m && ma > 0) + msg_at (SN, expr_location (e, node->args[ma - 1]), + _("Month %d is not in the acceptable range of 0 to 13."), m); + else if (error == &d && da > 0) + msg_at (SN, expr_location (e, node->args[da - 1]), + _("Day %d is not in the acceptable range of 0 to 31."), d); return SYSMIS; } - else if (yd < 1 || yd > 366) - { - msg (SE, _("The day argument to DATE.YRDAY is outside the acceptable " - "range of 1 to 366. " - "The result will be system-missing.")); - return SYSMIS; - } - else - { - double yr_1_1 = expr_ymd_to_ofs (year, 1, 1); - if (yr_1_1 != SYSMIS) - return DAY_S * (yr_1_1 + yd - 1.); - else - return SYSMIS; - } } double -expr_yrmoda (double year, double month, double day) +expr_ymd_to_date (int y, int m, int d, + const struct expression *e, const struct expr_node *n, + int ya, int ma, int da) { - if (year >= 0 && year <= 99) - year += 1900; - else if (year != (int) year && year > 47516) - { - msg (SE, _("The year argument to YRMODA is greater than 47516. " - "The result will be system-missing.")); - return SYSMIS; - } - - return expr_ymd_to_ofs (year, month, day); + double ofs = expr_ymd_to_ofs (y, m, d, e, n, ya, ma, da); + return ofs != SYSMIS ? ofs * DAY_S : SYSMIS; } /* A date unit. */ @@ -146,7 +83,8 @@ enum date_unit /* Stores in *UNIT the unit whose name is NAME. Return success. */ static enum date_unit -recognize_unit (struct substring name, enum date_unit *unit) +recognize_unit (struct substring name, const struct expression *e, + const struct expr_node *n, enum date_unit *unit) { struct unit_name { @@ -164,23 +102,25 @@ recognize_unit (struct substring name, enum date_unit *unit) { DATE_MINUTES, SS_LITERAL_INITIALIZER ("minutes") }, { DATE_SECONDS, SS_LITERAL_INITIALIZER ("seconds") }, }; - const int unit_name_cnt = sizeof unit_names / sizeof *unit_names; + const int n_unit_names = sizeof unit_names / sizeof *unit_names; const struct unit_name *un; - for (un = unit_names; un < &unit_names[unit_name_cnt]; un++) + for (un = unit_names; un < &unit_names[n_unit_names]; un++) if (ss_equals_case (un->name, name)) { *unit = un->unit; return true; } - /* TRANSLATORS: Don't translate the the actual unit names `weeks', `days' etc - They must remain in their original English. */ - msg (SE, _("Unrecognized date unit `%.*s'. " - "Valid date units are `years', `quarters', `months', " - "`weeks', `days', `hours', `minutes', and `seconds'."), - (int) ss_length (name), ss_data (name)); + msg_at (SE, expr_location (e, n), + _("Unrecognized date unit `%.*s'. " + "Valid date units are `%s', `%s', `%s', " + "`%s', `%s', `%s', `%s', and `%s'."), + (int) ss_length (name), ss_data (name), + "years", "quarters", "months", + "weeks", "days", "hours", "minutes", "seconds"); + return false; } @@ -268,11 +208,11 @@ date_unit_duration (enum date_unit unit) /* Returns the span from DATE1 to DATE2 in terms of UNIT_NAME. */ double -expr_date_difference (double date1, double date2, struct substring unit_name) +expr_date_difference (double date1, double date2, struct substring unit_name, + const struct expression *e, const struct expr_node *n) { enum date_unit unit; - - if (!recognize_unit (unit_name, &unit)) + if (!recognize_unit (unit_name, e, n->args[2], &unit)) return SYSMIS; switch (unit) @@ -313,7 +253,9 @@ enum date_sum_method /* Stores in *METHOD the method whose name is NAME. Return success. */ static bool -recognize_method (struct substring method_name, enum date_sum_method *method) +recognize_method (struct substring method_name, + const struct expression *e, const struct expr_node *n, + enum date_sum_method *method) { if (ss_equals_case (method_name, ss_cstr ("closest"))) { @@ -327,8 +269,9 @@ recognize_method (struct substring method_name, enum date_sum_method *method) } else { - msg (SE, _("Invalid DATESUM method. " - "Valid choices are `closest' and `rollover'.")); + msg_at (SE, expr_location (e, n), + _("Invalid DATESUM method. " + "Valid choices are `%s' and `%s'."), "closest", "rollover"); return false; } } @@ -336,7 +279,8 @@ recognize_method (struct substring method_name, enum date_sum_method *method) /* Returns DATE advanced by the given number of MONTHS, with day-of-month overflow resolved using METHOD. */ static double -add_months (double date, int months, enum date_sum_method method) +add_months (double date, int months, enum date_sum_method method, + const struct expression *e, const struct expr_node *n) { int y, m, d, yd; double output; @@ -360,12 +304,13 @@ add_months (double date, int months, enum date_sum_method method) if (method == SUM_CLOSEST && d > calendar_days_in_month (y, m)) d = calendar_days_in_month (y, m); - output = calendar_gregorian_to_offset (y, m, d, &error); + output = calendar_gregorian_to_offset (y, m, d, settings_get_fmt_settings (), + &error); if (output != SYSMIS) output = (output * DAY_S) + fmod (date, DAY_S); else { - msg (SE, "%s", error); + msg_at (SE, expr_location (e, n), "%s", error); free (error); } return output; @@ -374,27 +319,25 @@ add_months (double date, int months, enum date_sum_method method) /* Returns DATE advanced by the given QUANTITY of units given in UNIT_NAME, with day-of-month overflow resolved using METHOD_NAME. */ -double -expr_date_sum (double date, double quantity, struct substring unit_name, - struct substring method_name) +static double +expr_date_sum__ (double date, double quantity, struct substring unit_name, + enum date_sum_method method, + const struct expression *e, const struct expr_node *n) { enum date_unit unit; - enum date_sum_method method; - - if (!recognize_unit (unit_name, &unit) - || !recognize_method (method_name, &method)) + if (!recognize_unit (unit_name, e, n->args[2], &unit)) return SYSMIS; switch (unit) { case DATE_YEARS: - return add_months (date, trunc (quantity) * 12, method); + return add_months (date, trunc (quantity) * 12, method, e, n); case DATE_QUARTERS: - return add_months (date, trunc (quantity) * 3, method); + return add_months (date, trunc (quantity) * 3, method, e, n); case DATE_MONTHS: - return add_months (date, trunc (quantity), method); + return add_months (date, trunc (quantity), method, e, n); case DATE_WEEKS: case DATE_DAYS: @@ -407,6 +350,31 @@ expr_date_sum (double date, double quantity, struct substring unit_name, NOT_REACHED (); } +/* Returns DATE advanced by the given QUANTITY of units given in + UNIT_NAME, with day-of-month overflow resolved using + METHOD_NAME. */ +double +expr_date_sum (double date, double quantity, struct substring unit_name, + struct substring method_name, + const struct expression *e, const struct expr_node *n) +{ + enum date_sum_method method; + if (!recognize_method (method_name, e, n->args[3], &method)) + return SYSMIS; + + return expr_date_sum__ (date, quantity, unit_name, method, e, n); +} + +/* Returns DATE advanced by the given QUANTITY of units given in + UNIT_NAME, with day-of-month overflow resolved using + METHOD_NAME. */ +double +expr_date_sum_closest (double date, double quantity, struct substring unit_name, + const struct expression *e, const struct expr_node *n) +{ + return expr_date_sum__ (date, quantity, unit_name, SUM_CLOSEST, e, n); +} + int compare_string_3way (const struct substring *a, const struct substring *b) { @@ -425,15 +393,12 @@ compare_string_3way (const struct substring *a, const struct substring *b) } size_t -count_valid (double *d, size_t d_cnt) +count_valid (double *d, size_t n) { - size_t valid_cnt; - size_t i; - - valid_cnt = 0; - for (i = 0; i < d_cnt; i++) - valid_cnt += is_valid (d[i]); - return valid_cnt; + size_t n_valid = 0; + for (size_t i = 0; i < n; i++) + n_valid += is_valid (d[i]); + return n_valid; } struct substring @@ -453,210 +418,112 @@ copy_string (struct expression *e, const char *old, size_t length) return s; } -/* Returns the noncentral beta cumulative distribution function - value for the given arguments. +static double +round__ (double x, double mult, double fuzzbits, double adjustment) +{ + if (fuzzbits <= 0) + fuzzbits = settings_get_fuzzbits (); + adjustment += exp2 (fuzzbits - DBL_MANT_DIG); + + x /= mult; + x = x >= 0. ? floor (x + adjustment) : -floor (-x + adjustment); + return x * mult; +} - FIXME: The accuracy of this function is not entirely - satisfactory. We only match the example values given in AS - 310 to the first 5 significant digits. */ double -ncdf_beta (double x, double a, double b, double lambda) +round_nearest (double x, double mult, double fuzzbits) { - double c; + return round__ (x, mult, fuzzbits, .5); +} - if (x <= 0. || x >= 1. || a <= 0. || b <= 0. || lambda <= 0.) - return SYSMIS; +double +round_zero (double x, double mult, double fuzzbits) +{ + return round__ (x, mult, fuzzbits, 0); +} - c = lambda / 2.; - if (lambda < 54.) - { - /* Algorithm AS 226. */ - double x0, a0, beta, temp, gx, q, ax, sumq, sum; - double err_max = 2 * DBL_EPSILON; - double err_bound; - int iter_max = 100; - int iter; - - x0 = floor (c - 5.0 * sqrt (c)); - if (x0 < 0.) - x0 = 0.; - a0 = a + x0; - beta = (gsl_sf_lngamma (a0) - + gsl_sf_lngamma (b) - - gsl_sf_lngamma (a0 + b)); - temp = gsl_sf_beta_inc (a0, b, x); - gx = exp (a0 * log (x) + b * log (1. - x) - beta - log (a0)); - if (a0 >= a) - q = exp (-c + x0 * log (c)) - gsl_sf_lngamma (x0 + 1.); - else - q = exp (-c); - ax = q * temp; - sumq = 1. - q; - sum = ax; - - iter = 0; - do - { - iter++; - temp -= gx; - gx = x * (a + b + iter - 1.) * gx / (a + iter); - q *= c / iter; - sumq -= q; - ax = temp * q; - sum += ax; - - err_bound = (temp - gx) * sumq; - } - while (iter < iter_max && err_bound > err_max); - - return sum; - } - else - { - /* Algorithm AS 310. */ - double m, m_sqrt; - int iter, iter_lower, iter_upper, iter1, iter2, j; - double t, q, r, psum, beta, s1, gx, fx, temp, ftemp, t0, s0, sum, s; - double err_bound; - double err_max = 2 * DBL_EPSILON; - - iter = 0; - - m = floor (c + .5); - m_sqrt = sqrt (m); - iter_lower = m - 5. * m_sqrt; - iter_upper = m + 5. * m_sqrt; - - t = -c + m * log (c) - gsl_sf_lngamma (m + 1.); - q = exp (t); - r = q; - psum = q; - beta = (gsl_sf_lngamma (a + m) - + gsl_sf_lngamma (b) - - gsl_sf_lngamma (a + m + b)); - s1 = (a + m) * log (x) + b * log (1. - x) - log (a + m) - beta; - fx = gx = exp (s1); - ftemp = temp = gsl_sf_beta_inc (a + m, b, x); - iter++; - sum = q * temp; - iter1 = m; - - while (iter1 >= iter_lower && q >= err_max) - { - q = q * iter1 / c; - iter++; - gx = (a + iter1) / (x * (a + b + iter1 - 1.)) * gx; - iter1--; - temp += gx; - psum += q; - sum += q * temp; - } - - t0 = (gsl_sf_lngamma (a + b) - - gsl_sf_lngamma (a + 1.) - - gsl_sf_lngamma (b)); - s0 = a * log (x) + b * log (1. - x); - - s = 0.; - for (j = 0; j < iter1; j++) - { - double t1; - s += exp (t0 + s0 + j * log (x)); - t1 = log (a + b + j) - log (a + 1. + j) + t0; - t0 = t1; - } - - err_bound = (1. - gsl_sf_gamma_inc_P (iter1, c)) * (temp + s); - q = r; - temp = ftemp; - gx = fx; - iter2 = m; - for (;;) - { - double ebd = err_bound + (1. - psum) * temp; - if (ebd < err_max || iter >= iter_upper) - break; - - iter2++; - iter++; - q = q * c / iter2; - psum += q; - temp -= gx; - gx = x * (a + b + iter2 - 1.) / (a + iter2) * gx; - sum += q * temp; - } - - return sum; - } +struct substring +replace_string (struct expression *e, + struct substring haystack, + struct substring needle, + struct substring replacement, + int n) +{ + if (!needle.length || haystack.length < needle.length || n <= 0) + return haystack; + + struct substring result = alloc_string (e, MAX_STRING); + result.length = 0; + + size_t i = 0; + while (i <= haystack.length - needle.length) + if (!memcmp (&haystack.string[i], needle.string, needle.length)) + { + size_t copy_len = MIN (replacement.length, MAX_STRING - result.length); + memcpy (&result.string[result.length], replacement.string, copy_len); + result.length += copy_len; + i += needle.length; + + if (--n < 1) + break; + } + else + { + if (result.length < MAX_STRING) + result.string[result.length++] = haystack.string[i]; + i++; + } + while (i < haystack.length && result.length < MAX_STRING) + result.string[result.length++] = haystack.string[i++]; + + return result; } -double -cdf_bvnor (double x0, double x1, double r) +static int +compare_doubles (const void *a_, const void *b_) { - double z = pow2 (x0) - 2. * r * x0 * x1 + pow2 (x1); - return exp (-z / (2. * (1 - r * r))) * (2. * M_PI * sqrt (1 - r * r)); + const double *ap = a_; + const double *bp = b_; + double a = *ap; + double b = *bp; + + /* Sort SYSMIS to the end. */ + return (a == b ? 0 + : a == SYSMIS ? 1 + : b == SYSMIS ? -1 + : a > b ? 1 : -1); } double -idf_fdist (double P, double df1, double df2) +median (double *a, size_t n) { - double temp = gsl_cdf_beta_Pinv (P, df1 / 2, df2 / 2); - return temp * df2 / ((1. - temp) * df1); + /* Sort the array in-place, sorting SYSMIS to the end. */ + qsort (a, n, sizeof *a, compare_doubles); + + /* Drop SYSMIS. */ + n = count_valid (a, n); + + return (!n ? SYSMIS + : n % 2 ? a[n / 2] + : (a[n / 2 - 1] + a[n / 2]) / 2.0); } -/* - * Mathlib : A C Library of Special Functions - * Copyright (C) 1998 Ross Ihaka - * Copyright (C) 2000 The R Development Core Team - * - * This program is free software; you can redistribute it and/or - * modify - * it under the terms of the GNU General Public License as - * published by - * the Free Software Foundation; either version 2 of the - * License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be - * useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty - * of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301 USA. - */ - -/* Returns the density of the noncentral beta distribution with - noncentrality parameter LAMBDA. */ -double -npdf_beta (double x, double a, double b, double lambda) +const struct variable * +expr_index_vector (const struct expression *e, const struct expr_node *n, + const struct vector *v, double idx) { - if (lambda < 0. || a <= 0. || b <= 0.) - return SYSMIS; - else if (lambda == 0.) - return gsl_ran_beta_pdf (x, a, b); + if (idx >= 1 && idx <= vector_get_n_vars (v)) + return vector_get_var (v, idx - 1); + + msg_at (SE, expr_location (e, n), + _("Index outside valid range 1 to %zu, inclusive, for vector %s. " + "The value will be treated as system-missing."), + vector_get_n_vars (v), vector_get_name (v)); + if (idx == SYSMIS) + msg_at (SN, expr_location (e, n->args[0]), + _("The index is system-missing.")); else - { - double max_error = 2 * DBL_EPSILON; - int max_iter = 200; - double term = gsl_ran_beta_pdf (x, a, b); - double lambda2 = 0.5 * lambda; - double weight = exp (-lambda2); - double sum = weight * term; - double psum = weight; - int k; - for (k = 1; k <= max_iter && 1 - psum < max_error; k++) { - weight *= lambda2 / k; - term *= x * (a + b) / a; - sum += weight * term; - psum += weight; - a += 1; - } - return sum; - } + msg_at (SN, expr_location (e, n->args[0]), + _("The index has value %g."), idx); + return NULL; }