X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fpercentiles.c;h=bf99de163ffbaafe2af8711685e9a640a0256784;hb=2acfe799af1fd4504ee1278e0b8864ace451688a;hp=aa7eead6c03980d9b1dc2bafca96777341678351;hpb=dca2f6d80dcb53dd1a12c675118d4b7e0716b292;p=pspp-builds.git diff --git a/src/math/percentiles.c b/src/math/percentiles.c index aa7eead6..bf99de16 100644 --- a/src/math/percentiles.c +++ b/src/math/percentiles.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004 Free Software Foundation, Inc. + Copyright (C) 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,25 +15,19 @@ along with this program. If not, see . */ #include -#include -#include -#include -#include "factor-stats.h" #include "percentiles.h" -#include +#include -#include "minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid -struct ptile_params -{ - double g1, g1_star; - double g2, g2_star; - int k1, k2; -}; +#include +#include +#include +#include +#include const char *const ptile_alg_desc[] = { @@ -47,380 +41,151 @@ const char *const ptile_alg_desc[] = { - -/* Individual Percentile algorithms */ - -/* Closest observation to tc1 */ -double ptile_round(const struct weighted_value **wv, - const struct ptile_params *par); - - -/* Weighted average at y_tc2 */ -double ptile_haverage(const struct weighted_value **wv, - const struct ptile_params *par); - - -/* Weighted average at y_tc1 */ -double ptile_waverage(const struct weighted_value **wv, - const struct ptile_params *par); - - -/* Empirical distribution function */ -double ptile_empirical(const struct weighted_value **wv, - const struct ptile_params *par); - - -/* Empirical distribution function with averaging*/ -double ptile_aempirical(const struct weighted_value **wv, - const struct ptile_params *par); - - - - -/* Closest observation to tc1 */ double -ptile_round(const struct weighted_value **wv, - const struct ptile_params *par) +percentile_calculate (const struct percentile *ptl, enum pc_alg alg) { - double x; - double a=0; + struct percentile *mutable = (struct percentile *) ptl; + const struct order_stats *os = &ptl->parent; - if ( par->k1 >= 0 ) - a = wv[par->k1]->v.f; + assert (os->cc == ptl->w); - if ( wv[par->k1 + 1]->w >= 1 ) - { - if ( par->g1_star < 0.5 ) - x = a; - else - x = wv[par->k1 + 1]->v.f; - } - else - { - if ( par->g1 < 0.5 ) - x = a; - else - x = wv[par->k1 + 1]->v.f; - - } - - return x; -} + if ( ptl->g1 == SYSMIS) + mutable->g1 = (os->k[0].tc - os->k[0].cc) / os->k[0].c_p1; -/* Weighted average at y_tc2 */ -double -ptile_haverage(const struct weighted_value **wv, - const struct ptile_params *par) -{ - - double a=0; + if ( ptl->g1_star == SYSMIS) + mutable->g1_star = os->k[0].tc - os->k[0].cc; - if ( par->g2_star >= 1.0 ) - return wv[par->k2 + 1]->v.f ; - - /* Special case for k2 + 1 >= n_data - (actually it's not a special case, but just avoids indexing errors ) - */ - if ( par->g2_star == 0 ) + if ( ptl->g2 == SYSMIS) { - assert(par->g2 == 0 ); - return wv[par->k2]->v.f; - } - - /* Ditto for k2 < 0 */ - if ( par->k2 >= 0 ) - { - a = wv[par->k2]->v.f; + if ( os->k[1].c == 0 ) + mutable->g2 = os->k[1].tc / os->k[1].c_p1; + else if ( os->k[1].c_p1 == 0 ) + mutable->g2 = 0; + else + mutable->g2 = (os->k[1].tc - os->k[1].cc) / os->k[1].c_p1; } - if ( wv[par->k2 + 1]->w >= 1.0 ) - return ( (1 - par->g2_star) * a + - par->g2_star * wv[par->k2 + 1]->v.f); - else - return ( (1 - par->g2) * a + - par->g2 * wv[par->k2 + 1]->v.f); - -} - - - -/* Weighted average at y_tc1 */ -double -ptile_waverage(const struct weighted_value **wv, - const struct ptile_params *par) -{ - double a=0; - - if ( par->g1_star >= 1.0 ) - return wv[par->k1 + 1]->v.f ; - - if ( par->k1 >= 0 ) + if ( ptl->g2_star == SYSMIS) { - a = wv[par->k1]->v.f; + if ( os->k[1].c == 0 ) + mutable->g2_star = os->k[1].tc; + else if ( os->k[1].c_p1 == 0 ) + mutable->g2_star = 0; + else + mutable->g2_star = os->k[1].tc - os->k[1].cc; } - if ( wv[par->k1 + 1]->w >= 1.0 ) - return ( (1 - par->g1_star) * a + - par->g1_star * wv[par->k1 + 1]->v.f); - else - return ( (1 - par->g1) * a + - par->g1 * wv[par->k1 + 1]->v.f); -} - - -/* Empirical distribution function */ -double -ptile_empirical(const struct weighted_value **wv, - const struct ptile_params *par) -{ - if ( par->g1_star > 0 ) - return wv[par->k1 + 1]->v.f; - else - return wv[par->k1]->v.f; -} - - - -/* Empirical distribution function with averageing */ -double -ptile_aempirical(const struct weighted_value **wv, - const struct ptile_params *par) -{ - if ( par->g1_star > 0 ) - return wv[par->k1 + 1]->v.f; - else - return (wv[par->k1]->v.f + wv[par->k1 + 1]->v.f ) / 2.0 ; -} - - - -/* Compute the percentile p */ -double ptile(double p, - const struct weighted_value **wv, - int n_data, - double w, - enum pc_alg algorithm); - - - -double -ptile(double p, - const struct weighted_value **wv, - int n_data, - double w, - enum pc_alg algorithm) -{ - int i; - double tc1, tc2; - double result; - - struct ptile_params pp; - - assert( p <= 1.0); - - tc1 = w * p ; - tc2 = (w + 1) * p ; - - pp.k1 = -1; - pp.k2 = -1; - - for ( i = 0 ; i < n_data ; ++i ) + switch (alg) { - if ( wv[i]->cc <= tc1 ) - pp.k1 = i; - - if ( wv[i]->cc <= tc2 ) - pp.k2 = i; + case PC_WAVERAGE: + if ( ptl->g1_star >= 1.0) + return os->k[0].y_p1; + else + { + double a = ( os->k[0].y == SYSMIS ) ? 0 : os->k[0].y; - } + if (os->k[0].c_p1 >= 1.0) + return (1 - ptl->g1_star) * a + ptl->g1_star * os->k[0].y_p1; + else + return (1 - ptl->g1) * a + ptl->g1 * os->k[0].y_p1; + } + break; + case PC_ROUND: + { + double a = ( os->k[0].y == SYSMIS ) ? 0 : os->k[0].y; - if ( pp.k1 >= 0 ) - { - pp.g1 = ( tc1 - wv[pp.k1]->cc ) / wv[pp.k1 + 1]->w; - pp.g1_star = tc1 - wv[pp.k1]->cc ; - } - else - { - pp.g1 = tc1 / wv[pp.k1 + 1]->w; - pp.g1_star = tc1 ; - } + if (os->k[0].c_p1 >= 1.0) + return (ptl->g1_star < 0.5) ? a : os->k[0].y_p1; + else + return (ptl->g1 < 0.5) ? a : os->k[0].y_p1; + } + break; + case PC_EMPIRICAL: + if ( ptl->g1_star == 0 ) + return os->k[0].y; + else + return os->k[0].y_p1; + break; - if ( pp.k2 + 1 >= n_data ) - { - pp.g2 = 0 ; - pp.g2_star = 0; - } - else - { - if ( pp.k2 >= 0 ) + case PC_HAVERAGE: + if ( ptl->g2_star >= 1.0) { - pp.g2 = ( tc2 - wv[pp.k2]->cc ) / wv[pp.k2 + 1]->w; - pp.g2_star = tc2 - wv[pp.k2]->cc ; + return os->k[1].y_p1; } else { - pp.g2 = tc2 / wv[pp.k2 + 1]->w; - pp.g2_star = tc2 ; + double a = ( os->k[1].y == SYSMIS ) ? 0 : os->k[1].y; + + if ( os->k[1].c_p1 >= 1.0) + { + if ( ptl->g2_star == 0) + return os->k[1].y; + + return (1 - ptl->g2_star) * a + ptl->g2_star * os->k[1].y_p1; + } + else + { + return (1 - ptl->g2) * a + ptl->g2 * os->k[1].y_p1; + } } - } - switch ( algorithm ) - { - case PC_HAVERAGE: - result = ptile_haverage(wv, &pp); - break; - case PC_WAVERAGE: - result = ptile_waverage(wv, &pp); - break; - case PC_ROUND: - result = ptile_round(wv, &pp); - break; - case PC_EMPIRICAL: - result = ptile_empirical(wv, &pp); break; + case PC_AEMPIRICAL: - result = ptile_aempirical(wv, &pp); + if ( ptl->g1_star == 0 ) + return (os->k[0].y + os->k[0].y_p1)/ 2.0; + else + return os->k[0].y_p1; break; + default: - result = SYSMIS; + NOT_REACHED (); + break; } - return result; + NOT_REACHED (); + + return SYSMIS; } -/* - Calculate the values of the percentiles in pc_hash. - wv is a sorted array of weighted values of the data set. -*/ -void -ptiles(struct hsh_table *pc_hash, - const struct weighted_value **wv, - int n_data, - double w, - enum pc_alg algorithm) +static void +destroy (struct statistic *stat) { - struct hsh_iterator hi; - struct percentile *p; - - if ( !pc_hash ) - return ; - for ( p = hsh_first(pc_hash, &hi); - p != 0 ; - p = hsh_next(pc_hash, &hi)) - { - p->v = ptile(p->p/100.0 , wv, n_data, w, algorithm); - } - + struct order_stats *os = (struct order_stats *) stat; + free (os->k); + free (os); } -/* Calculate Tukey's Hinges */ -void -tukey_hinges(const struct weighted_value **wv, - int n_data, - double w, - double hinge[3] - ) +struct order_stats * +percentile_create (double p, double W) { - int i; - double c_star = DBL_MAX; - double d; - double l[3]; - int h[3]; - double a, a_star; - - for ( i = 0 ; i < n_data ; ++i ) - { - c_star = MIN(c_star, wv[i]->w); - } - - if ( c_star > 1 ) c_star = 1; - - d = floor((w/c_star + 3 ) / 2.0)/ 2.0; - - l[0] = d*c_star; - l[1] = w/2.0 + c_star/2.0; - l[2] = w + c_star - d*c_star; - - h[0]=-1; - h[1]=-1; - h[2]=-1; - - for ( i = 0 ; i < n_data ; ++i ) - { - if ( l[0] >= wv[i]->cc ) h[0] = i ; - if ( l[1] >= wv[i]->cc ) h[1] = i ; - if ( l[2] >= wv[i]->cc ) h[2] = i ; - } - - for ( i = 0 ; i < 3 ; i++ ) - { - - if ( h[i] >= 0 ) - a_star = l[i] - wv[h[i]]->cc ; - else - a_star = l[i]; - - if ( h[i] + 1 >= n_data ) - { - assert( a_star < 1 ) ; - hinge[i] = (1 - a_star) * wv[h[i]]->v.f; - continue; - } - else - { - a = a_star / ( wv[h[i] + 1]->cc ) ; - } - - if ( a_star >= 1.0 ) - { - hinge[i] = wv[h[i] + 1]->v.f ; - continue; - } - - if ( wv[h[i] + 1]->w >= 1) - { - hinge[i] = ( 1 - a_star) * wv[h[i]]->v.f - + a_star * wv[h[i] + 1]->v.f; - - continue; - } + struct percentile *ptl = xzalloc (sizeof (*ptl)); + struct order_stats *os = (struct order_stats *) ptl; + struct statistic *stat = (struct statistic *) ptl; - hinge[i] = (1 - a) * wv[h[i]]->v.f + a * wv[h[i] + 1]->v.f; + assert (p >= 0); + assert (p <= 1.0); - } - - assert(hinge[0] <= hinge[1]); - assert(hinge[1] <= hinge[2]); + ptl->ptile = p; + ptl->w = W; -} + os->n_k = 2; + os->k = xcalloc (sizeof (*os->k), 2); + os->k[0].tc = W * p; + os->k[1].tc = (W + 1.0) * p; + ptl->g1 = ptl->g1_star = SYSMIS; + ptl->g2 = ptl->g2_star = SYSMIS; -int -ptile_compare(const struct percentile *p1, - const struct percentile *p2, - void *aux UNUSED) -{ - - int cmp; + os->k[1].y_p1 = os->k[1].y = SYSMIS; + os->k[0].y_p1 = os->k[0].y = SYSMIS; - if ( p1->p == p2->p) - cmp = 0 ; - else if (p1->p < p2->p) - cmp = -1 ; - else - cmp = +1; + stat->destroy = destroy; - return cmp; + return os; } -unsigned -ptile_hash(const struct percentile *p, void *aux UNUSED) -{ - return hsh_hash_double(p->p); -} - -