1 /* PSPP - A program for statistical analysis . -*-c-*-
3 Copyright (C) 2004 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "factor_stats.h"
22 #include "percentiles.h"
36 const char *ptile_alg_desc[] = {
39 N_("Weighted Average"),
42 N_("Empirical with averaging")
48 /* Individual Percentile algorithms */
50 /* Closest observation to tc1 */
51 double ptile_round(const struct weighted_value **wv,
52 const struct ptile_params *par);
55 /* Weighted average at y_tc2 */
56 double ptile_haverage(const struct weighted_value **wv,
57 const struct ptile_params *par);
60 /* Weighted average at y_tc1 */
61 double ptile_waverage(const struct weighted_value **wv,
62 const struct ptile_params *par);
65 /* Empirical distribution function */
66 double ptile_empirical(const struct weighted_value **wv,
67 const struct ptile_params *par);
70 /* Empirical distribution function with averaging*/
71 double ptile_aempirical(const struct weighted_value **wv,
72 const struct ptile_params *par);
77 /* Closest observation to tc1 */
79 ptile_round(const struct weighted_value **wv,
80 const struct ptile_params *par)
84 if ( wv[par->k1 + 1]->w >= 1 )
86 if ( par->g1_star < 0.5 )
89 x = wv[par->k1 + 1]->v.f;
96 x = wv[par->k1 + 1]->v.f;
103 /* Weighted average at y_tc2 */
105 ptile_haverage(const struct weighted_value **wv,
106 const struct ptile_params *par)
108 if ( par->g2_star >= 1.0 )
109 return wv[par->k2 + 1]->v.f ;
111 /* Special case for k2 + 1 >= n_data
112 (actually it's not a special case, but just avoids indexing errors )
114 if ( par->g2_star == 0 )
116 assert(par->g2 == 0 );
117 return wv[par->k2]->v.f;
120 assert(par->k2 >= 0);
122 if ( wv[par->k2 + 1]->w >= 1.0 )
123 return ( (1 - par->g2_star) * wv[par->k2]->v.f
125 par->g2_star * wv[par->k2 + 1]->v.f);
127 return ( (1 - par->g2) * wv[par->k2]->v.f
129 par->g2 * wv[par->k2 + 1]->v.f);
135 /* Weighted average at y_tc1 */
137 ptile_waverage(const struct weighted_value **wv,
138 const struct ptile_params *par)
140 if ( par->g1_star >= 1.0 )
141 return wv[par->k1 + 1]->v.f ;
143 if ( wv[par->k1 + 1]->w >= 1.0 )
144 return ( (1 - par->g1_star) * wv[par->k1]->v.f
146 par->g1_star * wv[par->k1 + 1]->v.f);
148 return ( (1 - par->g1) * wv[par->k1]->v.f
150 par->g1 * wv[par->k1 + 1]->v.f);
154 /* Empirical distribution function */
156 ptile_empirical(const struct weighted_value **wv,
157 const struct ptile_params *par)
159 if ( par->g1_star > 0 )
160 return wv[par->k1 + 1]->v.f;
162 return wv[par->k1]->v.f;
167 /* Empirical distribution function with averageing */
169 ptile_aempirical(const struct weighted_value **wv,
170 const struct ptile_params *par)
172 if ( par->g1_star > 0 )
173 return wv[par->k1 + 1]->v.f;
175 return (wv[par->k1]->v.f + wv[par->k1 + 1]->v.f ) / 2.0 ;
180 /* Compute the percentile p */
181 double ptile(double p,
182 const struct weighted_value **wv,
185 enum pc_alg algorithm);
191 const struct weighted_value **wv,
194 enum pc_alg algorithm)
200 struct ptile_params pp;
210 for ( i = 0 ; i < n_data ; ++i )
212 if ( wv[i]->cc <= tc1 )
215 if ( wv[i]->cc <= tc2 )
223 pp.g1 = ( tc1 - wv[pp.k1]->cc ) / wv[pp.k1 + 1]->w;
224 pp.g1_star = tc1 - wv[pp.k1]->cc ;
228 pp.g1 = tc1 / wv[pp.k1 + 1]->w;
233 if ( pp.k2 + 1 >= n_data )
242 pp.g2 = ( tc2 - wv[pp.k2]->cc ) / wv[pp.k2 + 1]->w;
243 pp.g2_star = tc2 - wv[pp.k2]->cc ;
247 pp.g2 = tc2 / wv[pp.k2 + 1]->w;
255 result = ptile_haverage(wv, &pp);
258 result = ptile_waverage(wv, &pp);
261 result = ptile_round(wv, &pp);
264 result = ptile_empirical(wv, &pp);
267 result = ptile_aempirical(wv, &pp);
278 Calculate the values of the percentiles in pc_hash.
279 wv is a sorted array of weighted values of the data set.
282 ptiles(struct hsh_table *pc_hash,
283 const struct weighted_value **wv,
286 enum pc_alg algorithm)
288 struct hsh_iterator hi;
289 struct percentile *p;
293 for ( p = hsh_first(pc_hash, &hi);
295 p = hsh_next(pc_hash, &hi))
297 p->v = ptile(p->p/100.0 , wv, n_data, w, algorithm);
303 /* Calculate Tukey's Hinges */
305 tukey_hinges(const struct weighted_value **wv,
311 double c_star = DBL_MAX;
317 for ( i = 0 ; i < n_data ; ++i )
319 c_star = min(c_star, wv[i]->w);
322 if ( c_star > 1 ) c_star = 1;
324 d = floor((w/c_star + 3 ) / 2.0)/ 2.0;
327 l[1] = w/2.0 + c_star/2.0;
328 l[2] = w + c_star - d*c_star;
334 for ( i = 0 ; i < n_data ; ++i )
336 if ( l[0] >= wv[i]->cc ) h[0] = i ;
337 if ( l[1] >= wv[i]->cc ) h[1] = i ;
338 if ( l[2] >= wv[i]->cc ) h[2] = i ;
341 for ( i = 0 ; i < 3 ; i++ )
343 assert(h[i] + 1< n_data);
346 a_star = l[i] - wv[h[i]]->cc ;
350 a = a_star / ( wv[h[i]+1]->cc ) ;
354 hinges[i] = wv[h[i] + 1]->v.f ;
358 if ( wv[h[i]+1]->w >= 1)
360 hinges[i] = ( 1 - a_star)* wv[h[i]]->v.f
361 + a_star * wv[h[i]+1]->v.f;
366 hinges[i] = ( 1 - a)* wv[h[i]]->v.f + a * wv[h[i]+1]->v.f;
370 assert(hinges[0] <= hinges[1]);
371 assert(hinges[1] <= hinges[2]);
376 ptile_compare(const struct percentile *p1,
377 const struct percentile *p2,
385 else if (p1->p < p2->p)
394 ptile_hash(const struct percentile *p, void *aux UNUSED)
396 return hsh_hash_double(p->p);