1 /* PSPP - A program for statistical analysis . -*-c-*-
3 Copyright (C) 2004 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include <libpspp/compiler.h>
23 #include "factor-stats.h"
24 #include "percentiles.h"
25 #include <libpspp/misc.h>
28 #define _(msgid) gettext (msgid)
29 #define N_(msgid) msgid
42 const char *ptile_alg_desc[] = {
45 N_("Weighted Average"),
48 N_("Empirical with averaging")
54 /* Individual Percentile algorithms */
56 /* Closest observation to tc1 */
57 double ptile_round(const struct weighted_value **wv,
58 const struct ptile_params *par);
61 /* Weighted average at y_tc2 */
62 double ptile_haverage(const struct weighted_value **wv,
63 const struct ptile_params *par);
66 /* Weighted average at y_tc1 */
67 double ptile_waverage(const struct weighted_value **wv,
68 const struct ptile_params *par);
71 /* Empirical distribution function */
72 double ptile_empirical(const struct weighted_value **wv,
73 const struct ptile_params *par);
76 /* Empirical distribution function with averaging*/
77 double ptile_aempirical(const struct weighted_value **wv,
78 const struct ptile_params *par);
83 /* Closest observation to tc1 */
85 ptile_round(const struct weighted_value **wv,
86 const struct ptile_params *par)
94 if ( wv[par->k1 + 1]->w >= 1 )
96 if ( par->g1_star < 0.5 )
99 x = wv[par->k1 + 1]->v.f;
106 x = wv[par->k1 + 1]->v.f;
113 /* Weighted average at y_tc2 */
115 ptile_haverage(const struct weighted_value **wv,
116 const struct ptile_params *par)
121 if ( par->g2_star >= 1.0 )
122 return wv[par->k2 + 1]->v.f ;
124 /* Special case for k2 + 1 >= n_data
125 (actually it's not a special case, but just avoids indexing errors )
127 if ( par->g2_star == 0 )
129 assert(par->g2 == 0 );
130 return wv[par->k2]->v.f;
133 /* Ditto for k2 < 0 */
136 a = wv[par->k2]->v.f;
139 if ( wv[par->k2 + 1]->w >= 1.0 )
140 return ( (1 - par->g2_star) * a +
141 par->g2_star * wv[par->k2 + 1]->v.f);
143 return ( (1 - par->g2) * a +
144 par->g2 * wv[par->k2 + 1]->v.f);
150 /* Weighted average at y_tc1 */
152 ptile_waverage(const struct weighted_value **wv,
153 const struct ptile_params *par)
157 if ( par->g1_star >= 1.0 )
158 return wv[par->k1 + 1]->v.f ;
162 a = wv[par->k1]->v.f;
165 if ( wv[par->k1 + 1]->w >= 1.0 )
166 return ( (1 - par->g1_star) * a +
167 par->g1_star * wv[par->k1 + 1]->v.f);
169 return ( (1 - par->g1) * a +
170 par->g1 * wv[par->k1 + 1]->v.f);
174 /* Empirical distribution function */
176 ptile_empirical(const struct weighted_value **wv,
177 const struct ptile_params *par)
179 if ( par->g1_star > 0 )
180 return wv[par->k1 + 1]->v.f;
182 return wv[par->k1]->v.f;
187 /* Empirical distribution function with averageing */
189 ptile_aempirical(const struct weighted_value **wv,
190 const struct ptile_params *par)
192 if ( par->g1_star > 0 )
193 return wv[par->k1 + 1]->v.f;
195 return (wv[par->k1]->v.f + wv[par->k1 + 1]->v.f ) / 2.0 ;
200 /* Compute the percentile p */
201 double ptile(double p,
202 const struct weighted_value **wv,
205 enum pc_alg algorithm);
211 const struct weighted_value **wv,
214 enum pc_alg algorithm)
220 struct ptile_params pp;
230 for ( i = 0 ; i < n_data ; ++i )
232 if ( wv[i]->cc <= tc1 )
235 if ( wv[i]->cc <= tc2 )
243 pp.g1 = ( tc1 - wv[pp.k1]->cc ) / wv[pp.k1 + 1]->w;
244 pp.g1_star = tc1 - wv[pp.k1]->cc ;
248 pp.g1 = tc1 / wv[pp.k1 + 1]->w;
253 if ( pp.k2 + 1 >= n_data )
262 pp.g2 = ( tc2 - wv[pp.k2]->cc ) / wv[pp.k2 + 1]->w;
263 pp.g2_star = tc2 - wv[pp.k2]->cc ;
267 pp.g2 = tc2 / wv[pp.k2 + 1]->w;
275 result = ptile_haverage(wv, &pp);
278 result = ptile_waverage(wv, &pp);
281 result = ptile_round(wv, &pp);
284 result = ptile_empirical(wv, &pp);
287 result = ptile_aempirical(wv, &pp);
298 Calculate the values of the percentiles in pc_hash.
299 wv is a sorted array of weighted values of the data set.
302 ptiles(struct hsh_table *pc_hash,
303 const struct weighted_value **wv,
306 enum pc_alg algorithm)
308 struct hsh_iterator hi;
309 struct percentile *p;
313 for ( p = hsh_first(pc_hash, &hi);
315 p = hsh_next(pc_hash, &hi))
317 p->v = ptile(p->p/100.0 , wv, n_data, w, algorithm);
323 /* Calculate Tukey's Hinges */
325 tukey_hinges(const struct weighted_value **wv,
332 double c_star = DBL_MAX;
338 for ( i = 0 ; i < n_data ; ++i )
340 c_star = min(c_star, wv[i]->w);
343 if ( c_star > 1 ) c_star = 1;
345 d = floor((w/c_star + 3 ) / 2.0)/ 2.0;
348 l[1] = w/2.0 + c_star/2.0;
349 l[2] = w + c_star - d*c_star;
355 for ( i = 0 ; i < n_data ; ++i )
357 if ( l[0] >= wv[i]->cc ) h[0] = i ;
358 if ( l[1] >= wv[i]->cc ) h[1] = i ;
359 if ( l[2] >= wv[i]->cc ) h[2] = i ;
362 for ( i = 0 ; i < 3 ; i++ )
366 a_star = l[i] - wv[h[i]]->cc ;
370 if ( h[i] + 1 >= n_data )
372 assert( a_star < 1 ) ;
373 hinge[i] = (1 - a_star) * wv[h[i]]->v.f;
378 a = a_star / ( wv[h[i] + 1]->cc ) ;
383 hinge[i] = wv[h[i] + 1]->v.f ;
387 if ( wv[h[i] + 1]->w >= 1)
389 hinge[i] = ( 1 - a_star) * wv[h[i]]->v.f
390 + a_star * wv[h[i] + 1]->v.f;
395 hinge[i] = (1 - a) * wv[h[i]]->v.f + a * wv[h[i] + 1]->v.f;
399 assert(hinge[0] <= hinge[1]);
400 assert(hinge[1] <= hinge[2]);
406 ptile_compare(const struct percentile *p1,
407 const struct percentile *p2,
415 else if (p1->p < p2->p)
424 ptile_hash(const struct percentile *p, void *aux UNUSED)
426 return hsh_hash_double(p->p);