1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/val-type.h>
20 #include <libpspp/compiler.h>
21 #include "factor-stats.h"
22 #include "percentiles.h"
23 #include <libpspp/misc.h>
28 #define _(msgid) gettext (msgid)
29 #define N_(msgid) msgid
39 const char *const ptile_alg_desc[] = {
42 N_("Weighted Average"),
45 N_("Empirical with averaging")
51 /* Individual Percentile algorithms */
53 /* Closest observation to tc1 */
54 double ptile_round(const struct weighted_value **wv,
55 const struct ptile_params *par);
58 /* Weighted average at y_tc2 */
59 double ptile_haverage(const struct weighted_value **wv,
60 const struct ptile_params *par);
63 /* Weighted average at y_tc1 */
64 double ptile_waverage(const struct weighted_value **wv,
65 const struct ptile_params *par);
68 /* Empirical distribution function */
69 double ptile_empirical(const struct weighted_value **wv,
70 const struct ptile_params *par);
73 /* Empirical distribution function with averaging*/
74 double ptile_aempirical(const struct weighted_value **wv,
75 const struct ptile_params *par);
80 /* Closest observation to tc1 */
82 ptile_round(const struct weighted_value **wv,
83 const struct ptile_params *par)
91 if ( wv[par->k1 + 1]->w >= 1 )
93 if ( par->g1_star < 0.5 )
96 x = wv[par->k1 + 1]->v.f;
103 x = wv[par->k1 + 1]->v.f;
110 /* Weighted average at y_tc2 */
112 ptile_haverage(const struct weighted_value **wv,
113 const struct ptile_params *par)
118 if ( par->g2_star >= 1.0 )
119 return wv[par->k2 + 1]->v.f ;
121 /* Special case for k2 + 1 >= n_data
122 (actually it's not a special case, but just avoids indexing errors )
124 if ( par->g2_star == 0 )
126 assert(par->g2 == 0 );
127 return wv[par->k2]->v.f;
130 /* Ditto for k2 < 0 */
133 a = wv[par->k2]->v.f;
136 if ( wv[par->k2 + 1]->w >= 1.0 )
137 return ( (1 - par->g2_star) * a +
138 par->g2_star * wv[par->k2 + 1]->v.f);
140 return ( (1 - par->g2) * a +
141 par->g2 * wv[par->k2 + 1]->v.f);
147 /* Weighted average at y_tc1 */
149 ptile_waverage(const struct weighted_value **wv,
150 const struct ptile_params *par)
154 if ( par->g1_star >= 1.0 )
155 return wv[par->k1 + 1]->v.f ;
159 a = wv[par->k1]->v.f;
162 if ( wv[par->k1 + 1]->w >= 1.0 )
163 return ( (1 - par->g1_star) * a +
164 par->g1_star * wv[par->k1 + 1]->v.f);
166 return ( (1 - par->g1) * a +
167 par->g1 * wv[par->k1 + 1]->v.f);
171 /* Empirical distribution function */
173 ptile_empirical(const struct weighted_value **wv,
174 const struct ptile_params *par)
176 if ( par->g1_star > 0 )
177 return wv[par->k1 + 1]->v.f;
179 return wv[par->k1]->v.f;
184 /* Empirical distribution function with averageing */
186 ptile_aempirical(const struct weighted_value **wv,
187 const struct ptile_params *par)
189 if ( par->g1_star > 0 )
190 return wv[par->k1 + 1]->v.f;
192 return (wv[par->k1]->v.f + wv[par->k1 + 1]->v.f ) / 2.0 ;
197 /* Compute the percentile p */
198 double ptile(double p,
199 const struct weighted_value **wv,
202 enum pc_alg algorithm);
208 const struct weighted_value **wv,
211 enum pc_alg algorithm)
217 struct ptile_params pp;
227 for ( i = 0 ; i < n_data ; ++i )
229 if ( wv[i]->cc <= tc1 )
232 if ( wv[i]->cc <= tc2 )
240 pp.g1 = ( tc1 - wv[pp.k1]->cc ) / wv[pp.k1 + 1]->w;
241 pp.g1_star = tc1 - wv[pp.k1]->cc ;
245 pp.g1 = tc1 / wv[pp.k1 + 1]->w;
250 if ( pp.k2 + 1 >= n_data )
259 pp.g2 = ( tc2 - wv[pp.k2]->cc ) / wv[pp.k2 + 1]->w;
260 pp.g2_star = tc2 - wv[pp.k2]->cc ;
264 pp.g2 = tc2 / wv[pp.k2 + 1]->w;
272 result = ptile_haverage(wv, &pp);
275 result = ptile_waverage(wv, &pp);
278 result = ptile_round(wv, &pp);
281 result = ptile_empirical(wv, &pp);
284 result = ptile_aempirical(wv, &pp);
295 Calculate the values of the percentiles in pc_hash.
296 wv is a sorted array of weighted values of the data set.
299 ptiles(struct hsh_table *pc_hash,
300 const struct weighted_value **wv,
303 enum pc_alg algorithm)
305 struct hsh_iterator hi;
306 struct percentile *p;
310 for ( p = hsh_first(pc_hash, &hi);
312 p = hsh_next(pc_hash, &hi))
314 p->v = ptile(p->p/100.0 , wv, n_data, w, algorithm);
320 /* Calculate Tukey's Hinges */
322 tukey_hinges(const struct weighted_value **wv,
329 double c_star = DBL_MAX;
335 for ( i = 0 ; i < n_data ; ++i )
337 c_star = MIN(c_star, wv[i]->w);
340 if ( c_star > 1 ) c_star = 1;
342 d = floor((w/c_star + 3 ) / 2.0)/ 2.0;
345 l[1] = w/2.0 + c_star/2.0;
346 l[2] = w + c_star - d*c_star;
352 for ( i = 0 ; i < n_data ; ++i )
354 if ( l[0] >= wv[i]->cc ) h[0] = i ;
355 if ( l[1] >= wv[i]->cc ) h[1] = i ;
356 if ( l[2] >= wv[i]->cc ) h[2] = i ;
359 for ( i = 0 ; i < 3 ; i++ )
363 a_star = l[i] - wv[h[i]]->cc ;
367 if ( h[i] + 1 >= n_data )
369 assert( a_star < 1 ) ;
370 hinge[i] = (1 - a_star) * wv[h[i]]->v.f;
375 a = a_star / ( wv[h[i] + 1]->cc ) ;
380 hinge[i] = wv[h[i] + 1]->v.f ;
384 if ( wv[h[i] + 1]->w >= 1)
386 hinge[i] = ( 1 - a_star) * wv[h[i]]->v.f
387 + a_star * wv[h[i] + 1]->v.f;
392 hinge[i] = (1 - a) * wv[h[i]]->v.f + a * wv[h[i] + 1]->v.f;
396 assert(hinge[0] <= hinge[1]);
397 assert(hinge[1] <= hinge[2]);
403 ptile_compare(const struct percentile *p1,
404 const struct percentile *p2,
412 else if (p1->p < p2->p)
421 ptile_hash(const struct percentile *p, void *aux UNUSED)
423 return hsh_hash_double(p->p);