-/* PSPP - A program for statistical analysis . -*-c-*-
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
-Copyright (C) 2004 Free Software Foundation, Inc.
-Author: John Darrington 2004
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA. */
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include "factor-stats.h"
-#include "percentiles.h"
-#include "misc.h"
-
-#include "gettext.h"
-#define _(msgid) gettext (msgid)
-#define N_(msgid) msgid
-
-#include <assert.h>
-
-
-struct ptile_params
-{
- double g1, g1_star;
- double g2, g2_star;
- int k1, k2;
-};
-
-
-const char *ptile_alg_desc[] = {
- "",
- N_("HAverage"),
- N_("Weighted Average"),
- N_("Rounded"),
- N_("Empirical"),
- N_("Empirical with averaging")
-};
-
-
-
-
-/* Individual Percentile algorithms */
-
-/* Closest observation to tc1 */
-double ptile_round(const struct weighted_value **wv,
- const struct ptile_params *par);
-
-
-/* Weighted average at y_tc2 */
-double ptile_haverage(const struct weighted_value **wv,
- const struct ptile_params *par);
-
-
-/* Weighted average at y_tc1 */
-double ptile_waverage(const struct weighted_value **wv,
- const struct ptile_params *par);
-
-/* Empirical distribution function */
-double ptile_empirical(const struct weighted_value **wv,
- const struct ptile_params *par);
+#include "math/percentiles.h"
+#include "data/casereader.h"
+#include "data/val-type.h"
+#include "data/variable.h"
+#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
+#include "math/order-stats.h"
-/* Empirical distribution function with averaging*/
-double ptile_aempirical(const struct weighted_value **wv,
- const struct ptile_params *par);
-
-
-
-
-/* Closest observation to tc1 */
-double
-ptile_round(const struct weighted_value **wv,
- const struct ptile_params *par)
-{
- double x;
- double a=0;
-
- if ( par->k1 >= 0 )
- a = wv[par->k1]->v.f;
-
- if ( wv[par->k1 + 1]->w >= 1 )
- {
- if ( par->g1_star < 0.5 )
- x = a;
- else
- x = wv[par->k1 + 1]->v.f;
- }
- else
- {
- if ( par->g1 < 0.5 )
- x = a;
- else
- x = wv[par->k1 + 1]->v.f;
-
- }
-
- return x;
-}
+#include "gl/xalloc.h"
-/* Weighted average at y_tc2 */
double
-ptile_haverage(const struct weighted_value **wv,
- const struct ptile_params *par)
+percentile_calculate (const struct percentile *ptl, enum pc_alg alg)
{
+ struct percentile *mutable = CONST_CAST (struct percentile *, ptl);
+ const struct order_stats *os = &ptl->parent;
- double a=0;
+ if (ptl->g1 == SYSMIS)
+ mutable->g1 = (os->k[0].tc - os->k[0].cc) / os->k[0].c_p1;
- if ( par->g2_star >= 1.0 )
- return wv[par->k2 + 1]->v.f ;
+ if (ptl->g1_star == SYSMIS)
+ mutable->g1_star = os->k[0].tc - os->k[0].cc;
- /* Special case for k2 + 1 >= n_data
- (actually it's not a special case, but just avoids indexing errors )
- */
- if ( par->g2_star == 0 )
+ if (ptl->g2 == SYSMIS)
{
- assert(par->g2 == 0 );
- return wv[par->k2]->v.f;
- }
-
- /* Ditto for k2 < 0 */
- if ( par->k2 >= 0 )
- {
- a = wv[par->k2]->v.f;
+ if (os->k[1].c == 0)
+ mutable->g2 = os->k[1].tc / os->k[1].c_p1;
+ else if (os->k[1].c_p1 == 0)
+ mutable->g2 = 0;
+ else
+ mutable->g2 = (os->k[1].tc - os->k[1].cc) / os->k[1].c_p1;
}
- if ( wv[par->k2 + 1]->w >= 1.0 )
- return ( (1 - par->g2_star) * a +
- par->g2_star * wv[par->k2 + 1]->v.f);
- else
- return ( (1 - par->g2) * a +
- par->g2 * wv[par->k2 + 1]->v.f);
-
-}
-
-
-
-/* Weighted average at y_tc1 */
-double
-ptile_waverage(const struct weighted_value **wv,
- const struct ptile_params *par)
-{
- double a=0;
-
- if ( par->g1_star >= 1.0 )
- return wv[par->k1 + 1]->v.f ;
-
- if ( par->k1 >= 0 )
+ if (ptl->g2_star == SYSMIS)
{
- a = wv[par->k1]->v.f;
+ if (os->k[1].c == 0)
+ mutable->g2_star = os->k[1].tc;
+ else if (os->k[1].c_p1 == 0)
+ mutable->g2_star = 0;
+ else
+ mutable->g2_star = os->k[1].tc - os->k[1].cc;
}
- if ( wv[par->k1 + 1]->w >= 1.0 )
- return ( (1 - par->g1_star) * a +
- par->g1_star * wv[par->k1 + 1]->v.f);
- else
- return ( (1 - par->g1) * a +
- par->g1 * wv[par->k1 + 1]->v.f);
-}
-
-
-/* Empirical distribution function */
-double
-ptile_empirical(const struct weighted_value **wv,
- const struct ptile_params *par)
-{
- if ( par->g1_star > 0 )
- return wv[par->k1 + 1]->v.f;
- else
- return wv[par->k1]->v.f;
-}
-
-
-
-/* Empirical distribution function with averageing */
-double
-ptile_aempirical(const struct weighted_value **wv,
- const struct ptile_params *par)
-{
- if ( par->g1_star > 0 )
- return wv[par->k1 + 1]->v.f;
- else
- return (wv[par->k1]->v.f + wv[par->k1 + 1]->v.f ) / 2.0 ;
-}
-
-
-
-/* Compute the percentile p */
-double ptile(double p,
- const struct weighted_value **wv,
- int n_data,
- double w,
- enum pc_alg algorithm);
-
-
-
-double
-ptile(double p,
- const struct weighted_value **wv,
- int n_data,
- double w,
- enum pc_alg algorithm)
-{
- int i;
- double tc1, tc2;
- double result;
-
- struct ptile_params pp;
-
- assert( p <= 1.0);
-
- tc1 = w * p ;
- tc2 = (w + 1) * p ;
-
- pp.k1 = -1;
- pp.k2 = -1;
-
- for ( i = 0 ; i < n_data ; ++i )
+ switch (alg)
{
- if ( wv[i]->cc <= tc1 )
- pp.k1 = i;
+ case PC_WAVERAGE:
+ if (ptl->g1_star >= 1.0)
+ return os->k[0].y_p1;
+ else
+ {
+ double a = (os->k[0].y == SYSMIS) ? 0 : os->k[0].y;
- if ( wv[i]->cc <= tc2 )
- pp.k2 = i;
-
- }
+ if (os->k[0].c_p1 >= 1.0)
+ return (1 - ptl->g1_star) * a + ptl->g1_star * os->k[0].y_p1;
+ else
+ return (1 - ptl->g1) * a + ptl->g1 * os->k[0].y_p1;
+ }
+ break;
+ case PC_ROUND:
+ {
+ double a = (os->k[0].y == SYSMIS) ? 0 : os->k[0].y;
- if ( pp.k1 >= 0 )
- {
- pp.g1 = ( tc1 - wv[pp.k1]->cc ) / wv[pp.k1 + 1]->w;
- pp.g1_star = tc1 - wv[pp.k1]->cc ;
- }
- else
- {
- pp.g1 = tc1 / wv[pp.k1 + 1]->w;
- pp.g1_star = tc1 ;
- }
+ if (os->k[0].c_p1 >= 1.0)
+ return (ptl->g1_star < 0.5) ? a : os->k[0].y_p1;
+ else
+ return (ptl->g1 < 0.5) ? a : os->k[0].y_p1;
+ }
+ break;
+ case PC_EMPIRICAL:
+ if (ptl->g1_star == 0)
+ return os->k[0].y;
+ else
+ return os->k[0].y_p1;
+ break;
- if ( pp.k2 + 1 >= n_data )
- {
- pp.g2 = 0 ;
- pp.g2_star = 0;
- }
- else
- {
- if ( pp.k2 >= 0 )
+ case PC_HAVERAGE:
+ if (ptl->g2_star >= 1.0)
{
- pp.g2 = ( tc2 - wv[pp.k2]->cc ) / wv[pp.k2 + 1]->w;
- pp.g2_star = tc2 - wv[pp.k2]->cc ;
+ return os->k[1].y_p1;
}
else
{
- pp.g2 = tc2 / wv[pp.k2 + 1]->w;
- pp.g2_star = tc2 ;
+ double a = (os->k[1].y == SYSMIS) ? 0 : os->k[1].y;
+
+ if (os->k[1].c_p1 >= 1.0)
+ {
+ if (ptl->g2_star == 0)
+ return os->k[1].y;
+
+ return (1 - ptl->g2_star) * a + ptl->g2_star * os->k[1].y_p1;
+ }
+ else
+ {
+ return (1 - ptl->g2) * a + ptl->g2 * os->k[1].y_p1;
+ }
}
- }
- switch ( algorithm )
- {
- case PC_HAVERAGE:
- result = ptile_haverage(wv, &pp);
- break;
- case PC_WAVERAGE:
- result = ptile_waverage(wv, &pp);
- break;
- case PC_ROUND:
- result = ptile_round(wv, &pp);
- break;
- case PC_EMPIRICAL:
- result = ptile_empirical(wv, &pp);
break;
+
case PC_AEMPIRICAL:
- result = ptile_aempirical(wv, &pp);
+ if (ptl->g1_star == 0)
+ return (os->k[0].y + os->k[0].y_p1)/ 2.0;
+ else
+ return os->k[0].y_p1;
break;
+
default:
- result = SYSMIS;
+ NOT_REACHED ();
+ break;
}
- return result;
+ NOT_REACHED ();
+
+ return SYSMIS;
}
-/*
- Calculate the values of the percentiles in pc_hash.
- wv is a sorted array of weighted values of the data set.
-*/
-void
-ptiles(struct hsh_table *pc_hash,
- const struct weighted_value **wv,
- int n_data,
- double w,
- enum pc_alg algorithm)
+static void
+destroy (struct statistic *stat)
{
- struct hsh_iterator hi;
- struct percentile *p;
-
- if ( !pc_hash )
- return ;
- for ( p = hsh_first(pc_hash, &hi);
- p != 0 ;
- p = hsh_next(pc_hash, &hi))
- {
- p->v = ptile(p->p/100.0 , wv, n_data, w, algorithm);
- }
-
+ struct percentile *ptl = UP_CAST (stat, struct percentile, parent.parent);
+ struct order_stats *os = &ptl->parent;
+ free (os->k);
+ free (ptl);
}
-/* Calculate Tukey's Hinges */
-void
-tukey_hinges(const struct weighted_value **wv,
- int n_data,
- double w,
- double hinge[3]
- )
+struct percentile *
+percentile_create (double p, double W)
{
- int i;
- double c_star = DBL_MAX;
- double d;
- double l[3];
- int h[3];
- double a, a_star;
-
- for ( i = 0 ; i < n_data ; ++i )
- {
- c_star = min(c_star, wv[i]->w);
- }
-
- if ( c_star > 1 ) c_star = 1;
-
- d = floor((w/c_star + 3 ) / 2.0)/ 2.0;
-
- l[0] = d*c_star;
- l[1] = w/2.0 + c_star/2.0;
- l[2] = w + c_star - d*c_star;
-
- h[0]=-1;
- h[1]=-1;
- h[2]=-1;
-
- for ( i = 0 ; i < n_data ; ++i )
- {
- if ( l[0] >= wv[i]->cc ) h[0] = i ;
- if ( l[1] >= wv[i]->cc ) h[1] = i ;
- if ( l[2] >= wv[i]->cc ) h[2] = i ;
- }
-
- for ( i = 0 ; i < 3 ; i++ )
- {
-
- if ( h[i] >= 0 )
- a_star = l[i] - wv[h[i]]->cc ;
- else
- a_star = l[i];
-
- if ( h[i] + 1 >= n_data )
- {
- assert( a_star < 1 ) ;
- hinge[i] = (1 - a_star) * wv[h[i]]->v.f;
- continue;
- }
- else
- {
- a = a_star / ( wv[h[i] + 1]->cc ) ;
- }
+ struct percentile *ptl = XZALLOC (struct percentile);
+ struct order_stats *os = &ptl->parent;
+ struct statistic *stat = &os->parent;
- if ( a_star >= 1.0 )
- {
- hinge[i] = wv[h[i] + 1]->v.f ;
- continue;
- }
+ assert (p >= 0);
+ assert (p <= 1.0);
- if ( wv[h[i] + 1]->w >= 1)
- {
- hinge[i] = ( 1 - a_star) * wv[h[i]]->v.f
- + a_star * wv[h[i] + 1]->v.f;
+ ptl->ptile = p;
+ ptl->w = W;
- continue;
- }
+ os->n_k = 2;
+ os->k = xcalloc (2, sizeof (*os->k));
+ os->k[0].tc = W * p;
+ os->k[1].tc = (W + 1.0) * p;
- hinge[i] = (1 - a) * wv[h[i]]->v.f + a * wv[h[i] + 1]->v.f;
-
- }
+ ptl->g1 = ptl->g1_star = SYSMIS;
+ ptl->g2 = ptl->g2_star = SYSMIS;
- assert(hinge[0] <= hinge[1]);
- assert(hinge[1] <= hinge[2]);
+ os->k[1].y_p1 = os->k[1].y = SYSMIS;
+ os->k[0].y_p1 = os->k[0].y = SYSMIS;
-}
+ stat->destroy = destroy;
-
-int
-ptile_compare(const struct percentile *p1,
- const struct percentile *p2,
- void *aux UNUSED)
-{
-
- int cmp;
-
- if ( p1->p == p2->p)
- cmp = 0 ;
- else if (p1->p < p2->p)
- cmp = -1 ;
- else
- cmp = +1;
-
- return cmp;
+ return ptl;
}
-unsigned
-ptile_hash(const struct percentile *p, void *aux UNUSED)
-{
- return hsh_hash_double(p->p);
-}
-
-