From 26ff6fce95fb38f0986017dd922c36d4b1b239c7 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 11 Mar 2011 22:10:54 -0800 Subject: [PATCH] FREQUENCIES: Fix percentiles calculation. The condition for using a variate directly instead of interpolating was just wrong. It would interpolate in cases where it clearly should not, which produced incorrect results in many cases. Thanks to Fabio Bordignon for reporting the problem and supplying a simple test case. --- src/language/stats/frequencies.q | 5 ++-- tests/language/dictionary/weight.at | 2 +- tests/language/stats/frequencies.at | 41 +++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index ecaefdf6..adc4f16b 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -922,8 +922,7 @@ calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf) if (rank <= tp) break; - if (f->count > 1 - && (rank - (f->count - 1) > tp || f + 1 >= ft->missing)) + if (tp + 1 < rank || f + 1 >= ft->missing) pc->value = f->value.f; else pc->value = calc_percentile (pc->p, W, f->value.f, f[1].value.f); diff --git a/tests/language/dictionary/weight.at b/tests/language/dictionary/weight.at index 0eb55bc7..40226924 100644 --- a/tests/language/dictionary/weight.at +++ b/tests/language/dictionary/weight.at @@ -146,6 +146,6 @@ Range,,76.000 Minimum,,18.000 Maximum,,94.000 Sum,,23006.00 -Percentiles,50 (Median),29 +Percentiles,50 (Median),28 ]) AT_CLEANUP diff --git a/tests/language/stats/frequencies.at b/tests/language/stats/frequencies.at index cfd992a5..8aaba06c 100644 --- a/tests/language/stats/frequencies.at +++ b/tests/language/stats/frequencies.at @@ -419,6 +419,47 @@ Percentiles,0,1.00 ]) AT_CLEANUP +dnl Data for this test case from Fabio Bordignon . +AT_SETUP([FREQUENCIES enhanced percentiles, weighted (3)]) +AT_DATA([frequencies.sps], + [DATA LIST LIST notable /X * F *. +BEGIN DATA. +1 7 +2 16 +3 12 +4 5 +END DATA. + +WEIGHT BY f. + +FREQUENCIES + VAR=x + /PERCENTILES = 0 25 50 75 100. +]) +AT_CHECK([pspp -O format=csv frequencies.sps], [0], [dnl +Table: X +Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent +,1.00,7.00,17.50,17.50,17.50 +,2.00,16.00,40.00,40.00,57.50 +,3.00,12.00,30.00,30.00,87.50 +,4.00,5.00,12.50,12.50,100.00 +Total,,40.00,100.0,100.0, + +Table: X +N,Valid,40.00 +,Missing,.00 +Mean,,2.38 +Std Dev,,.93 +Minimum,,1.00 +Maximum,,4.00 +Percentiles,0,1.00 +,25,2.00 +,50 (Median),2.00 +,75,3.00 +,100,4.00 +]) +AT_CLEANUP + AT_SETUP([FREQUENCIES enhanced percentiles, weighted, missing values]) AT_DATA([frequencies.sps], [DATA LIST LIST notable /X * F *. -- 2.30.2