From c649f7f5c4f5b74d48efa3453c38c1458e7ae9a9 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 3 Sep 2009 20:09:36 -0700 Subject: [PATCH] T-TEST: Avoid NaN in paired-sample correlation significance calculation. Sometimes the correlation comes out just above 1.0 due to inaccuracy of machine calculation, which causes sqrt(1-corr**2) to yield NaN. This commit forces the correlation into the valid range, fixing the problem. Thanks to Matej Cepl for reporting the problem. Thanks to John Darrington for suggesting the fix. --- src/language/stats/t-test.q | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q index c69a6cdb..dd40de99 100644 --- a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1432,9 +1432,16 @@ pscbox (const struct dictionary *dict) double df = pairs[i].n -2; + /* pairs[i].correlation is a correlation, so mathematically it will + always be in the range [-1.0, 1.0]. Inaccurate calculations sometimes + cause it to be slightly greater than 1.0, however, which makes the + sqrt() below come out as NaN instead of 0. So force it to be 1.0 + or less. */ + double corr = MIN (1.0, pairs[i].correlation); + double correlation_t = pairs[i].correlation * sqrt (df) / - sqrt (1 - pow2 (pairs[i].correlation)); + sqrt (1 - pow2 (corr)); /* row headings */ -- 2.30.2