From 24c05c4eafa2fa462bae17b45c9f58d0fb2a09c7 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 4 May 2014 13:03:38 -0700 Subject: [PATCH] CROSSTABS: Fix T values for Spearman's R and Pearson's R. I verified that the values are correct using this online calculator: http://www.danielsoper.com/statcalc3/calc.aspx?id=61 --- doc/statistics.texi | 2 - src/language/stats/crosstabs.q | 13 ++- tests/language/stats/crosstabs.at | 136 ++++++++++++++++-------------- 3 files changed, 78 insertions(+), 73 deletions(-) diff --git a/doc/statistics.texi b/doc/statistics.texi index c13528aa42..afa3b1aef2 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -603,8 +603,6 @@ followings bugs: @itemize @bullet @item -T values for Spearman's R and Pearson's R are wrong. -@item Significance of symmetric and directional measures is not calculated. @item Asymmetric ASEs and T values for lambda are wrong. diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index 5095b74715..e8f6aa5833 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -16,7 +16,6 @@ /* FIXME: - - T values for Spearman's R and Pearson's R are wrong. - How to calculate significance of symmetric and directional measures? - Asymmetric ASEs and T values for lambda are wrong. - ASE of Goodman and Kruskal's tau is not calculated. @@ -2265,12 +2264,12 @@ calc_chisq (struct pivot_table *pt, } } -/* Calculate the value of Pearson's r. r is stored into R, ase_1 into - ASE_1, and ase_0 into ASE_0. The row and column values must be +/* Calculate the value of Pearson's r. r is stored into R, its T value into + T, and standard error into ERROR. The row and column values must be passed in PT and Y. */ static void calc_r (struct pivot_table *pt, - double *PT, double *Y, double *r, double *ase_0, double *ase_1) + double *PT, double *Y, double *r, double *t, double *error) { double SX, SY, S, T; double Xbar, Ybar; @@ -2308,7 +2307,7 @@ calc_r (struct pivot_table *pt, SY = sum_Y2c - pow2 (sum_Yc) / pt->total; T = sqrt (SX * SY); *r = S / T; - *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / pt->total) / (sum_X2r * sum_Y2c)); + *t = *r / sqrt (1 - pow2 (*r)) * sqrt (pt->total - 2); { double s, c, y, t; @@ -2329,7 +2328,7 @@ calc_r (struct pivot_table *pt, c = (t - s) - y; s = t; } - *ase_1 = sqrt (s) / (T * T); + *error = sqrt (s) / (T * T); } } @@ -2596,13 +2595,11 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, } calc_r (pt, R, C, &v[6], &t[6], &ase[6]); - t[6] = v[6] / t[6]; free (R); free (C); calc_r (pt, (double *) pt->rows, (double *) pt->cols, &v[7], &t[7], &ase[7]); - t[7] = v[7] / t[7]; } /* Cohen's kappa. */ diff --git a/tests/language/stats/crosstabs.at b/tests/language/stats/crosstabs.at index 4dbd0bdc78..ec2d313add 100644 --- a/tests/language/stats/crosstabs.at +++ b/tests/language/stats/crosstabs.at @@ -362,7 +362,7 @@ z,Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. ,,Kendall's tau-c,.00,.32,.00, ,,Gamma,.00,.50,.00, ,,Spearman Correlation,.00,.22,.00, -,Interval by Interval,Pearson's R,.04,.22,.18, +,Interval by Interval,Pearson's R,.04,.22,.07, ,N of Valid Cases,,5,,, 2,Nominal by Nominal,Phi,1.00,,, ,,Cramer's V,1.00,,, @@ -370,8 +370,8 @@ z,Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. ,Ordinal by Ordinal,Kendall's tau-b,-.71,.20,-1.73, ,,Kendall's tau-c,-.75,.43,-1.73, ,,Gamma,-1.00,.00,-1.73, -,,Spearman Correlation,-.77,.17,-6.77, -,Interval by Interval,Pearson's R,-.73,.18,-5.49, +,,Spearman Correlation,-.77,.17,-1.73, +,Interval by Interval,Pearson's R,-.73,.18,-1.49, ,N of Valid Cases,,4,,, Table: Directional measures. @@ -735,6 +735,8 @@ AT_CLEANUP AT_SETUP([CROSSTABS Pearson's R]) # Test 1. AT_DATA([pearson.sps], [dnl +SET FORMAT F8.3. + * From http://www.statisticslectures.com/topics/pearsonr/. DATA LIST FREE/x y. BEGIN DATA. @@ -755,22 +757,24 @@ x * y,5,100.0%,0,0.0%,5,100.0% Table: x * y [[count]]. ,y,,,,, -x,4.00,6.00,10.00,12.00,13.00,Total -1.00,1.00,.00,.00,.00,.00,1.00 -3.00,.00,1.00,.00,.00,.00,1.00 -5.00,.00,.00,1.00,1.00,.00,2.00 -6.00,.00,.00,.00,.00,1.00,1.00 -Total,1.00,1.00,1.00,1.00,1.00,5.00 +x,4.000,6.000,10.000,12.000,13.000,Total +1.000,1.000,.000,.000,.000,.000,1.000 +3.000,.000,1.000,.000,.000,.000,1.000 +5.000,.000,.000,1.000,1.000,.000,2.000 +6.000,.000,.000,.000,.000,1.000,1.000 +Total,1.000,1.000,1.000,1.000,1.000,5.000 Table: Symmetric measures. Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. -Ordinal by Ordinal,Spearman Correlation,.97,.02,2.84, -Interval by Interval,Pearson's R,.97,.02,3.36, +Ordinal by Ordinal,Spearman Correlation,.975,.022,7.550, +Interval by Interval,Pearson's R,.968,.017,6.708, N of Valid Cases,,5,,, ]) # Test 2. AT_DATA([pearson2.sps], [dnl +SET FORMAT F8.3. + * Checked with http://www.socscistatistics.com/tests/pearson/Default2.aspx. DATA LIST FREE/x y. BEGIN DATA. @@ -796,28 +800,30 @@ x * y,10,100.0%,0,0.0%,10,100.0% Table: x * y [[count]]. ,y,,,,,,,,, -x,1.50,4.00,5.00,6.00,6.50,7.00,9.00,10.50,11.00,Total -1.00,1.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00 -2.00,1.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00 -3.00,.00,1.00,.00,.00,.00,.00,.00,.00,.00,1.00 -4.00,.00,.00,.00,1.00,.00,.00,.00,.00,.00,1.00 -5.00,.00,.00,1.00,.00,.00,.00,.00,.00,.00,1.00 -6.00,.00,.00,.00,.00,.00,1.00,.00,.00,.00,1.00 -7.00,.00,.00,.00,.00,1.00,.00,.00,.00,.00,1.00 -8.00,.00,.00,.00,.00,.00,.00,1.00,.00,.00,1.00 -9.00,.00,.00,.00,.00,.00,.00,.00,1.00,.00,1.00 -10.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00,1.00 -Total,2.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,10.00 +x,1.500,4.000,5.000,6.000,6.500,7.000,9.000,10.500,11.000,Total +1.000,1.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000 +2.000,1.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000 +3.000,.000,1.000,.000,.000,.000,.000,.000,.000,.000,1.000 +4.000,.000,.000,.000,1.000,.000,.000,.000,.000,.000,1.000 +5.000,.000,.000,1.000,.000,.000,.000,.000,.000,.000,1.000 +6.000,.000,.000,.000,.000,.000,1.000,.000,.000,.000,1.000 +7.000,.000,.000,.000,.000,1.000,.000,.000,.000,.000,1.000 +8.000,.000,.000,.000,.000,.000,.000,1.000,.000,.000,1.000 +9.000,.000,.000,.000,.000,.000,.000,.000,1.000,.000,1.000 +10.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000,1.000 +Total,2.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,10.000 Table: Symmetric measures. Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. -Ordinal by Ordinal,Spearman Correlation,.97,.02,3.66, -Interval by Interval,Pearson's R,.97,.02,3.69, +Ordinal by Ordinal,Spearman Correlation,.973,.015,11.844, +Interval by Interval,Pearson's R,.971,.017,11.580, N of Valid Cases,,10,,, ]) # Test 3. AT_DATA([pearson3.sps], [dnl +SET FORMAT F8.3. + * From http://learntech.uwe.ac.uk/da/Default.aspx?pageid=1442. DATA LIST FREE/x y. BEGIN DATA. @@ -842,25 +848,27 @@ x * y,9,100.0%,0,0.0%,9,100.0% Table: x * y [[count]]. ,y,,,,,,,, -x,28.00,58.00,66.00,75.00,85.00,87.00,91.00,122.00,Total -25.00,1.00,.00,.00,.00,.00,.00,.00,.00,1.00 -35.00,.00,1.00,.00,.00,.00,.00,.00,.00,1.00 -44.00,.00,.00,1.00,.00,.00,.00,.00,.00,1.00 -50.00,.00,.00,.00,1.00,.00,.00,.00,.00,1.00 -56.00,.00,.00,.00,.00,.00,1.00,1.00,.00,2.00 -65.00,.00,.00,.00,.00,1.00,.00,1.00,.00,2.00 -87.00,.00,.00,.00,.00,.00,.00,.00,1.00,1.00 -Total,1.00,1.00,1.00,1.00,1.00,1.00,2.00,1.00,9.00 +x,28.000,58.000,66.000,75.000,85.000,87.000,91.000,122.000,Total +25.000,1.000,.000,.000,.000,.000,.000,.000,.000,1.000 +35.000,.000,1.000,.000,.000,.000,.000,.000,.000,1.000 +44.000,.000,.000,1.000,.000,.000,.000,.000,.000,1.000 +50.000,.000,.000,.000,1.000,.000,.000,.000,.000,1.000 +56.000,.000,.000,.000,.000,.000,1.000,1.000,.000,2.000 +65.000,.000,.000,.000,.000,1.000,.000,1.000,.000,2.000 +87.000,.000,.000,.000,.000,.000,.000,.000,1.000,1.000 +Total,1.000,1.000,1.000,1.000,1.000,1.000,2.000,1.000,9.000 Table: Symmetric measures. Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. -Ordinal by Ordinal,Spearman Correlation,.91,.07,3.45, -Interval by Interval,Pearson's R,.97,.02,5.00, +Ordinal by Ordinal,Spearman Correlation,.911,.068,5.860, +Interval by Interval,Pearson's R,.966,.017,9.915, N of Valid Cases,,9,,, ]) # Test 4. AT_DATA([pearson4.sps], [dnl +SET FORMAT F8.3. + * From http://psychology.ucdavis.edu/faculty_sites/sommerb/sommerdemo/correlation/hand/pearson_hand.htm. DATA LIST FREE/x y. BEGIN DATA. @@ -888,27 +896,29 @@ x * y,12,100.0%,0,0.0%,12,100.0% Table: x * y [[count]]. ,y,,,,,,,,,,,, -x,2.00,4.00,5.00,7.00,9.00,11.00,12.00,14.00,15.00,17.00,18.00,20.00,Total -2.00,.00,.00,.00,1.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00 -3.00,.00,.00,.00,.00,.00,.00,1.00,.00,.00,.00,.00,.00,1.00 -4.00,.00,.00,.00,.00,1.00,1.00,.00,.00,.00,.00,.00,.00,2.00 -5.00,.00,.00,1.00,.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00 -6.00,1.00,1.00,.00,.00,.00,.00,.00,.00,.00,.00,.00,.00,2.00 -7.00,.00,.00,.00,.00,.00,.00,.00,1.00,.00,.00,.00,.00,1.00 -8.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00,.00,.00,.00,1.00 -9.00,.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00,.00,.00,1.00 -10.00,.00,.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00,1.00,2.00 -Total,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,12.00 +x,2.000,4.000,5.000,7.000,9.000,11.000,12.000,14.000,15.000,17.000,18.000,20.000,Total +2.000,.000,.000,.000,1.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000 +3.000,.000,.000,.000,.000,.000,.000,1.000,.000,.000,.000,.000,.000,1.000 +4.000,.000,.000,.000,.000,1.000,1.000,.000,.000,.000,.000,.000,.000,2.000 +5.000,.000,.000,1.000,.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000 +6.000,1.000,1.000,.000,.000,.000,.000,.000,.000,.000,.000,.000,.000,2.000 +7.000,.000,.000,.000,.000,.000,.000,.000,1.000,.000,.000,.000,.000,1.000 +8.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000,.000,.000,.000,1.000 +9.000,.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000,.000,.000,1.000 +10.000,.000,.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000,1.000,2.000 +Total,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,12.000 Table: Symmetric measures. Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. -Ordinal by Ordinal,Spearman Correlation,.66,.14,2.59, -Interval by Interval,Pearson's R,.67,.13,2.93, +Ordinal by Ordinal,Spearman Correlation,.657,.140,2.758, +Interval by Interval,Pearson's R,.667,.132,2.830, N of Valid Cases,,12,,, ]) # Test 5. AT_DATA([pearson5.sps], [dnl +SET FORMAT F8.3. + * From http://www.statisticslectures.com/topics/pearsonr/. DATA LIST FREE/x y. BEGIN DATA. @@ -934,23 +944,23 @@ x * y,10,100.0%,0,0.0%,10,100.0% Table: x * y [[count]]. ,y,,,,,,,,,, -x,15000.00,26000.00,29000.00,32000.00,33000.00,41000.00,45000.00,52000.00,68000.00,80000.00,Total -18.00,1.00,.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00 -24.00,.00,1.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00 -25.00,.00,.00,1.00,.00,.00,.00,.00,.00,.00,.00,1.00 -26.00,.00,.00,.00,1.00,.00,.00,.00,.00,.00,.00,1.00 -33.00,.00,.00,.00,.00,1.00,.00,.00,.00,.00,.00,1.00 -37.00,.00,.00,.00,.00,.00,1.00,.00,.00,.00,.00,1.00 -40.00,.00,.00,.00,.00,.00,.00,1.00,.00,.00,.00,1.00 -45.00,.00,.00,.00,.00,.00,.00,.00,1.00,.00,.00,1.00 -57.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00,.00,1.00 -64.00,.00,.00,.00,.00,.00,.00,.00,.00,.00,1.00,1.00 -Total,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,1.00,10.00 +x,15000.000,26000.000,29000.000,32000.000,33000.000,41000.000,45000.000,52000.000,68000.000,80000.000,Total +18.000,1.000,.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000 +24.000,.000,1.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000 +25.000,.000,.000,1.000,.000,.000,.000,.000,.000,.000,.000,1.000 +26.000,.000,.000,.000,1.000,.000,.000,.000,.000,.000,.000,1.000 +33.000,.000,.000,.000,.000,1.000,.000,.000,.000,.000,.000,1.000 +37.000,.000,.000,.000,.000,.000,1.000,.000,.000,.000,.000,1.000 +40.000,.000,.000,.000,.000,.000,.000,1.000,.000,.000,.000,1.000 +45.000,.000,.000,.000,.000,.000,.000,.000,1.000,.000,.000,1.000 +57.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000,.000,1.000 +64.000,.000,.000,.000,.000,.000,.000,.000,.000,.000,1.000,1.000 +Total,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,10.000 Table: Symmetric measures. Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. -Ordinal by Ordinal,Spearman Correlation,1.00,.00,3.76, -Interval by Interval,Pearson's R,.99,.00,3.86, +Ordinal by Ordinal,Spearman Correlation,1.000,.000,+Infinity, +Interval by Interval,Pearson's R,.992,.004,22.638, N of Valid Cases,,10,,, ]) AT_CLEANUP -- 2.30.2