From ef28191d9249de6ba14312b4faabcb11d382cdb1 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 5 May 2014 23:35:44 -0700 Subject: [PATCH] CROSSTABS: Don't calculate ASE for symmetric Somers' d. I couldn't figure out how to get the formula to match the SPSS results. Also, add several tests for Somers' d and other ordinal statistics. --- doc/statistics.texi | 7 +- src/language/stats/crosstabs.q | 6 +- tests/language/stats/crosstabs.at | 345 +++++++++++++++++++++++++++++- 3 files changed, 348 insertions(+), 10 deletions(-) diff --git a/doc/statistics.texi b/doc/statistics.texi index 2e29b047ef..6ae9da9577 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -605,11 +605,8 @@ following bugs: @item Significance of symmetric and directional measures is not calculated. @item -Asymptotic standard error is not calculated for asymmetric lambda. -@item -ASE of Goodman and Kruskal's tau is not calculated. -@item -ASE of symmetric somers' d is wrong. +Asymptotic standard error is not calculated for asymmetric lambda, +Goodman and Kruskal's tau, or symmetric Somers' d. @item Approximate T of uncertainty coefficient is wrong. @end itemize diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index 21947ee457..e3f5aee1d5 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -18,8 +18,8 @@ - How to calculate significance of symmetric and directional measures? - How to calculate ASE for asymmetric lambda? - - ASE of Goodman and Kruskal's tau is not calculated. - - ASE of symmetric somers' d is wrong. + - How to calculate ASE for symmetric Somers' d? + - How to calculate ASE for Goodman and Kruskal's tau? - Approx. T of uncertainty coefficient is wrong. */ @@ -2535,7 +2535,7 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, if (proc->statistics & (1u << CRS_ST_D)) { somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr)); - somers_d_ase[0] = 2. 
* btau_var / (Dr + Dc) * sqrt (Dr * Dc); + somers_d_ase[0] = SYSMIS; somers_d_t[0] = (somers_d_v[0] / (4 / (Dc + Dr) * sqrt (ctau_cum - pow2 (P - Q) / pt->total))); diff --git a/tests/language/stats/crosstabs.at b/tests/language/stats/crosstabs.at index ce95d75e9f..c33857b8f2 100644 --- a/tests/language/stats/crosstabs.at +++ b/tests/language/stats/crosstabs.at @@ -384,7 +384,7 @@ z,Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. ,,Uncertainty Coefficient,Symmetric,.47,.18,1.51, ,,,x Dependent,.31,.15,2.02, ,,,y Dependent,1.00,.00,2.02, -,Ordinal by Ordinal,Somers' d,Symmetric,.00,.09,.00, +,Ordinal by Ordinal,Somers' d,Symmetric,.00,,.00, ,,,x Dependent,.00,.50,.00, ,,,y Dependent,.00,.20,.00, ,Nominal by Interval,Eta,x Dependent,.04,,, @@ -397,7 +397,7 @@ z,Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. ,,Uncertainty Coefficient,Symmetric,.58,.17,1.56, ,,,x Dependent,.41,.17,2.36, ,,,y Dependent,1.00,.00,2.36, -,Ordinal by Ordinal,Somers' d,Symmetric,-.67,.04,-1.73, +,Ordinal by Ordinal,Somers' d,Symmetric,-.67,,-1.73, ,,,x Dependent,-1.00,.00,-1.73, ,,,y Dependent,-.50,.29,-1.73, ,Nominal by Interval,Eta,x Dependent,.73,,, @@ -1065,3 +1065,344 @@ Nominal by Nominal,Lambda,Symmetric,.208,.010,18.793, ,,y Dependent,.081,,, ]) AT_CLEANUP + +AT_SETUP([CROSSTABS Somers' D, Tau-B, Tau-C, Gamma]) +AT_DATA([somersd.sps], [dnl +SET FORMAT F8.3. + +* From http://stats.stackexchange.com/questions/72203/problem-with-calculating-asymptotic-standard-error-for-somers-d. +DATA LIST LIST NOTABLE/x y * w (F10.6). +WEIGHT BY w. +BEGIN DATA. +1 1 0.000025 +1 2 0.0001 +1 3 0.001 +1 4 0.0025 +1 5 0.004 +1 6 0.0075 +1 7 0.0125 +2 1 0.049975 +2 2 0.0999 +2 3 0.199 +2 4 0.2475 +2 5 0.196 +2 6 0.1425 +2 7 0.0375 +END DATA. +CROSSTABS x BY y/STATISTICS=D/CELLS=NONE. + +* From http://uregina.ca/~gingrich/gamma.pdf. +DATA LIST LIST NOTABLE/x y w. +WEIGHT BY w. +BEGIN DATA. 
+1 1 34 +1 2 24 +1 3 15 +2 1 42 +2 2 74 +2 3 67 +3 1 28 +3 2 111 +3 3 292 +END DATA. +CROSSTABS x BY y/STATISTICS=BTAU CTAU GAMMA D/CELLS=NONE. +]) +AT_CHECK([pspp -O format=csv somersd.sps], [0], [dnl +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,1.000000,100.0%,.000000,0.0%,1.000000,100.0% + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Somers' d,Symmetric,-.084,,-.149, +,,x Dependent,-.045,.300,-.149, +,,y Dependent,-.684,2.378,-.149, + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,687.000,100.0%,.000,0.0%,687.000,100.0% + +Table: Symmetric measures. +Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Kendall's tau-b,.372,.033,10.669, +,Kendall's tau-c,.310,.029,10.669, +,Gamma,.591,.043,10.669, +N of Valid Cases,,687.000,,, + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Somers' d,Symmetric,.371,,10.669, +,,x Dependent,.351,.032,10.669, +,,y Dependent,.394,.035,10.669, +]) + +AT_DATA([ordinal.sps], [dnl +SET FORMAT F8.3. + +* From https://www.iup.edu/WorkArea/DownloadAsset.aspx?id=9829, "Case 1". +DATA LIST LIST NOTABLE /x y w. +WEIGHT BY w. +BEGIN DATA. +1 2 40 +2 3 80 +3 4 30 +END DATA. +CROSSTABS x BY y/STATISTICS=GAMMA D BTAU/CELLS=NONE. + +* Same site, case 2. +DATA LIST LIST NOTABLE /x y w. +WEIGHT BY w. +BEGIN DATA. +1 1 40 +2 3 80 +3 4 30 +END DATA. +CROSSTABS x BY y/STATISTICS=GAMMA D BTAU/CELLS=NONE. + +* Same site, case 3. +DATA LIST LIST NOTABLE /x y w. +WEIGHT BY w. +BEGIN DATA. +1 4 40 +2 3 80 +3 2 30 +END DATA. +CROSSTABS x BY y/STATISTICS=GAMMA D BTAU/CELLS=NONE. + +* Same site, case 4. +DATA LIST LIST NOTABLE /x y w. +WEIGHT BY w. +BEGIN DATA. +1 1 20 +1 2 20 +2 3 80 +3 4 30 +END DATA. +CROSSTABS x BY y/STATISTICS=GAMMA D BTAU/CELLS=NONE. + +* Same site, case 5. 
+DATA LIST LIST NOTABLE /x y w. +WEIGHT BY w. +BEGIN DATA. +1 2 40 +2 2 80 +3 2 29 +3 3 1 +END DATA. +CROSSTABS x BY y/STATISTICS=GAMMA D BTAU/CELLS=NONE. + +* Same site, case 6. +DATA LIST LIST NOTABLE /x y w. +WEIGHT BY w. +BEGIN DATA. +1 1 3 +1 2 6 +1 3 28 +1 4 61 +2 1 4 +2 2 5 +2 3 21 +2 4 20 +END DATA. +CROSSTABS x BY y/STATISTICS=GAMMA D BTAU/CELLS=NONE. + +* Same site, case 7. +DATA LIST LIST NOTABLE /x y w. +WEIGHT BY w. +BEGIN DATA. +1 1 38 +1 2 6 +1 3 3 +1 4 51 +2 1 4 +2 2 20 +2 3 21 +2 4 5 +END DATA. +CROSSTABS x BY y/STATISTICS=LAMBDA D PHI GAMMA/CELLS=NONE. + +* Same site, case 8. +DATA LIST LIST NOTABLE /x y w. +WEIGHT BY w. +BEGIN DATA. +1 1 2 +1 2 3 +1 3 5 +1 4 1 +2 1 2 +2 2 16 +2 3 3 +2 4 6 +3 1 3 +3 2 10 +3 3 35 +3 4 27 +4 1 6 +4 2 15 +4 3 33 +4 4 45 +END DATA. +CROSSTABS x BY y/STATISTICS=LAMBDA D PHI BTAU/CELLS=NONE. +]) +AT_CHECK([pspp -O format=csv ordinal.sps], [0], [dnl +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,150.000,100.0%,.000,0.0%,150.000,100.0% + +Table: Symmetric measures. +Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Kendall's tau-b,1.000,.000,24.841, +,Gamma,1.000,.000,24.841, +N of Valid Cases,,150.000,,, + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Somers' d,Symmetric,1.000,,24.841, +,,x Dependent,1.000,.000,24.841, +,,y Dependent,1.000,.000,24.841, + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,150.000,100.0%,.000,0.0%,150.000,100.0% + +Table: Symmetric measures. +Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Kendall's tau-b,1.000,.000,24.841, +,Gamma,1.000,.000,24.841, +N of Valid Cases,,150.000,,, + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. 
+Ordinal by Ordinal,Somers' d,Symmetric,1.000,,24.841, +,,x Dependent,1.000,.000,24.841, +,,y Dependent,1.000,.000,24.841, + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,150.000,100.0%,.000,0.0%,150.000,100.0% + +Table: Symmetric measures. +Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Kendall's tau-b,-1.000,.000,-24.841, +,Gamma,-1.000,.000,-24.841, +N of Valid Cases,,150.000,,, + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Somers' d,Symmetric,-1.000,,-24.841, +,,x Dependent,-1.000,.000,-24.841, +,,y Dependent,-1.000,.000,-24.841, + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,150.000,100.0%,.000,0.0%,150.000,100.0% + +Table: Symmetric measures. +Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Kendall's tau-b,.972,.007,24.841, +,Gamma,1.000,.000,24.841, +N of Valid Cases,,150.000,,, + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Somers' d,Symmetric,.971,,24.841, +,,x Dependent,.944,.013,24.841, +,,y Dependent,1.000,.000,24.841, + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,150.000,100.0%,.000,0.0%,150.000,100.0% + +Table: Symmetric measures. +Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Kendall's tau-b,.119,.059,1.009, +,Gamma,1.000,.000,1.009, +N of Valid Cases,,150.000,,, + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Somers' d,Symmetric,.035,,1.009, +,,x Dependent,.805,.032,1.009, +,,y Dependent,.018,.017,1.009, + +Table: Summary. 
+,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,148.000,100.0%,.000,0.0%,148.000,100.0% + +Table: Symmetric measures. +Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Kendall's tau-b,-.208,.078,-2.641, +,Gamma,-.381,.130,-2.641, +N of Valid Cases,,148.000,,, + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Somers' d,Symmetric,-.206,,-2.641, +,,x Dependent,-.182,.069,-2.641, +,,y Dependent,-.237,.089,-2.641, + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,148.000,100.0%,.000,0.0%,148.000,100.0% + +Table: Symmetric measures. +Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Nominal by Nominal,Phi,.731,,, +,Cramer's V,.731,,, +Ordinal by Ordinal,Gamma,-.110,.107,-1.022, +N of Valid Cases,,148.000,,, + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Nominal by Nominal,Lambda,Symmetric,.338,.059,4.743, +,,x Dependent,.640,,4.875, +,,y Dependent,.174,,3.248, +,Goodman and Kruskal tau,x Dependent,.534,,, +,,y Dependent,.167,,, +Ordinal by Ordinal,Somers' d,Symmetric,-.074,,-1.022, +,,x Dependent,-.060,.059,-1.022, +,,y Dependent,-.096,.094,-1.022, + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,212.000,100.0%,.000,0.0%,212.000,100.0% + +Table: Symmetric measures. +Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Nominal by Nominal,Phi,.432,,, +,Cramer's V,.249,,, +Ordinal by Ordinal,Kendall's tau-b,.209,.062,3.338, +N of Valid Cases,,212.000,,, + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. 
+Nominal by Nominal,Lambda,Symmetric,.102,.067,1.473, +,,x Dependent,.027,,.302, +,,y Dependent,.165,,2.349, +,Goodman and Kruskal tau,x Dependent,.051,,, +,,y Dependent,.068,,, +Ordinal by Ordinal,Somers' d,Symmetric,.209,,3.338, +,,x Dependent,.202,.060,3.338, +,,y Dependent,.217,.064,3.338, +]) +AT_CLEANUP -- 2.30.2