From 1bd3c7fb84c7ab0db1bf8c637f2b9b3591e846c6 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 5 May 2014 21:48:13 -0700 Subject: [PATCH] CROSSTABS: Drop buggy ASE for asymmetric lambda; fix T for symmetric lambda. I really can't figure out how SPSS calculates the ASE for asymmetric lambda, despite hours of effort. --- doc/statistics.texi | 4 +- src/language/stats/crosstabs.q | 40 ++--------- tests/language/stats/crosstabs.at | 113 ++++++++++++++++++++++++++++-- 3 files changed, 115 insertions(+), 42 deletions(-) diff --git a/doc/statistics.texi b/doc/statistics.texi index afa3b1aef2..2e29b047ef 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -599,13 +599,13 @@ some statistics are calculated only in integer mode. @subcmd{STATISTICS} subcommand is not given, no statistics are calculated. @strong{Please note:} Currently the implementation of @cmd{CROSSTABS} has the -followings bugs: +following bugs: @itemize @bullet @item Significance of symmetric and directional measures is not calculated. @item -Asymmetric ASEs and T values for lambda are wrong. +Asymptotic standard error is not calculated for asymmetric lambda. @item ASE of Goodman and Kruskal's tau is not calculated. @item diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index e8f6aa5833..21947ee457 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -17,7 +17,7 @@ /* FIXME: - How to calculate significance of symmetric and directional measures? - - Asymmetric ASEs and T values for lambda are wrong. + - How to calculate ASE for asymmetric lambda? - ASE of Goodman and Kruskal's tau is not calculated. - ASE of symmetric somers' d is wrong. - Approx. T of uncertainty coefficient is wrong. @@ -2800,22 +2800,8 @@ calc_directional (struct crosstabs_proc *proc, struct pivot_table *pt, v[1] = (sum_fmj - rm) / (pt->total - rm); v[2] = (sum_fim - cm) / (pt->total - cm); - /* ASE1 for Y given PT. */ - { - double accum; - - for (accum = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) - { - const int deltaj = j == cm_index; - accum += (pt->mat[j + i * pt->n_cols] - * pow2 ((j == fim_index[i]) - - deltaj - + v[0] * deltaj)); - } - - ase[2] = sqrt (accum - pt->total * v[0]) / (pt->total - cm); - } + /* XXX We don't have a working formula for ASE1. */ + ase[2] = SYSMIS; /* ASE0 for Y given PT. */ { @@ -2828,22 +2814,8 @@ calc_directional (struct crosstabs_proc *proc, struct pivot_table *pt, t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / pt->total) / (pt->total - cm)); } - /* ASE1 for PT given Y. */ - { - double accum; - - for (accum = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) - { - const int deltaj = i == rm_index; - accum += (pt->mat[j + i * pt->n_cols] - * pow2 ((i == fmj_index[j]) - - deltaj - + v[0] * deltaj)); - } - - ase[1] = sqrt (accum - pt->total * v[0]) / (pt->total - rm); - } + /* XXX We don't have a working formula for ASE1. */ + ase[1] = SYSMIS; /* ASE0 for PT given Y. */ { @@ -2871,7 +2843,7 @@ calc_directional (struct crosstabs_proc *proc, struct pivot_table *pt, * pow2 (temp0 + (v[0] - 1.) * temp1)); } ase[0] = sqrt (accum1 - 4. * pt->total * v[0] * v[0]) / (2. * pt->total - rm - cm); - t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / pt->total)) + t[0] = v[0] / (sqrt (accum0 - pow2 (sum_fim + sum_fmj - cm - rm) / pt->total) / (2. * pt->total - rm - cm)); } diff --git a/tests/language/stats/crosstabs.at b/tests/language/stats/crosstabs.at index ec2d313add..ce95d75e9f 100644 --- a/tests/language/stats/crosstabs.at +++ b/tests/language/stats/crosstabs.at @@ -376,9 +376,9 @@ z,Category,Statistic,Value,Asymp. Std. Error,Approx. T,Approx. Sig. Table: Directional measures. z,Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. -1,Nominal by Nominal,Lambda,Symmetric,.40,.28,1.02, -,,,x Dependent,.25,NaN,1.12, -,,,y Dependent,1.00,NaN,1.12, +1,Nominal by Nominal,Lambda,Symmetric,.40,.28,1.12, +,,,x Dependent,.25,,1.12, +,,,y Dependent,1.00,,1.12, ,,Goodman and Kruskal tau,x Dependent,.25,,, ,,,y Dependent,1.00,,, ,,Uncertainty Coefficient,Symmetric,.47,.18,1.51, @@ -389,9 +389,9 @@ z,Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. ,,,y Dependent,.00,.20,.00, ,Nominal by Interval,Eta,x Dependent,.04,,, ,,,y Dependent,1.00,,, -2,Nominal by Nominal,Lambda,Symmetric,.50,.25,1.51, -,,,x Dependent,.33,NaN,1.15, -,,,y Dependent,1.00,NaN,1.15, +2,Nominal by Nominal,Lambda,Symmetric,.50,.25,2.00, +,,,x Dependent,.33,,1.15, +,,,y Dependent,1.00,,1.15, ,,Goodman and Kruskal tau,x Dependent,.33,,, ,,,y Dependent,1.00,,, ,,Uncertainty Coefficient,Symmetric,.58,.17,1.56, @@ -964,3 +964,104 @@ Interval by Interval,Pearson's R,.992,.004,22.638, N of Valid Cases,,10,,, ]) AT_CLEANUP + +AT_SETUP([CROSSTABS Goodman and Kruskal's lambda]) +AT_DATA([lambda.sps], [dnl +SET FORMAT F8.3. + +* From http://www.csupomona.edu/~jlkorey/POWERMUTT/Topics/contingency_tables.html. +DATA LIST LIST NOTABLE/x y w. +WEIGHT BY w. +BEGIN DATA. +1 1 424 +1 2 213 +1 3 59 +3 1 55 +3 2 188 +3 3 357 +END DATA. + +CROSSTABS x BY y/CELLS=NONE/STATISTICS=LAMBDA. + +* From http://vassarstats.net. +DATA LIST LIST NOTABLE/x y w. +WEIGHT BY w. +BEGIN DATA. +1 1 19 +1 2 26 +1 3 8 +2 1 21 +2 2 13 +2 3 5 +3 1 6 +3 2 12 +3 3 27 +END DATA. + +CROSSTABS x BY y/CELLS=NONE/STATISTICS=LAMBDA. + +* From Goodman, L.A., Kruskal, W.H. (1954) "Measures of association for + cross classifications". Part I. Journal of the American Statistical + Association, 49, 732-764. +DATA LIST LIST NOTABLE/x y w. +WEIGHT BY w. +BEGIN DATA. +1 1 1768 +1 2 807 +1 3 189 +1 4 47 +2 1 946 +2 2 1387 +2 3 746 +2 4 53 +3 1 115 +3 2 438 +3 3 288 +3 4 16 +END DATA. +CROSSTABS x BY y/CELLS=NONE/STATISTICS=LAMBDA. +]) +AT_CHECK([pspp -O format=csv lambda.sps], [0], [dnl +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,1296.000,100.0%,.000,0.0%,1296.000,100.0% + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Nominal by Nominal,Lambda,Symmetric,.423,.021,16.875, +,,x Dependent,.497,,15.986, +,,y Dependent,.370,,16.339, +,Goodman and Kruskal tau,x Dependent,.382,,, +,,y Dependent,.198,,, + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,137.000,100.0%,.000,0.0%,137.000,100.0% + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Nominal by Nominal,Lambda,Symmetric,.259,.081,2.902, +,,x Dependent,.250,,2.479, +,,y Dependent,.267,,2.766, +,Goodman and Kruskal tau,x Dependent,.129,,, +,,y Dependent,.123,,, + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,6800.000,100.0%,.000,0.0%,6800.000,100.0% + +Table: Directional measures. +Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Nominal by Nominal,Lambda,Symmetric,.208,.010,18.793, +,,x Dependent,.224,,16.076, +,,y Dependent,.192,,14.438, +,Goodman and Kruskal tau,x Dependent,.089,,, +,,y Dependent,.081,,, +]) +AT_CLEANUP -- 2.30.2