From: Ben Pfaff Date: Sat, 29 Apr 2023 23:04:19 +0000 (-0700) Subject: CROSSTABS: Add significance of correlations in symmetric measures. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9f1795a4c231423729dfc1d599140a6be81776d5;p=pspp CROSSTABS: Add significance of correlations in symmetric measures. Requested by Domingo J Rubira López at https://lists.gnu.org/archive/html/pspp-users/2020-11/msg00000.html --- diff --git a/NEWS b/NEWS index 9abe5bc18b..2ba75571b3 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,9 @@ Changes after 1.6.2: - Break variables are now optional. + * CROSSTABS now calculates significance of Pearson and Spearman + correlations in symmetric measures. + * DISPLAY MACROS is now implemented. * SHOW ENVIRONMENT is now implemented. diff --git a/doc/statistics.texi b/doc/statistics.texi index f8a06afd13..97a04a4ea0 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -836,7 +836,7 @@ following limitations: @itemize @bullet @item -Significance of some symmetric and directional measures is not calculated. +Significance of some directional measures is not calculated. @item Asymptotic standard error is not calculated for Goodman and Kruskal's tau or symmetric Somers' d. diff --git a/src/language/commands/crosstabs.c b/src/language/commands/crosstabs.c index df9b38defc..9839de2698 100644 --- a/src/language/commands/crosstabs.c +++ b/src/language/commands/crosstabs.c @@ -16,7 +16,7 @@ /* FIXME: - - How to calculate significance of some symmetric and directional measures? + - How to calculate significance of some directional measures? - How to calculate ASE for symmetric Somers ' d? - How to calculate ASE for Goodman and Kruskal's tau? - How to calculate approx. T of symmetric uncertainty coefficient? 
@@ -55,6 +55,7 @@ #include "libpspp/misc.h" #include "libpspp/pool.h" #include "libpspp/str.h" +#include "math/correlation.h" #include "output/pivot-table.h" #include "output/charts/barchart.h" @@ -1821,8 +1822,16 @@ display_crosstabulation (struct crosstabs_proc *proc, free (indexes); } +struct symmetric_statistic + { + double v; /* Value. */ + double ase; /* Asymptotic standard error. */ + double t; /* Student's t value. */ + double sig; /* Significance. */ + }; + static void calc_r (struct crosstabulation *, - double *XT, double *Y, double *, double *, double *); + double *XT, double *Y, struct symmetric_statistic *); static void calc_chisq (struct crosstabulation *, double[N_CHISQ], int[N_CHISQ], double *, double *); @@ -1873,14 +1882,6 @@ display_chisq (struct crosstabulation *xt, struct pivot_table *chisq) free (indexes); } -struct symmetric_statistic - { - double v; /* Value. */ - double ase; /* Appropriate standard error. */ - double t; /* Student's t value. */ - double sig; /* Significance. */ - }; - struct somers_d { double v; @@ -2189,22 +2190,20 @@ calc_chisq (struct crosstabulation *xt, if (var_is_numeric (xt->vars[ROW_VAR].var) && var_is_numeric (xt->vars[COL_VAR].var)) { - double r, ase_0, ase_1; + struct symmetric_statistic r; calc_r (xt, (double *) xt->vars[ROW_VAR].values, - (double *) xt->vars[COL_VAR].values, - &r, &ase_0, &ase_1); + (double *) xt->vars[COL_VAR].values, &r); - chisq[4] = (xt->total - 1.) * r * r; + chisq[4] = (xt->total - 1.) * pow2 (r.v); df[4] = 1; } } -/* Calculate the value of Pearson's r. r is stored into R, its T value into - T, and standard error into ERROR. The row and column values must be - passed in XT and Y. */ +/* Calculate the value of Pearson's r and store it into *R. The row and + column values must be passed in XT and Y. 
*/ static void calc_r (struct crosstabulation *xt, - double *XT, double *Y, double *r, double *t, double *error) + double *XT, double *Y, struct symmetric_statistic *r) { size_t n_rows = xt->vars[ROW_VAR].n_values; size_t n_cols = xt->vars[COL_VAR].n_values; @@ -2241,8 +2240,9 @@ calc_r (struct crosstabulation *xt, double SX = sum_X2r - pow2 (sum_Xr) / xt->total; double SY = sum_Y2c - pow2 (sum_Yc) / xt->total; double T = sqrt (SX * SY); - *r = S / T; - *t = *r / sqrt (1 - pow2 (*r)) * sqrt (xt->total - 2); + r->v = S / T; + r->t = r->v / sqrt (1 - pow2 (r->v)) * sqrt (xt->total - 2); + r->sig = 2 * significance_of_correlation (r->v, xt->total); double s = 0; double c = 0; @@ -2259,7 +2259,7 @@ calc_r (struct crosstabulation *xt, c = (t - s) - y; s = t; } - *error = sqrt (s) / (T * T); + r->ase = sqrt (s) / (T * T); } /* Calculate symmetric statistics and their asymptotic standard @@ -2486,14 +2486,14 @@ calc_symmetric (struct crosstabs_proc *proc, struct crosstabulation *xt, s = t; } - calc_r (xt, R, C, &sym[6].v, &sym[6].t, &sym[6].ase); + calc_r (xt, R, C, &sym[6]); free (R); free (C); calc_r (xt, (double *) xt->vars[ROW_VAR].values, (double *) xt->vars[COL_VAR].values, - &sym[7].v, &sym[7].t, &sym[7].ase); + &sym[7]); } /* Cohen's kappa. */ diff --git a/tests/language/commands/crosstabs.at b/tests/language/commands/crosstabs.at index 392c48c738..f6cc80b9e8 100644 --- a/tests/language/commands/crosstabs.at +++ b/tests/language/commands/crosstabs.at @@ -469,25 +469,25 @@ z,1,Pearson Chi-Square,5.00,4,.287 ,,N of Valid Cases,4,, Table: Symmetric Measures -,,,,Value,Asymp. Std. Error,Approx. 
T -z,1,Nominal by Nominal,Phi,1.00,, -,,,Cramer's V,1.00,, -,,,Contingency Coefficient,.71,, -,,Ordinal by Ordinal,Kendall's tau-b,.00,.32,.00 -,,,Kendall's tau-c,.00,.32,.00 -,,,Gamma,.00,.50,.00 -,,,Spearman Correlation,.00,.22,.00 -,,Interval by Interval,Pearson's R,.04,.22,.07 -,,N of Valid Cases,,5,, -,2,Nominal by Nominal,Phi,1.00,, -,,,Cramer's V,1.00,, -,,,Contingency Coefficient,.71,, -,,Ordinal by Ordinal,Kendall's tau-b,-.71,.20,-1.73 -,,,Kendall's tau-c,-.75,.43,-1.73 -,,,Gamma,-1.00,.00,-1.73 -,,,Spearman Correlation,-.77,.17,-1.73 -,,Interval by Interval,Pearson's R,-.73,.18,-1.49 -,,N of Valid Cases,,4,, +,,,,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +z,1,Nominal by Nominal,Phi,1.00,,, +,,,Cramer's V,1.00,,, +,,,Contingency Coefficient,.71,,, +,,Ordinal by Ordinal,Kendall's tau-b,.00,.32,.00, +,,,Kendall's tau-c,.00,.32,.00, +,,,Gamma,.00,.50,.00, +,,,Spearman Correlation,.00,.22,.00,1.000 +,,Interval by Interval,Pearson's R,.04,.22,.07,.950 +,,N of Valid Cases,,5,,, +,2,Nominal by Nominal,Phi,1.00,,, +,,,Cramer's V,1.00,,, +,,,Contingency Coefficient,.71,,, +,,Ordinal by Ordinal,Kendall's tau-b,-.71,.20,-1.73, +,,,Kendall's tau-c,-.75,.43,-1.73, +,,,Gamma,-1.00,.00,-1.73, +,,,Spearman Correlation,-.77,.17,-1.73,.225 +,,Interval by Interval,Pearson's R,-.73,.18,-1.49,.274 +,,N of Valid Cases,,4,,, Table: Directional Measures ,,,,,Value,Asymp. Std. Error,Approx. T,Approx. Sig. @@ -989,10 +989,10 @@ x,1.000,Count,1,0,0,0,0,1 Total,,Count,1,1,1,1,1,5 Table: Symmetric Measures -,,Value,Asymp. Std. Error,Approx. T -Ordinal by Ordinal,Spearman Correlation,.975,.022,7.550 -Interval by Interval,Pearson's R,.968,.017,6.708 -N of Valid Cases,,5,, +,,Value,Asymp. Std. Error,Approx. T,Approx. Sig. 
+Ordinal by Ordinal,Spearman Correlation,.975,.022,7.550,.005 +Interval by Interval,Pearson's R,.968,.017,6.708,.007 +N of Valid Cases,,5,,, ]) AT_CLEANUP @@ -1040,10 +1040,10 @@ x,1.000,Count,1,0,0,0,0,0,0,0,0,1 Total,,Count,2,1,1,1,1,1,1,1,1,10 Table: Symmetric Measures -,,Value,Asymp. Std. Error,Approx. T -Ordinal by Ordinal,Spearman Correlation,.973,.015,11.844 -Interval by Interval,Pearson's R,.971,.017,11.580 -N of Valid Cases,,10,, +,,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Spearman Correlation,.973,.015,11.844,.000 +Interval by Interval,Pearson's R,.971,.017,11.580,.000 +N of Valid Cases,,10,,, ]) AT_CLEANUP @@ -1087,10 +1087,10 @@ x,25.000,Count,1,0,0,0,0,0,0,0,1 Total,,Count,1,1,1,1,1,1,2,1,9 Table: Symmetric Measures -,,Value,Asymp. Std. Error,Approx. T -Ordinal by Ordinal,Spearman Correlation,.911,.068,5.860 -Interval by Interval,Pearson's R,.966,.017,9.915 -N of Valid Cases,,9,, +,,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Spearman Correlation,.911,.068,5.860,.001 +Interval by Interval,Pearson's R,.966,.017,9.915,.000 +N of Valid Cases,,9,,, ]) AT_CLEANUP @@ -1139,10 +1139,10 @@ x,2.000,Count,0,0,0,1,0,0,0,0,0,0,0,0,1 Total,,Count,1,1,1,1,1,1,1,1,1,1,1,1,12 Table: Symmetric Measures -,,Value,Asymp. Std. Error,Approx. T -Ordinal by Ordinal,Spearman Correlation,.657,.140,2.758 -Interval by Interval,Pearson's R,.667,.132,2.830 -N of Valid Cases,,12,, +,,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Ordinal by Ordinal,Spearman Correlation,.657,.140,2.758,.020 +Interval by Interval,Pearson's R,.667,.132,2.830,.018 +N of Valid Cases,,12,,, ]) AT_CLEANUP @@ -1190,10 +1190,10 @@ x,18.000,Count,1,0,0,0,0,0,0,0,0,0,1 Total,,Count,1,1,1,1,1,1,1,1,1,1,10 Table: Symmetric Measures -,,Value,Asymp. Std. Error,Approx. T -Ordinal by Ordinal,Spearman Correlation,1.000,.000,+Infinit -Interval by Interval,Pearson's R,.992,.004,22.638 -N of Valid Cases,,10,, +,,Value,Asymp. Std. Error,Approx. T,Approx. Sig. 
+Ordinal by Ordinal,Spearman Correlation,1.000,.000,+Infinit,.000 +Interval by Interval,Pearson's R,.992,.004,22.638,.000 +N of Valid Cases,,10,,, ]) AT_CLEANUP @@ -1782,16 +1782,16 @@ Linear-by-Linear Association,5.8450,1.0000,.016 N of Valid Cases,66.0000,, Table: Symmetric Measures -,,Value,Asymp. Std. Error,Approx. T -Nominal by Nominal,Phi,.3246,, -,Cramer's V,.3246,, -,Contingency Coefficient,.3088,, -Ordinal by Ordinal,Kendall's tau-b,.2752,.0856,1.9920 -,Kendall's tau-c,.1497,.0751,1.9920 -,Gamma,.8717,.1250,1.9920 -,Spearman Correlation,.2908,.0906,2.4311 -Interval by Interval,Pearson's R,.2999,.0973,2.5147 -N of Valid Cases,,66.0000,, +,,Value,Asymp. Std. Error,Approx. T,Approx. Sig. +Nominal by Nominal,Phi,.3246,,, +,Cramer's V,.3246,,, +,Contingency Coefficient,.3088,,, +Ordinal by Ordinal,Kendall's tau-b,.2752,.0856,1.9920, +,Kendall's tau-c,.1497,.0751,1.9920, +,Gamma,.8717,.1250,1.9920, +,Spearman Correlation,.2908,.0906,2.4311,.018 +Interval by Interval,Pearson's R,.2999,.0973,2.5147,.014 +N of Valid Cases,,66.0000,,, Table: Directional Measures ,,,Value,Asymp. Std. Error,Approx. T,Approx. Sig.