From 445beb7fedbdee5ddaaa481e9ce77f2e2a5a2d70 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sun, 18 Jun 2017 21:34:47 +0200 Subject: [PATCH] FACTOR: Add Anti-image matrix output --- NEWS | 2 + doc/statistics.texi | 4 +- src/language/stats/factor.c | 198 +++++++++++++++++++++++++++------ tests/language/stats/factor.at | 61 +++++++++- 4 files changed, 225 insertions(+), 40 deletions(-) diff --git a/NEWS b/NEWS index 492dc678ac..527694634f 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,8 @@ Changes from 0.10.2 to 0.10.5pre1: * The FACTOR command can now analyse matrix files prepared with MATRIX DATA. + * The FACTOR command can now print the anti-image matrices. + * The MATRIX DATA command has been added. * Some inappropriate properties in selection dialogs have been corrected. diff --git a/doc/statistics.texi b/doc/statistics.texi index b5026e7b2c..ba9fe0431a 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -798,7 +798,7 @@ FACTOR @{ [ /ROTATION=@{VARIMAX, EQUAMAX, QUARTIMAX, PROMAX[(@var{k})], NOROTATE@}] - [ /PRINT=[INITIAL] [EXTRACTION] [ROTATION] [UNIVARIATE] [CORRELATION] [COVARIANCE] [DET] [KMO] [SIG] [ALL] [DEFAULT] ] + [ /PRINT=[INITIAL] [EXTRACTION] [ROTATION] [UNIVARIATE] [CORRELATION] [COVARIANCE] [DET] [KMO] [AIC] [SIG] [ALL] [DEFAULT] ] [ /PLOT=[EIGEN] ] @@ -862,6 +862,8 @@ The @subcmd{/PRINT} subcommand may be used to select which features of the analy The covariance matrix is printed. @item @subcmd{DET} The determinant of the correlation or covariance matrix is printed. +@item @subcmd{AIC} + The anti-image covariance and anti-image correlation matrices are printed. @item @subcmd{KMO} The Kaiser-Meyer-Olkin measure of sampling adequacy and the Bartlett test of sphericity is printed. @item @subcmd{SIG} diff --git a/src/language/stats/factor.c b/src/language/stats/factor.c index fc3eb5b4d2..a1cd333389 100644 --- a/src/language/stats/factor.c +++ b/src/language/stats/factor.c @@ -243,6 +243,8 @@ struct idata double detR; /* The determinant of the correlation matrix */ + gsl_matrix *ai_cov; /* The anti-image covariance matrix */ + gsl_matrix *ai_cor; /* The anti-image correlation matrix */ struct covariance *cvm; }; @@ -266,35 +268,31 @@ idata_free (struct idata *id) gsl_vector_free (id->msr); gsl_vector_free (id->eval); gsl_matrix_free (id->evec); + gsl_matrix_free (id->ai_cov); + gsl_matrix_free (id->ai_cor); free (id); } - -static gsl_matrix * -anti_image (const gsl_matrix *m) +/* Return the sum of squares of all the elements in row J excluding column J */ +static double +ssq_row_od_n (const gsl_matrix *m, int j) { - int i, j; - gsl_matrix *a; + int i; + double ss = 0; assert (m->size1 == m->size2); - a = gsl_matrix_alloc (m->size1, m->size2); + assert (j < m->size1); for (i = 0; i < m->size1; ++i) { - for (j = 0; j < m->size2; ++j) - { - double *p = gsl_matrix_ptr (a, i, j); - *p = gsl_matrix_get (m, i, j); - *p /= gsl_matrix_get (m, i, i); - *p /= gsl_matrix_get (m, j, j); - } + if (i == j ) continue; + ss += pow2 (gsl_matrix_get (m, i, j)); } - return a; + return ss; } - /* Return the sum of all the elements excluding row N */ static double ssq_od_n (const gsl_matrix *m, int n) @@ -318,6 +316,58 @@ ssq_od_n (const gsl_matrix *m, int n) } +static gsl_matrix * +anti_image_corr (const gsl_matrix *m, const struct idata *idata) +{ + int i, j; + gsl_matrix *a; + assert (m->size1 == m->size2); + + a = gsl_matrix_alloc (m->size1, m->size2); + + for (i = 0; i < m->size1; ++i) + { + for (j = 0; j < m->size2; ++j) + { + double *p = gsl_matrix_ptr (a, i, j); + *p = gsl_matrix_get (m, i, j); + *p /= sqrt (gsl_matrix_get (m, i, i) * + gsl_matrix_get (m, j, j)); + } + } + + for (i = 0; i < m->size1; ++i) + { + double r = ssq_row_od_n (idata->mm.corr, i); + double u = ssq_row_od_n (a, i); + gsl_matrix_set (a, i, i, r / (r + u)); + } + + return a; +} + +static gsl_matrix * +anti_image_cov (const gsl_matrix *m) +{ + int i, j; + gsl_matrix *a; + assert (m->size1 == m->size2); + + a = gsl_matrix_alloc (m->size1, m->size2); + + for (i = 0; i < m->size1; ++i) + { + for (j = 0; j < m->size2; ++j) + { + double *p = gsl_matrix_ptr (a, i, j); + *p = gsl_matrix_get (m, i, j); + *p /= gsl_matrix_get (m, i, i); + *p /= gsl_matrix_get (m, j, j); + } + } + + return a; +} #if 0 static void @@ -1365,10 +1415,11 @@ cmd_factor (struct lexer *lexer, struct dataset *ds) else if (lex_match_id (lexer, "INV")) { } +#endif else if (lex_match_id (lexer, "AIC")) { + factor.print |= PRINT_AIC; } -#endif else if (lex_match_id (lexer, "SIG")) { factor.print |= PRINT_SIG; @@ -1981,6 +2032,81 @@ show_factor_correlation (const struct cmd_factor * factor, const gsl_matrix *fcm tab_submit (t); } +static void +show_aic (const struct cmd_factor *factor, const struct idata *idata) +{ + struct tab_table *t ; + size_t i; + + const int heading_rows = 1; + const int heading_columns = 2; + + const int nc = heading_columns + factor->n_vars; + const int nr = heading_rows + 2 * factor->n_vars; + + if ((factor->print & PRINT_AIC) == 0) + return; + + t = tab_create (nc, nr); + + tab_title (t, _("Anti-Image Matrices")); + + tab_hline (t, TAL_1, 0, nc - 1, heading_rows); + + tab_headers (t, heading_columns, 0, heading_rows, 0); + + tab_vline (t, TAL_2, 2, 0, nr - 1); + + /* Outline the box */ + tab_box (t, + TAL_2, TAL_2, + -1, -1, + 0, 0, + nc - 1, nr - 1); + + /* Vertical lines */ + tab_box (t, + -1, -1, + -1, TAL_1, + heading_columns, 0, + nc - 1, nr - 1); + + + for (i = 0; i < factor->n_vars; ++i) + tab_text (t, heading_columns + i, 0, TAT_TITLE, var_to_string (factor->vars[i])); + + tab_text (t, 0, heading_rows, TAT_TITLE, _("Anti-image Covariance")); + tab_hline (t, TAL_1, 0, nc - 1, heading_rows + factor->n_vars); + tab_text (t, 0, heading_rows + factor->n_vars, TAT_TITLE, _("Anti-image Correlation")); + + for (i = 0; i < factor->n_vars; ++i) + { + tab_text (t, 1, i + heading_rows, TAT_TITLE, + var_to_string (factor->vars[i])); + + tab_text (t, 1, factor->n_vars + i + heading_rows, TAT_TITLE, + var_to_string (factor->vars[i])); + } + + for (i = 0; i < factor->n_vars; ++i) + { + int j; + for (j = 0; j < factor->n_vars; ++j) + { + tab_double (t, heading_columns + i, heading_rows + j, 0, + gsl_matrix_get (idata->ai_cov, i, j), NULL, RC_OTHER); + } + + + for (j = 0; j < factor->n_vars; ++j) + { + tab_double (t, heading_columns + i, factor->n_vars + heading_rows + j, 0, + gsl_matrix_get (idata->ai_cor, i, j), NULL, RC_OTHER); + } + } + + tab_submit (t); +} static void show_correlation_matrix (const struct cmd_factor *factor, const struct idata *idata) @@ -2245,6 +2371,25 @@ do_factor_by_matrix (const struct cmd_factor *factor, struct idata *idata) else idata->analysis_matrix = idata->mm.cov; + gsl_matrix *r_inv; + r_inv = clone_matrix (idata->mm.corr); + gsl_linalg_cholesky_decomp (r_inv); + gsl_linalg_cholesky_invert (r_inv); + + idata->ai_cov = anti_image_cov (r_inv); + idata->ai_cor = anti_image_corr (r_inv, idata); + + int i; + double sum_ssq_r = 0; + double sum_ssq_a = 0; + for (i = 0; i < r_inv->size1; ++i) + { + sum_ssq_r += ssq_od_n (r_inv, i); + sum_ssq_a += ssq_od_n (idata->ai_cov, i); + } + + gsl_matrix_free (r_inv); + if (factor->print & PRINT_DETERMINANT || factor->print & PRINT_KMO) { @@ -2315,9 +2460,6 @@ do_factor_by_matrix (const struct cmd_factor *factor, struct idata *idata) if (factor->print & PRINT_KMO) { int i; - double sum_ssq_r = 0; - double sum_ssq_a = 0; - double df = factor->n_vars * (factor->n_vars - 1) / 2; double w = 0; @@ -2331,25 +2473,11 @@ do_factor_by_matrix (const struct cmd_factor *factor, struct idata *idata) const int nr = heading_rows + 4; const int nc = heading_columns + 1; - gsl_matrix *a, *x; + struct tab_table *t = tab_create (nc, nr); tab_title (t, _("KMO and Bartlett's Test")); - x = clone_matrix (idata->mm.corr); - gsl_linalg_cholesky_decomp (x); - gsl_linalg_cholesky_invert (x); - - a = anti_image (x); - - for (i = 0; i < x->size1; ++i) - { - sum_ssq_r += ssq_od_n (x, i); - sum_ssq_a += ssq_od_n (a, i); - } - - gsl_matrix_free (a); - gsl_matrix_free (x); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -2488,9 +2616,9 @@ do_factor_by_matrix (const struct cmd_factor *factor, struct idata *idata) } + show_aic (factor, idata); show_communalities (factor, initial_communalities, extracted_communalities); - if ( factor->rotation != ROT_NONE) { rotated_factors = gsl_matrix_calloc (factor_matrix->size1, factor_matrix->size2); diff --git a/tests/language/stats/factor.at b/tests/language/stats/factor.at index 0b45656326..38dcd50f59 100644 --- a/tests/language/stats/factor.at +++ b/tests/language/stats/factor.at @@ -2254,10 +2254,6 @@ bnt_actws_56,.562 AT_CLEANUP - - - - AT_SETUP([FACTOR bad input]) dnl Test for a crash @@ -2289,3 +2285,60 @@ FACTOR MATRIX IN (CORR =!*) AT_CHECK([pspp -O format=csv bad-input.sps], [1], [ignore]) AT_CLEANUP + + +AT_SETUP([FACTOR anti-image matrix]) + +AT_DATA([anti-image-matrix.sps], [dnl +SET FORMAT=F20.3 . +matrix data + variables = rowtype_ viq piq pa ran piatwr piatc + / n = 476 + / format = lower diagonal . +begin data +mean 96.88 100.51 -1.73 -0.94 -2.52 -1.85 +sd 10.97 11.19 1.19 0.88 0.85 0.97 +corr 1.00 +corr 0.38 1.00 +corr 0.26 0.24 1.00 +corr 0.16 0.17 0.34 1.00 +corr 0.25 0.07 0.46 0.38 1.00 +corr 0.37 0.22 0.39 0.30 0.59 1.00 +end data. + +factor matrix = in (cor = *) + / analysis = viq piq pa ran piatwr piatc + / format = sort + / extraction = pc + / rotation = norotate + / print = aic +]) + +AT_CHECK([pspp -O format=csv anti-image-matrix.sps], [0], [dnl +Table: Anti-Image Matrices +,,viq,piq,pa,ran,piatwr,piatc +Anti-image Covariance,viq,.762,-.248,-.048,.008,-.031,-.143 +,piq,-.248,.807,-.117,-.081,.108,-.071 +,pa,-.048,-.117,.711,-.125,-.173,-.060 +,ran,.008,-.081,-.125,.808,-.143,-.035 +,piatwr,-.031,.108,-.173,-.143,.551,-.265 +,piatc,-.143,-.071,-.060,-.035,-.265,.581 +Anti-image Correlation,viq,.741,-.316,-.066,.011,-.048,-.215 +,piq,-.316,.624,-.154,-.100,.163,-.103 +,pa,-.066,-.154,.811,-.165,-.277,-.093 +,ran,.011,-.100,-.165,.825,-.214,-.051 +,piatwr,-.048,.163,-.277,-.214,.675,-.469 +,piatc,-.215,-.103,-.093,-.051,-.469,.729 + +Table: Component Matrix +,Component,,,, +,1,2,3,4,5 +piatc,.774,.122,-.368,.365,-.322 +piatwr,.754,.418,.442,.219,-.115 +pa,.707,.124,-.117,-.161,.256 +piq,.456,-.733,.122,-.289,-.377 +viq,.589,-.539,.033,.298,.457 +ran,.592,.262,-.069,-.638,.096 +]) + +AT_CLEANUP -- 2.30.2