--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+
+#include <gsl/gsl_vector.h>
+#include <gsl/gsl_linalg.h>
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_eigen.h>
+#include <gsl/gsl_blas.h>
+#include <gsl/gsl_sort_vector.h>
+
+#include <math/covariance.h>
+
+#include <math/correlation.h>
+#include <math/moments.h>
+#include <data/procedure.h>
+#include <language/lexer/variable-parser.h>
+#include <language/lexer/value-parser.h>
+#include <language/command.h>
+#include <language/lexer/lexer.h>
+
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/format.h>
+#include <data/subcase.h>
+
+#include <libpspp/misc.h>
+#include <libpspp/message.h>
+
+#include <output/table.h>
+
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+enum method
+ {
+ METHOD_CORR,
+ METHOD_COV
+ };
+
+enum missing_type
+ {
+ MISS_LISTWISE,
+ MISS_PAIRWISE,
+ MISS_MEANSUB,
+ };
+
+enum extraction_method
+ {
+ EXTRACTION_PC,
+ EXTRACTION_PAF,
+ };
+
+enum print_opts
+ {
+ PRINT_UNIVARIATE = 0x0001,
+ PRINT_DETERMINANT = 0x0002,
+ PRINT_INV = 0x0004,
+ PRINT_AIC = 0x0008,
+ PRINT_SIG = 0x0010,
+ PRINT_COVARIANCE = 0x0020,
+ PRINT_CORRELATION = 0x0040,
+ PRINT_ROTATION = 0x0080,
+ PRINT_EXTRACTION = 0x0100,
+ PRINT_INITIAL = 0x0200,
+ PRINT_KMO = 0x0400,
+ PRINT_REPR = 0x0800,
+ PRINT_FSCORE = 0x1000
+ };
+
+
+struct cmd_factor
+{
+ size_t n_vars;
+ const struct variable **vars;
+
+ const struct variable *wv;
+
+ enum method method;
+ enum missing_type missing_type;
+ enum mv_class exclude;
+ enum print_opts print;
+ enum extraction_method extraction;
+
+ /* Extraction Criteria */
+ int n_factors;
+ double min_eigen;
+ double econverge;
+ int iterations;
+
+ /* Format */
+ double blank;
+ bool sort;
+};
+
+struct idata
+{
+ /* Intermediate values used in calculation */
+
+ gsl_vector *eval ; /* The eigenvalues */
+ gsl_matrix *evec ; /* The eigenvectors */
+
+ int n_extractions;
+
+ gsl_vector *msr ; /* Multiple Squared Regressions */
+};
+
+static struct idata *
+idata_alloc (size_t n_vars)
+{
+ struct idata *id = xmalloc (sizeof (*id));
+
+ id->n_extractions = 0;
+ id->msr = gsl_vector_alloc (n_vars);
+
+ id->eval = gsl_vector_alloc (n_vars);
+ id->evec = gsl_matrix_alloc (n_vars, n_vars);
+
+ return id;
+}
+
+static void
+idata_free (struct idata *id)
+{
+ gsl_vector_free (id->msr);
+ gsl_vector_free (id->eval);
+ gsl_matrix_free (id->evec);
+
+ free (id);
+}
+
+
+static void
+dump_matrix (const gsl_matrix *m)
+{
+ size_t i, j;
+
+ for (i = 0 ; i < m->size1; ++i)
+ {
+ for (j = 0 ; j < m->size2; ++j)
+ printf ("%02f ", gsl_matrix_get (m, i, j));
+ printf ("\n");
+ }
+}
+
+
+static void
+dump_matrix_permute (const gsl_matrix *m, const gsl_permutation *p)
+{
+ size_t i, j;
+
+ for (i = 0 ; i < m->size1; ++i)
+ {
+ for (j = 0 ; j < m->size2; ++j)
+ printf ("%02f ", gsl_matrix_get (m, gsl_permutation_get (p, i), j));
+ printf ("\n");
+ }
+}
+
+
+static void
+dump_vector (const gsl_vector *v)
+{
+ size_t i;
+ for (i = 0 ; i < v->size; ++i)
+ {
+ printf ("%02f\n", gsl_vector_get (v, i));
+ }
+ printf ("\n");
+}
+
+
+static int
+n_extracted_factors (const struct cmd_factor *factor, struct idata *idata)
+{
+ int i;
+
+ /* If there is a cached value, then return that. */
+ if ( idata->n_extractions != 0)
+ return idata->n_extractions;
+
+ /* Otherwise, if the number of factors has been explicitly requested,
+ use that. */
+ if (factor->n_factors > 0)
+ {
+ idata->n_extractions = factor->n_factors;
+ goto finish;
+ }
+
+ /* Use the MIN_EIGEN setting. */
+ for (i = 0 ; i < idata->eval->size; ++i)
+ {
+ double evali = fabs (gsl_vector_get (idata->eval, i));
+
+ idata->n_extractions = i;
+
+ if (evali < factor->min_eigen)
+ goto finish;
+ }
+
+ finish:
+ return idata->n_extractions;
+}
+
+
+/* Returns a newly allocated matrix identical to M.
+ It it the callers responsibility to free the returned value.
+*/
+static gsl_matrix *
+matrix_dup (const gsl_matrix *m)
+{
+ gsl_matrix *n = gsl_matrix_alloc (m->size1, m->size2);
+
+ gsl_matrix_memcpy (n, m);
+
+ return n;
+}
+
+
+struct smr_workspace
+{
+ /* Copy of the subject */
+ gsl_matrix *m;
+
+ gsl_matrix *inverse;
+
+ gsl_permutation *perm;
+
+ gsl_matrix *result1;
+ gsl_matrix *result2;
+};
+
+
+static struct smr_workspace *ws_create (const gsl_matrix *input)
+{
+ struct smr_workspace *ws = xmalloc (sizeof (*ws));
+
+ ws->m = gsl_matrix_alloc (input->size1, input->size2);
+ ws->inverse = gsl_matrix_calloc (input->size1 - 1, input->size2 - 1);
+ ws->perm = gsl_permutation_alloc (input->size1 - 1);
+ ws->result1 = gsl_matrix_calloc (input->size1 - 1, 1);
+ ws->result2 = gsl_matrix_calloc (1, 1);
+
+ return ws;
+}
+
+static void
+ws_destroy (struct smr_workspace *ws)
+{
+ gsl_matrix_free (ws->result2);
+ gsl_matrix_free (ws->result1);
+ gsl_permutation_free (ws->perm);
+ gsl_matrix_free (ws->inverse);
+ gsl_matrix_free (ws->m);
+
+ free (ws);
+}
+
+
+/*
+ Return the square of the regression coefficient for VAR regressed against all other variables.
+ */
+static double
+squared_multiple_correlation (const gsl_matrix *analysis_matrix, int var, struct smr_workspace *ws)
+{
+ /* For an explanation of what this is doing, see
+ http://www.visualstatistics.net/Visual%20Statistics%20Multimedia/multiple_regression_analysis.htm
+ */
+
+ int signum = 0;
+ gsl_matrix_view rxx;
+
+ gsl_matrix_memcpy (ws->m, analysis_matrix);
+
+ gsl_matrix_swap_rows (ws->m, 0, var);
+ gsl_matrix_swap_columns (ws->m, 0, var);
+
+ rxx = gsl_matrix_submatrix (ws->m, 1, 1, ws->m->size1 - 1, ws->m->size1 - 1);
+
+ gsl_linalg_LU_decomp (&rxx.matrix, ws->perm, &signum);
+
+ gsl_linalg_LU_invert (&rxx.matrix, ws->perm, ws->inverse);
+
+ {
+ gsl_matrix_const_view rxy = gsl_matrix_const_submatrix (ws->m, 1, 0, ws->m->size1 - 1, 1);
+ gsl_matrix_const_view ryx = gsl_matrix_const_submatrix (ws->m, 0, 1, 1, ws->m->size1 - 1);
+
+ gsl_blas_dgemm (CblasNoTrans, CblasNoTrans,
+ 1.0, ws->inverse, &rxy.matrix, 0.0, ws->result1);
+
+ gsl_blas_dgemm (CblasNoTrans, CblasNoTrans,
+ 1.0, &ryx.matrix, ws->result1, 0.0, ws->result2);
+ }
+
+ return gsl_matrix_get (ws->result2, 0, 0);
+}
+
+
+
+static double the_communality (const gsl_matrix *evec, const gsl_vector *eval, int n, int n_factors);
+
+
+struct factor_matrix_workspace
+{
+ size_t n_factors;
+ gsl_eigen_symmv_workspace *eigen_ws;
+
+ gsl_vector *eval ;
+ gsl_matrix *evec ;
+
+ gsl_matrix *gamma ;
+
+ gsl_matrix *r;
+};
+
+static struct factor_matrix_workspace *
+factor_matrix_workspace_alloc (size_t n, size_t nf)
+{
+ struct factor_matrix_workspace *ws = xmalloc (sizeof (*ws));
+
+ ws->n_factors = nf;
+ ws->gamma = gsl_matrix_calloc (nf, nf);
+ ws->eigen_ws = gsl_eigen_symmv_alloc (n);
+ ws->eval = gsl_vector_alloc (n);
+ ws->evec = gsl_matrix_alloc (n, n);
+ ws->r = gsl_matrix_alloc (n, n);
+
+ return ws;
+}
+
+static void
+factor_matrix_workspace_free (struct factor_matrix_workspace *ws)
+{
+ gsl_eigen_symmv_free (ws->eigen_ws);
+ gsl_vector_free (ws->eval);
+ gsl_matrix_free (ws->evec);
+ gsl_matrix_free (ws->gamma);
+ gsl_matrix_free (ws->r);
+ free (ws);
+}
+
+/*
+ Shift P left by OFFSET places, and overwrite TARGET
+ with the shifted result.
+ Positions in TARGET less than OFFSET are unchanged.
+*/
+static void
+perm_shift_apply (gsl_permutation *target, const gsl_permutation *p,
+ size_t offset)
+{
+ size_t i;
+ assert (target->size == p->size);
+ assert (offset <= target->size);
+
+ for (i = 0; i < target->size - offset; ++i)
+ {
+ target->data[i] = p->data [i + offset];
+ }
+}
+
+
+/*
+ Indirectly sort the rows of matrix INPUT, storing the sort order in PERM.
+ The sort criteria are as follows:
+
+ Rows are sorted on the first column, until the absolute value of an
+ element in a subsequent column is greater than that of the first
+ column. Thereafter, rows will be sorted on the second column,
+ until the absolute value of an element in a subsequent column
+ exceeds that of the second column ...
+*/
+static void
+sort_matrix_indirect (const gsl_matrix *input, gsl_permutation *perm)
+{
+ const size_t n = perm->size;
+ const size_t m = input->size2;
+ int i, j;
+ gsl_matrix *mat ;
+ int column_n = 0;
+ int row_n = 0;
+ gsl_permutation *p;
+
+ assert (perm->size == input->size1);
+
+ p = gsl_permutation_alloc (n);
+
+ /* Copy INPUT into MAT, discarding the sign */
+ mat = gsl_matrix_alloc (n, m);
+ for (i = 0 ; i < mat->size1; ++i)
+ {
+ for (j = 0 ; j < mat->size2; ++j)
+ {
+ double x = gsl_matrix_get (input, i, j);
+ gsl_matrix_set (mat, i, j, fabs (x));
+ }
+ }
+
+ while (column_n < m && row_n < n)
+ {
+ gsl_vector_const_view columni = gsl_matrix_const_column (mat, column_n);
+ gsl_sort_vector_index (p, &columni.vector);
+
+ for (i = 0 ; i < n; ++i)
+ {
+ gsl_vector_view row = gsl_matrix_row (mat, p->data[n - 1 - i]);
+ size_t maxindex = gsl_vector_max_index (&row.vector);
+
+ if ( maxindex > column_n )
+ break;
+
+ /* All subsequent elements of this row, are of no interest.
+ So set them all to a highly negative value */
+ for (j = column_n + 1; j < row.vector.size ; ++j)
+ gsl_vector_set (&row.vector, j, -DBL_MAX);
+ }
+
+ perm_shift_apply (perm, p, row_n);
+ row_n += i;
+
+ column_n++;
+ }
+
+ gsl_permutation_free (p);
+ gsl_matrix_free (mat);
+
+ assert ( 0 == gsl_permutation_valid (perm));
+
+ /* We want the biggest value to be first */
+ gsl_permutation_reverse (perm);
+}
+
+
+/*
+ Get an approximation for the factor matrix into FACTORS, and the communalities into COMMUNALITIES.
+ R is the matrix to be analysed.
+ WS is a pointer to a structure which must have been initialised with factor_matrix_workspace_init.
+ */
+static void
+iterate_factor_matrix (const gsl_matrix *r, gsl_vector *communalities, gsl_matrix *factors, struct factor_matrix_workspace *ws)
+{
+ size_t i;
+ gsl_matrix_view mv ;
+
+ assert (r->size1 == r->size2);
+ assert (r->size1 == communalities->size);
+
+ assert (factors->size1 == r->size1);
+ assert (factors->size2 == ws->n_factors);
+
+ gsl_matrix_memcpy (ws->r, r);
+
+ /* Apply Communalities to diagonal of correlation matrix */
+ for (i = 0 ; i < communalities->size ; ++i)
+ {
+ double *x = gsl_matrix_ptr (ws->r, i, i);
+ *x = gsl_vector_get (communalities, i);
+ }
+
+ gsl_eigen_symmv (ws->r, ws->eval, ws->evec, ws->eigen_ws);
+
+ mv = gsl_matrix_submatrix (ws->evec, 0, 0, ws->evec->size1, ws->n_factors);
+
+ /* Gamma is the diagonal matrix containing the absolute values of the eigenvalues */
+ for (i = 0 ; i < ws->n_factors ; ++i)
+ {
+ double *ptr = gsl_matrix_ptr (ws->gamma, i, i);
+ *ptr = fabs (gsl_vector_get (ws->eval, i));
+ }
+
+ /* Take the square root of gamma */
+ gsl_linalg_cholesky_decomp (ws->gamma);
+
+ gsl_blas_dgemm (CblasNoTrans, CblasNoTrans,
+ 1.0, &mv.matrix, ws->gamma, 0.0, factors);
+
+ for (i = 0 ; i < r->size1 ; ++i)
+ {
+ double h = the_communality (ws->evec, ws->eval, i, ws->n_factors);
+ gsl_vector_set (communalities, i, h);
+ }
+}
+
+
+
+static bool run_factor (struct dataset *ds, const struct cmd_factor *factor);
+
+
+int
+cmd_factor (struct lexer *lexer, struct dataset *ds)
+{
+ bool extraction_seen = false;
+ const struct dictionary *dict = dataset_dict (ds);
+
+ struct cmd_factor factor;
+ factor.method = METHOD_CORR;
+ factor.missing_type = MISS_LISTWISE;
+ factor.exclude = MV_ANY;
+ factor.print = PRINT_INITIAL | PRINT_EXTRACTION | PRINT_ROTATION;
+ factor.extraction = EXTRACTION_PC;
+ factor.n_factors = 0;
+ factor.min_eigen = SYSMIS;
+ factor.iterations = 25;
+ factor.econverge = 0.001;
+ factor.blank = 0;
+ factor.sort = false;
+
+ factor.wv = dict_get_weight (dict);
+
+ lex_match (lexer, '/');
+
+ if (!lex_force_match_id (lexer, "VARIABLES"))
+ {
+ goto error;
+ }
+
+ lex_match (lexer, '=');
+
+ if (!parse_variables_const (lexer, dict, &factor.vars, &factor.n_vars,
+ PV_NO_DUPLICATE | PV_NUMERIC))
+ goto error;
+
+ while (lex_token (lexer) != '.')
+ {
+ lex_match (lexer, '/');
+
+#if FACTOR_FULLY_IMPLEMENTED
+ if (lex_match_id (lexer, "PLOT"))
+ {
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+ {
+ if (lex_match_id (lexer, "EIGEN"))
+ {
+ }
+ else if (lex_match_id (lexer, "ROTATION"))
+ {
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ }
+ else
+#endif
+ if (lex_match_id (lexer, "METHOD"))
+ {
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+ {
+ if (lex_match_id (lexer, "COVARIANCE"))
+ {
+ factor.method = METHOD_COV;
+ }
+ else if (lex_match_id (lexer, "CORRELATION"))
+ {
+ factor.method = METHOD_CORR;
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ }
+#if FACTOR_FULLY_IMPLEMENTED
+ else if (lex_match_id (lexer, "ROTATION"))
+ {
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+ {
+ if (lex_match_id (lexer, "VARIMAX"))
+ {
+ }
+ else if (lex_match_id (lexer, "DEFAULT"))
+ {
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ }
+#endif
+ else if (lex_match_id (lexer, "CRITERIA"))
+ {
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+ {
+ if (lex_match_id (lexer, "FACTORS"))
+ {
+ if ( lex_force_match (lexer, '('))
+ {
+ lex_force_int (lexer);
+ factor.n_factors = lex_integer (lexer);
+ lex_get (lexer);
+ lex_force_match (lexer, ')');
+ }
+ }
+ else if (lex_match_id (lexer, "MINEIGEN"))
+ {
+ if ( lex_force_match (lexer, '('))
+ {
+ lex_force_num (lexer);
+ factor.min_eigen = lex_number (lexer);
+ lex_get (lexer);
+ lex_force_match (lexer, ')');
+ }
+ }
+ else if (lex_match_id (lexer, "ECONVERGE"))
+ {
+ if ( lex_force_match (lexer, '('))
+ {
+ lex_force_num (lexer);
+ factor.econverge = lex_number (lexer);
+ lex_get (lexer);
+ lex_force_match (lexer, ')');
+ }
+ }
+ else if (lex_match_id (lexer, "ITERATE"))
+ {
+ if ( lex_force_match (lexer, '('))
+ {
+ lex_force_int (lexer);
+ factor.iterations = lex_integer (lexer);
+ lex_get (lexer);
+ lex_force_match (lexer, ')');
+ }
+ }
+ else if (lex_match_id (lexer, "DEFAULT"))
+ {
+ factor.n_factors = 0;
+ factor.min_eigen = 1;
+ factor.iterations = 25;
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ }
+ else if (lex_match_id (lexer, "EXTRACTION"))
+ {
+ extraction_seen = true;
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+ {
+ if (lex_match_id (lexer, "PAF"))
+ {
+ factor.extraction = EXTRACTION_PAF;
+ }
+ else if (lex_match_id (lexer, "PC"))
+ {
+ factor.extraction = EXTRACTION_PC;
+ }
+ else if (lex_match_id (lexer, "PA1"))
+ {
+ factor.extraction = EXTRACTION_PC;
+ }
+ else if (lex_match_id (lexer, "DEFAULT"))
+ {
+ factor.extraction = EXTRACTION_PC;
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ }
+ else if (lex_match_id (lexer, "FORMAT"))
+ {
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+ {
+ if (lex_match_id (lexer, "SORT"))
+ {
+ factor.sort = true;
+ }
+ else if (lex_match_id (lexer, "BLANK"))
+ {
+ if ( lex_force_match (lexer, '('))
+ {
+ lex_force_num (lexer);
+ factor.blank = lex_number (lexer);
+ lex_get (lexer);
+ lex_force_match (lexer, ')');
+ }
+ }
+ else if (lex_match_id (lexer, "DEFAULT"))
+ {
+ factor.blank = 0;
+ factor.sort = false;
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ }
+ else if (lex_match_id (lexer, "PRINT"))
+ {
+ factor.print = 0;
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+ {
+ if (lex_match_id (lexer, "UNIVARIATE"))
+ {
+ factor.print |= PRINT_UNIVARIATE;
+ }
+ else if (lex_match_id (lexer, "DET"))
+ {
+ factor.print |= PRINT_DETERMINANT;
+ }
+#if FACTOR_FULLY_IMPLEMENTED
+ else if (lex_match_id (lexer, "INV"))
+ {
+ }
+ else if (lex_match_id (lexer, "AIC"))
+ {
+ }
+ else if (lex_match_id (lexer, "SIG"))
+ {
+ }
+ else if (lex_match_id (lexer, "COVARIANCE"))
+ {
+ }
+ else if (lex_match_id (lexer, "CORRELATION"))
+ {
+ }
+#endif
+ else if (lex_match_id (lexer, "ROTATION"))
+ {
+ factor.print |= PRINT_ROTATION;
+ }
+ else if (lex_match_id (lexer, "EXTRACTION"))
+ {
+ factor.print |= PRINT_EXTRACTION;
+ }
+ else if (lex_match_id (lexer, "INITIAL"))
+ {
+ factor.print |= PRINT_INITIAL;
+ }
+#if FACTOR_FULLY_IMPLEMENTED
+ else if (lex_match_id (lexer, "KMO"))
+ {
+ }
+ else if (lex_match_id (lexer, "REPR"))
+ {
+ }
+ else if (lex_match_id (lexer, "FSCORE"))
+ {
+ }
+#endif
+ else if (lex_match (lexer, T_ALL))
+ {
+ factor.print = 0xFFFF;
+ }
+ else if (lex_match_id (lexer, "DEFAULT"))
+ {
+ factor.print |= PRINT_INITIAL ;
+ factor.print |= PRINT_EXTRACTION ;
+ factor.print |= PRINT_ROTATION ;
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ }
+ else if (lex_match_id (lexer, "MISSING"))
+ {
+ lex_match (lexer, '=');
+ while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+ {
+ if (lex_match_id (lexer, "INCLUDE"))
+ {
+ factor.exclude = MV_SYSTEM;
+ }
+ else if (lex_match_id (lexer, "EXCLUDE"))
+ {
+ factor.exclude = MV_ANY;
+ }
+ else if (lex_match_id (lexer, "LISTWISE"))
+ {
+ factor.missing_type = MISS_LISTWISE;
+ }
+ else if (lex_match_id (lexer, "PAIRWISE"))
+ {
+ factor.missing_type = MISS_PAIRWISE;
+ }
+ else if (lex_match_id (lexer, "MEANSUB"))
+ {
+ factor.missing_type = MISS_MEANSUB;
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+
+ if ( ! run_factor (ds, &factor))
+ goto error;
+
+ free (factor.vars);
+ return CMD_SUCCESS;
+
+ error:
+ free (factor.vars);
+ return CMD_FAILURE;
+}
+
+static void do_factor (const struct cmd_factor *factor, struct casereader *group);
+
+
+static bool
+run_factor (struct dataset *ds, const struct cmd_factor *factor)
+{
+ struct dictionary *dict = dataset_dict (ds);
+ bool ok;
+ struct casereader *group;
+
+ struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
+
+ while (casegrouper_get_next_group (grouper, &group))
+ {
+ if ( factor->missing_type == MISS_LISTWISE )
+ group = casereader_create_filter_missing (group, factor->vars, factor->n_vars,
+ factor->exclude,
+ NULL, NULL);
+ do_factor (factor, group);
+ }
+
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
+
+ return ok;
+}
+
+
+/* Return the communality of variable N, calculated to N_FACTORS */
+static double
+the_communality (const gsl_matrix *evec, const gsl_vector *eval, int n, int n_factors)
+{
+ size_t i;
+
+ double comm = 0;
+
+ assert (n >= 0);
+ assert (n < eval->size);
+ assert (n < evec->size1);
+ assert (n_factors <= eval->size);
+
+ for (i = 0 ; i < n_factors; ++i)
+ {
+ double evali = fabs (gsl_vector_get (eval, i));
+
+ double eveci = gsl_matrix_get (evec, n, i);
+
+ comm += pow2 (eveci) * evali;
+ }
+
+ return comm;
+}
+
+/* Return the communality of variable N, calculated to N_FACTORS */
+static double
+communality (struct idata *idata, int n, int n_factors)
+{
+ return the_communality (idata->evec, idata->eval, n, n_factors);
+}
+
+
+
+static void
+show_communalities (const struct cmd_factor * factor,
+ const gsl_vector *initial, const gsl_vector *extracted)
+{
+ int i;
+ int c = 0;
+ const int heading_columns = 1;
+ int nc = heading_columns;
+ const int heading_rows = 1;
+ const int nr = heading_rows + factor->n_vars;
+ struct tab_table *t;
+
+ if (factor->print & PRINT_EXTRACTION)
+ nc++;
+
+ if (factor->print & PRINT_INITIAL)
+ nc++;
+
+ /* No point having a table with only headings */
+ if (nc <= 1)
+ return;
+
+ t = tab_create (nc, nr, 0);
+
+ tab_title (t, _("Communalities"));
+
+ tab_dim (t, tab_natural_dimensions, NULL);
+
+ tab_headers (t, heading_columns, 0, heading_rows, 0);
+
+ c = 1;
+ if (factor->print & PRINT_INITIAL)
+ tab_text (t, c++, 0, TAB_CENTER | TAT_TITLE, _("Initial"));
+
+ if (factor->print & PRINT_EXTRACTION)
+ tab_text (t, c++, 0, TAB_CENTER | TAT_TITLE, _("Extraction"));
+
+ /* Outline the box */
+ tab_box (t,
+ TAL_2, TAL_2,
+ -1, -1,
+ 0, 0,
+ nc - 1, nr - 1);
+
+ /* Vertical lines */
+ tab_box (t,
+ -1, -1,
+ -1, TAL_1,
+ heading_columns, 0,
+ nc - 1, nr - 1);
+
+ tab_hline (t, TAL_1, 0, nc - 1, heading_rows);
+ tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
+
+ for (i = 0 ; i < factor->n_vars; ++i)
+ {
+ c = 0;
+ tab_text (t, c++, i + heading_rows, TAT_TITLE, var_to_string (factor->vars[i]));
+
+ if (factor->print & PRINT_INITIAL)
+ tab_double (t, c++, i + heading_rows, 0, gsl_vector_get (initial, i), NULL);
+
+ if (factor->print & PRINT_EXTRACTION)
+ tab_double (t, c++, i + heading_rows, 0, gsl_vector_get (extracted, i), NULL);
+ }
+
+ tab_submit (t);
+}
+
+
+static void
+show_factor_matrix (const struct cmd_factor *factor, struct idata *idata, const gsl_matrix *fm)
+{
+ int i;
+ const int n_factors = n_extracted_factors (factor, idata);
+
+ const int heading_columns = 1;
+ const int heading_rows = 2;
+ const int nr = heading_rows + factor->n_vars;
+ const int nc = heading_columns + n_factors;
+ gsl_permutation *perm;
+
+ struct tab_table *t = tab_create (nc, nr, 0);
+
+ if ( factor->extraction == EXTRACTION_PC )
+ tab_title (t, _("Component Matrix"));
+ else
+ tab_title (t, _("Factor Matrix"));
+
+ tab_dim (t, tab_natural_dimensions, NULL);
+
+ tab_headers (t, heading_columns, 0, heading_rows, 0);
+
+ if ( factor->extraction == EXTRACTION_PC )
+ tab_joint_text (t,
+ 1, 0,
+ nc - 1, 0,
+ TAB_CENTER | TAT_TITLE, _("Component"));
+ else
+ tab_joint_text (t,
+ 1, 0,
+ nc - 1, 0,
+ TAB_CENTER | TAT_TITLE, _("Factor"));
+
+
+ tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
+
+
+ /* Outline the box */
+ tab_box (t,
+ TAL_2, TAL_2,
+ -1, -1,
+ 0, 0,
+ nc - 1, nr - 1);
+
+ /* Vertical lines */
+ tab_box (t,
+ -1, -1,
+ -1, TAL_1,
+ heading_columns, 1,
+ nc - 1, nr - 1);
+
+ tab_hline (t, TAL_1, 0, nc - 1, heading_rows);
+ tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
+
+
+ {
+ gsl_vector_const_view r1 = gsl_matrix_const_row (fm, 0);
+ gsl_vector_const_view r2 = gsl_matrix_const_row (fm, 1);
+
+ dump_vector (&r1.vector);
+ dump_vector (&r2.vector);
+ }
+
+ /* Initialise to the identity permutation */
+ perm = gsl_permutation_calloc (factor->n_vars);
+
+ if ( factor->sort)
+ sort_matrix_indirect (fm, perm);
+
+ for (i = 0 ; i < n_factors; ++i)
+ {
+ tab_text_format (t, heading_columns + i, 1, TAB_CENTER | TAT_TITLE, _("%d"), i + 1);
+ }
+
+ for (i = 0 ; i < factor->n_vars; ++i)
+ {
+ int j;
+ const int matrix_row = perm->data[i];
+ tab_text (t, 0, i + heading_rows, TAT_TITLE, var_to_string (factor->vars[matrix_row]));
+
+ for (j = 0 ; j < n_factors; ++j)
+ {
+ double x = gsl_matrix_get (fm, matrix_row, j);
+
+ if ( fabs (x) < factor->blank)
+ continue;
+
+ tab_double (t, heading_columns + j, heading_rows + i, 0, x, NULL);
+ }
+ }
+
+ gsl_permutation_free (perm);
+
+ tab_submit (t);
+}
+
+
+static void
+show_explained_variance (const struct cmd_factor * factor, struct idata *idata,
+ const gsl_vector *initial_eigenvalues,
+ const gsl_vector *extracted_eigenvalues)
+{
+ size_t i;
+ int c = 0;
+ const int heading_columns = 1;
+ const int heading_rows = 2;
+ const int nr = heading_rows + factor->n_vars;
+
+ struct tab_table *t ;
+
+ double i_total = 0.0;
+ double i_cum = 0.0;
+
+ double e_total = 0.0;
+ double e_cum = 0.0;
+
+ int nc = heading_columns;
+
+ if (factor->print & PRINT_EXTRACTION)
+ nc += 3;
+
+ if (factor->print & PRINT_INITIAL)
+ nc += 3;
+
+ if (factor->print & PRINT_ROTATION)
+ nc += 3;
+
+ /* No point having a table with only headings */
+ if ( nc <= heading_columns)
+ return;
+
+ t = tab_create (nc, nr, 0);
+
+ tab_title (t, _("Total Variance Explained"));
+
+ tab_dim (t, tab_natural_dimensions, NULL);
+
+ tab_headers (t, heading_columns, 0, heading_rows, 0);
+
+ /* Outline the box */
+ tab_box (t,
+ TAL_2, TAL_2,
+ -1, -1,
+ 0, 0,
+ nc - 1, nr - 1);
+
+ /* Vertical lines */
+ tab_box (t,
+ -1, -1,
+ -1, TAL_1,
+ heading_columns, 0,
+ nc - 1, nr - 1);
+
+ tab_hline (t, TAL_1, 0, nc - 1, heading_rows);
+ tab_hline (t, TAL_1, 1, nc - 1, 1);
+
+ tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
+
+
+ if ( factor->extraction == EXTRACTION_PC)
+ tab_text (t, 0, 1, TAB_LEFT | TAT_TITLE, _("Component"));
+ else
+ tab_text (t, 0, 1, TAB_LEFT | TAT_TITLE, _("Factor"));
+
+ c = 1;
+ if (factor->print & PRINT_INITIAL)
+ {
+ tab_joint_text (t, c, 0, c + 2, 0, TAB_CENTER | TAT_TITLE, _("Initial Eigenvalues"));
+ c += 3;
+ }
+
+ if (factor->print & PRINT_EXTRACTION)
+ {
+ tab_joint_text (t, c, 0, c + 2, 0, TAB_CENTER | TAT_TITLE, _("Extraction Sums of Squared Loadings"));
+ c += 3;
+ }
+
+ if (factor->print & PRINT_ROTATION)
+ {
+ printf ("C is %d\n", c);
+ tab_joint_text (t, c, 0, c + 2, 0, TAB_CENTER | TAT_TITLE, _("Rotation Sums of Squared Loadings"));
+ c += 3;
+ }
+
+ for (i = 0; i < (nc - heading_columns) / 3 ; ++i)
+ {
+ tab_text (t, i * 3 + 1, 1, TAB_CENTER | TAT_TITLE, _("Total"));
+ tab_text (t, i * 3 + 2, 1, TAB_CENTER | TAT_TITLE, _("% of Variance"));
+ tab_text (t, i * 3 + 3, 1, TAB_CENTER | TAT_TITLE, _("Cumulative %"));
+
+ tab_vline (t, TAL_2, heading_columns + i * 3, 0, nr - 1);
+ }
+
+ for (i = 0 ; i < initial_eigenvalues->size; ++i)
+ i_total += gsl_vector_get (initial_eigenvalues, i);
+
+ if ( factor->extraction == EXTRACTION_PAF)
+ {
+ e_total = factor->n_vars;
+ }
+ else
+ {
+ e_total = i_total;
+ }
+
+
+ for (i = 0 ; i < factor->n_vars; ++i)
+ {
+ const double i_lambda = gsl_vector_get (initial_eigenvalues, i);
+ double i_percent = 100.0 * i_lambda / i_total ;
+
+ const double e_lambda = gsl_vector_get (extracted_eigenvalues, i);
+ double e_percent = 100.0 * e_lambda / e_total ;
+
+ c = 0;
+
+ tab_text_format (t, c++, i + heading_rows, TAB_LEFT | TAT_TITLE, _("%d"), i + 1);
+
+ i_cum += i_percent;
+ e_cum += e_percent;
+
+ /* Initial Eigenvalues */
+ if (factor->print & PRINT_INITIAL)
+ {
+ tab_double (t, c++, i + heading_rows, 0, i_lambda, NULL);
+ tab_double (t, c++, i + heading_rows, 0, i_percent, NULL);
+ tab_double (t, c++, i + heading_rows, 0, i_cum, NULL);
+ }
+
+ if (factor->print & PRINT_EXTRACTION)
+ {
+ if ( i < n_extracted_factors (factor, idata))
+ {
+ /* Sums of squared loadings */
+ tab_double (t, c++, i + heading_rows, 0, e_lambda, NULL);
+ tab_double (t, c++, i + heading_rows, 0, e_percent, NULL);
+ tab_double (t, c++, i + heading_rows, 0, e_cum, NULL);
+ }
+ }
+ }
+
+ tab_submit (t);
+}
+
+
+
+static void
+do_factor (const struct cmd_factor *factor, struct casereader *r)
+{
+ struct ccase *c;
+ const gsl_matrix *cov_matrix;
+ const gsl_matrix *var_matrix;
+ const gsl_matrix *mean_matrix;
+ const gsl_matrix *n_matrix;
+
+ const gsl_matrix *analysis_matrix;
+ struct idata *idata;
+
+ struct covariance *cov = covariance_create (factor->n_vars, factor->vars,
+ factor->wv, factor->exclude);
+
+ for ( ; (c = casereader_read (r) ); case_unref (c))
+ {
+ covariance_accumulate (cov, c);
+ }
+
+ cov_matrix = covariance_calculate (cov);
+
+ var_matrix = covariance_moments (cov, MOMENT_VARIANCE);
+ mean_matrix = covariance_moments (cov, MOMENT_MEAN);
+ n_matrix = covariance_moments (cov, MOMENT_NONE);
+
+ if ( factor->method == METHOD_CORR)
+ {
+ analysis_matrix = correlation_from_covariance (cov_matrix, var_matrix);
+ }
+ else
+ analysis_matrix = cov_matrix;
+
+ if ( factor->print & PRINT_UNIVARIATE)
+ {
+ const int nc = 4;
+ int i;
+ const struct fmt_spec *wfmt = factor->wv ? var_get_print_format (factor->wv) : & F_8_0;
+
+
+ const int heading_columns = 1;
+ const int heading_rows = 1;
+
+ const int nr = heading_rows + factor->n_vars;
+
+ struct tab_table *t = tab_create (nc, nr, 0);
+ tab_title (t, _("Descriptive Statistics"));
+ tab_dim (t, tab_natural_dimensions, NULL);
+
+ tab_headers (t, heading_columns, 0, heading_rows, 0);
+
+ /* Outline the box */
+ tab_box (t,
+ TAL_2, TAL_2,
+ -1, -1,
+ 0, 0,
+ nc - 1, nr - 1);
+
+ /* Vertical lines */
+ tab_box (t,
+ -1, -1,
+ -1, TAL_1,
+ heading_columns, 0,
+ nc - 1, nr - 1);
+
+ tab_hline (t, TAL_1, 0, nc - 1, heading_rows);
+ tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
+
+ tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
+ tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
+ tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Analysis N"));
+
+ for (i = 0 ; i < factor->n_vars; ++i)
+ {
+ const struct variable *v = factor->vars[i];
+ tab_text (t, 0, i + heading_rows, TAB_LEFT | TAT_TITLE, var_to_string (v));
+
+ tab_double (t, 1, i + heading_rows, 0, gsl_matrix_get (mean_matrix, i, i), NULL);
+ tab_double (t, 2, i + heading_rows, 0, sqrt (gsl_matrix_get (var_matrix, i, i)), NULL);
+ tab_double (t, 3, i + heading_rows, 0, gsl_matrix_get (n_matrix, i, i), wfmt);
+ }
+
+ tab_submit (t);
+ }
+
+ if ( factor->print & PRINT_DETERMINANT)
+ {
+ const int nc = 2;
+ const int heading_columns = 0;
+ const int heading_rows = 0;
+ const int nr = 1;
+ struct tab_table *t ;
+
+ int sign = 0;
+ double det = 0.0;
+ const int size = analysis_matrix->size1;
+ gsl_permutation *p = gsl_permutation_calloc (size);
+ gsl_matrix *tmp = gsl_matrix_calloc (size, size);
+
+ gsl_matrix_memcpy (tmp, analysis_matrix);
+ gsl_linalg_LU_decomp (tmp, p, &sign);
+ det = gsl_linalg_LU_det (tmp, sign);
+ gsl_permutation_free (p);
+ gsl_matrix_free (tmp);
+
+ t = tab_create (nc, nr, 0);
+
+ if ( factor->method == METHOD_CORR)
+ tab_title (t, _("Correlation Matrix"));
+ else
+ tab_title (t, _("Covariance Matrix"));
+
+ tab_dim (t, tab_natural_dimensions, NULL);
+
+ tab_headers (t, heading_columns, 0, heading_rows, 0);
+
+ tab_hline (t, TAL_1, 0, nc - 1, heading_rows);
+
+ tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Determinant"));
+ tab_double (t, 1, 0, 0, det, NULL);
+
+ tab_submit (t);
+ }
+
+
+ idata = idata_alloc (factor->n_vars);
+
+
+#if 1
+ {
+ gsl_eigen_symmv_workspace *workspace = gsl_eigen_symmv_alloc (factor->n_vars);
+
+ gsl_eigen_symmv (matrix_dup (analysis_matrix), idata->eval, idata->evec, workspace);
+
+ gsl_eigen_symmv_free (workspace);
+ }
+
+ gsl_eigen_symmv_sort (idata->eval, idata->evec, GSL_EIGEN_SORT_ABS_DESC);
+#endif
+
+ {
+ const gsl_vector *extracted_eigenvalues = NULL;
+ gsl_vector *initial_communalities = gsl_vector_alloc (factor->n_vars);
+ gsl_vector *extracted_communalities = gsl_vector_alloc (factor->n_vars);
+ size_t i;
+ struct factor_matrix_workspace *fmw = factor_matrix_workspace_alloc (idata->msr->size, n_extracted_factors (factor, idata));
+ gsl_matrix *factor_matrix = gsl_matrix_calloc (factor->n_vars, fmw->n_factors);
+
+ if ( factor->extraction == EXTRACTION_PAF)
+ {
+ gsl_vector *diff = gsl_vector_alloc (idata->msr->size);
+ struct smr_workspace *ws = ws_create (analysis_matrix);
+
+ for (i = 0 ; i < factor->n_vars ; ++i)
+ {
+ double r2 = squared_multiple_correlation (analysis_matrix, i, ws);
+
+ gsl_vector_set (idata->msr, i, r2);
+ }
+ ws_destroy (ws);
+
+ gsl_vector_memcpy (initial_communalities, idata->msr);
+
+ for (i = 0; i < factor->iterations; ++i)
+ {
+ double min, max;
+ gsl_vector_memcpy (diff, idata->msr);
+
+ iterate_factor_matrix (analysis_matrix, idata->msr, factor_matrix, fmw);
+
+ gsl_vector_sub (diff, idata->msr);
+
+ gsl_vector_minmax (diff, &min, &max);
+
+ if ( fabs (min) < factor->econverge && fabs (max) < factor->econverge)
+ break;
+ }
+ gsl_vector_free (diff);
+
+ gsl_vector_memcpy (extracted_communalities, idata->msr);
+ extracted_eigenvalues = fmw->eval;
+ }
+ else if (factor->extraction == EXTRACTION_PC)
+ {
+ for (i = 0 ; i < factor->n_vars; ++i)
+ {
+ gsl_vector_set (initial_communalities, i, communality (idata, i, factor->n_vars));
+ }
+ gsl_vector_memcpy (extracted_communalities, initial_communalities);
+
+ iterate_factor_matrix (analysis_matrix, extracted_communalities, factor_matrix, fmw);
+ extracted_eigenvalues = idata->eval;
+ }
+
+ show_communalities (factor, initial_communalities, extracted_communalities);
+
+ show_explained_variance (factor, idata, idata->eval, extracted_eigenvalues);
+
+ factor_matrix_workspace_free (fmw);
+
+ show_factor_matrix (factor, idata, factor_matrix);
+
+ gsl_vector_free (initial_communalities);
+ gsl_vector_free (extracted_communalities);
+ }
+
+ idata_free (idata);
+
+ casereader_destroy (r);
+}