From fe55889f4dbca132c99c17fcaf9604acd2a26593 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Wed, 10 May 2017 12:41:40 +0200 Subject: [PATCH] matrix reader: Fix bug which incorrectly read in matrix material. --- src/language/data-io/matrix-reader.c | 84 +++++++++++++++++------ tests/language/data-io/matrix-data.at | 51 ++++++++++++++ tests/language/stats/factor.at | 97 +++++++++++++++++++++++++++ 3 files changed, 212 insertions(+), 20 deletions(-) diff --git a/src/language/data-io/matrix-reader.c b/src/language/data-io/matrix-reader.c index fa7dcf4819..2c0aee2fec 100644 --- a/src/language/data-io/matrix-reader.c +++ b/src/language/data-io/matrix-reader.c @@ -20,6 +20,7 @@ #include +#include #include #include #include @@ -149,6 +150,30 @@ destroy_matrix_reader (struct matrix_reader *mr) } +/* + Allocates MATRIX if necessary, + and populates row MROW, from the data in C corresponding to + variables in VARS. N_VARS is the length of VARS. +*/ +static void +matrix_fill_row (gsl_matrix **matrix, + const struct ccase *c, int mrow, + const struct variable **vars, size_t n_vars) +{ + int col; + if (*matrix == NULL) + *matrix = gsl_matrix_alloc (n_vars, n_vars); + + for (col = 0; col < n_vars; ++col) + { + const struct variable *cv = vars [col]; + double x = case_data (c, cv)->f; + assert (col < (*matrix)->size2); + assert (mrow < (*matrix)->size1); + gsl_matrix_set (*matrix, mrow, col, x); + } +} + bool next_matrix_from_reader (struct matrix_material *mm, struct matrix_reader *mr, @@ -156,6 +181,8 @@ next_matrix_from_reader (struct matrix_material *mm, { struct casereader *group; + assert (vars); + gsl_matrix_free (mr->n_vectors); gsl_matrix_free (mr->mean_vectors); gsl_matrix_free (mr->var_vectors); @@ -176,8 +203,21 @@ next_matrix_from_reader (struct matrix_material *mm, mr->correlation = NULL; mr->covariance = NULL; + // FIXME: Make this into a hash table. 
+ unsigned long *table = xmalloc (sizeof (*table) * n_vars); + int i; + for (i = 0; i < n_vars; ++i) + { + const int w = var_get_width (mr->varname); + uint8_t s[w]; + memset (s, 0, w); + const char *name = var_get_name (vars[i]); + strcpy (s, name); + unsigned long h = hash_bytes (s, w, 0); + table[i] = h; + } + struct ccase *c; - int crow = 0; for ( ; (c = casereader_read (group) ); case_unref (c)) { const union value *uv = case_data (c, mr->rowtype); @@ -197,31 +237,33 @@ next_matrix_from_reader (struct matrix_material *mm, for (row = 0; row < n_vars; ++row) gsl_matrix_set (mr->var_vectors, row, col, x * x); } - if (0 == strncasecmp ((char *) value_str (uv, 8), "CORR ", 8)) + + const union value *uvv = case_data (c, mr->varname); + const uint8_t *vs = value_str (uvv, var_get_width (mr->varname)); + int w = var_get_width (mr->varname); + unsigned long h = hash_bytes (vs, w, 0); + + int mrow = -1; + for (i = 0; i < n_vars; ++i) { - if (mr->correlation == NULL) - mr->correlation = gsl_matrix_alloc (n_vars, n_vars); - for (col = 0; col < n_vars; ++col) + if (table[i] == h) { - const struct variable *cv - = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col); - double x = case_data (c, cv)->f; - gsl_matrix_set (mr->correlation, crow, col, x); + mrow = i; + break; } - crow++; + } + + if (mrow == -1) + continue; + + + if (0 == strncasecmp ((char *) value_str (uv, 8), "CORR ", 8)) + { + matrix_fill_row (&mr->correlation, c, mrow, vars, n_vars); } else if (0 == strncasecmp ((char *) value_str (uv, 8), "COV ", 8)) { - if (mr->covariance == NULL) - mr->covariance = gsl_matrix_alloc (n_vars, n_vars); - for (col = 0; col < n_vars; ++col) - { - const struct variable *cv - = vars ? 
vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col); - double x = case_data (c, cv)->f; - gsl_matrix_set (mr->covariance, crow, col, x); - } - crow++; + matrix_fill_row (&mr->covariance, c, mrow, vars, n_vars); } } @@ -230,5 +272,7 @@ next_matrix_from_reader (struct matrix_material *mm, mm->cov = mr->covariance; mm->corr = mr->correlation; + free (table); + return true; } diff --git a/tests/language/data-io/matrix-data.at b/tests/language/data-io/matrix-data.at index c200f4284c..2f39f5817f 100644 --- a/tests/language/data-io/matrix-data.at +++ b/tests/language/data-io/matrix-data.at @@ -311,3 +311,54 @@ corr ,variableFour,7.0000,5.0000,4.0000,1.0000 ]) AT_CLEANUP + + + +AT_SETUP([Matrix reader - read integrity]) + +dnl Check that matrices presented are read correctly. +dnl The example below is an unlikely one since all +dnl covariance/correlation matrices must be symmetrical +dnl but it serves a purpose for this test. +AT_DATA([matrix-reader.pspp], [dnl +matrix data + variables = rowtype_ var01 to var9 + /format = full. + +begin data +n 1 2 3 4 5 6 7 8 9 +sd 100 200 300 400 500 600 700 800 900 +corr 11 12 13 14 15 16 17 18 19 +corr 21 22 23 24 25 26 27 28 29 +corr 31 32 33 34 35 36 37 38 39 +corr 41 42 43 44 45 46 47 48 49 +corr 51 52 53 54 55 56 57 58 59 +corr 61 62 63 64 65 66 67 68 69 +corr 71 72 73 74 75 76 77 78 79 +corr 81 82 83 84 85 86 87 88 89 +corr 91 92 93 94 95 96 97 98 99 +end data. + +factor /matrix = in (corr = *) + /analysis var02 var04 var06 + /method = correlation + /rotation = norotate + /print correlation. 
+]) + +AT_CHECK([pspp -O format=csv matrix-reader.pspp], [0], [dnl +Table: Correlation Matrix +,,var02,var04,var06 +Correlations,var02,22.00,24.00,26.00 +,var04,42.00,44.00,46.00 +,var06,62.00,64.00,66.00 + +Table: Component Matrix +,Component, +,1,2 +var02,6.73,-2.23 +var04,6.95,2.15 +var06,9.22,.01 +]) + +AT_CLEANUP diff --git a/tests/language/stats/factor.at b/tests/language/stats/factor.at index 32378e1f6d..55c781c4fb 100644 --- a/tests/language/stats/factor.at +++ b/tests/language/stats/factor.at @@ -2153,5 +2153,102 @@ var07,1.205,3.948,1.926,1.515,-2.450,-.317,-.087 var08,.085,.319,-.157,-.011,.353,-.341,-.816 ]) +AT_CLEANUP + + + +dnl A more realistic example of factor analysis usage. +AT_SETUP([FACTOR correlation matrix]) + +AT_DATA([correlation-matrix.sps], [dnl +set format = F10.3. + +matrix data + variables = rowtype_ + cdi_actws_16 cdi_maxzin_16 rdls_passws_16 rdls_actws_16 cdi_actws_20 cdi_maxzin_20 cdi_actws_26 cdi_maxzin_26 rdls_passws_26 rdls_actws_26 + nepsy_passws_36 morf_verv_36 bnt_actws_36 klankgr_id_36 snelnoe_36 letters_36 ppvt_passws_50 morf_verv_50 + nepsy_passws_56 bnt_actws_56 klank_gr_weg_56 snelnoe_56 letters_56 + leesacc_wo_owo_811 leesacc_tekst_811 leesacc_otekst_811 leessne_wo_owo_811 leesvl_tekst_811 leesvl_otekst_811 leessne_wo_811 spel_wo_owo_811 + / format = upper diagonal . 
+begin data +mean 64.44 1.74 15.30 11.50 269.03 5.37 441.90 8.57 36.59 33.99 11.68 14.74 18.67 6.70 71.57 2.28 70.45 51.82 18.82 34.57 11.68 45.63 12.94 35.08 92.60 79.28 2.78 61.71 29.44 9.46 13.17 +sd 74.93 1.36 5.51 4.17 159.26 2.76 128.77 3.50 6.20 6.50 3.55 8.37 5.90 3.01 24.81 4.09 24.44 18.55 2.90 6.46 3.01 14.06 7.69 4.36 7.10 17.57 1.27 25.68 11.75 3.36 4.13 +n 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 150 +corr 1.00 .784 .397 .862 .692 .625 .490 .374 .406 .371 .260 .257 .306 .118 -.148 .072 .202 .234 .198 .241 .205 -.054 .246 .166 .143 .155 -.122 .144 -.010 .135 .241 +corr 1.00 .333 .751 .549 .553 .447 .313 .304 .377 .204 .249 .258 .193 -.158 .119 .150 .216 .127 .209 .242 .046 .233 .120 .155 .107 -.126 .147 -.009 .134 .208 +corr 1.00 .469 .433 .381 .442 .307 .462 .391 .378 .293 .369 .191 -.306 .238 .204 .215 .295 .285 .157 .069 .241 .029 .060 .054 -.043 .124 -.069 .054 .136 +corr 1.00 .708 .663 .509 .419 .434 .432 .267 .255 .342 .132 -.192 .142 .228 .203 .248 .260 .200 -.051 .254 .136 .156 .109 -.126 .172 -.004 .157 .268 +corr 1.00 .787 .710 .567 .402 .511 .274 .285 .332 .154 -.096 .247 .253 .235 .245 .257 .261 -.048 .243 .119 .194 .164 -.108 .184 .011 .157 .235 +corr 1.00 .590 .646 .449 .505 .313 .322 .405 .148 -.117 .152 .294 .322 .252 .321 .267 -.055 .255 .118 .178 .137 -.110 .182 .004 .146 .216 +corr 1.00 .548 .343 .619 .296 .260 .456 .149 -.098 .252 .279 .267 .342 .361 .186 -.066 .215 .107 .148 .059 -.114 .156 -.035 .095 .220 +corr 1.00 .406 .509 .397 .236 .416 .037 -.179 .192 .334 .293 .277 .367 .162 -.150 .306 .171 .307 .173 -.128 .255 .075 .224 .315 +corr 1.00 .410 .497 .560 .574 .240 -.301 .204 .508 .351 .457 .428 .242 -.117 .367 .136 .191 .191 -.102 .215 .053 .185 .273 +corr 1.00 .328 .258 .534 .236 -.202 .200 .333 .209 .352 .375 .302 -.119 .272 .062 .203 .042 -.092 .220 .020 .158 .227 +corr 1.00 .439 .488 .323 -.213 .287 .507 .427 .493 .522 .298 -.142 .371 .109 .215 
.213 -.048 .228 .009 .133 .267 +corr 1.00 .437 .381 -.158 .153 .403 .430 .383 .379 .150 -.141 .303 .115 .131 .155 -.170 .206 .039 .193 .254 +corr 1.00 .247 -.143 .182 .521 .364 .415 .688 .304 -.185 .327 .188 .211 .202 -.111 .272 .122 .226 .301 +corr 1.00 -.150 .229 .296 .249 .329 .255 .210 -.036 .252 .141 .230 .112 -.195 .309 .135 .250 .195 +corr 1.00 -.132 -.204 -.162 -.284 -.166 -.189 .294 -.339 -.094 -.218 -.144 .153 -.246 -.128 -.192 -.239 +corr 1.00 .151 .132 .166 .195 .387 -.214 .476 .154 .187 .167 -.236 .410 .316 .370 .245 +corr 1.00 .388 .479 .591 .294 -.171 .351 .102 .245 .180 .003 .274 .059 .178 .236 +corr 1.00 .408 .437 .276 -.153 .353 .251 .318 .229 -.111 .263 .042 .203 .349 +corr 1.00 .467 .234 -.249 .382 .199 .313 .197 -.117 .263 .047 .215 .318 +corr 1.00 .368 -.199 .441 .198 .224 .197 -.099 .329 .105 .256 .322 +corr 1.00 -.211 .473 .233 .253 .268 -.198 .397 .229 .309 .277 +corr 1.00 -.310 -.217 -.312 -.203 .227 -.296 -.260 -.276 -.321 +corr 1.00 .368 .350 .311 -.313 .578 .338 .521 .458 +corr 1.00 .415 .580 -.588 .545 .497 .635 .683 +corr 1.00 .570 -.386 .494 .340 .538 .524 +corr 1.00 -.366 .427 .299 .498 .506 +corr 1.00 -.684 -.620 -.746 -.568 +corr 1.00 .759 .900 .555 +corr 1.00 .814 .400 +corr 1.00 .621 +corr 1.00 +end data . + +factor matrix in (cor = *) + / analysis = cdi_actws_16 rdls_actws_16 cdi_actws_20 cdi_actws_26 rdls_actws_26 bnt_actws_36 bnt_actws_56 + / format = default + / criteria = factors (1) + / extraction = pc + / rotation = norotate + / print = initial extraction . 
+ +]) + +AT_CHECK([pspp -O format=csv correlation-matrix.sps], [0], [dnl +Table: Communalities +,Initial,Extraction +cdi_actws_16,1.000,.614 +rdls_actws_16,1.000,.660 +cdi_actws_20,1.000,.695 +cdi_actws_26,1.000,.650 +rdls_actws_26,1.000,.536 +bnt_actws_36,1.000,.443 +bnt_actws_56,1.000,.316 + +Table: Total Variance Explained +,Initial Eigenvalues,,,Extraction Sums of Squared Loadings,, +Component,Total,% of Variance,Cumulative %,Total,% of Variance,Cumulative % +1,3.914,55.908,55.908,3.914,55.908,55.908 +2,1.320,18.852,74.760,,, +3,.716,10.223,84.983,,, +4,.422,6.030,91.012,,, +5,.278,3.977,94.989,,, +6,.216,3.088,98.077,,, +7,.135,1.923,100.000,,, + +Table: Component Matrix +,Component +,1 +cdi_actws_16,.784 +rdls_actws_16,.812 +cdi_actws_20,.834 +cdi_actws_26,.806 +rdls_actws_26,.732 +bnt_actws_36,.666 +bnt_actws_56,.562 +]) AT_CLEANUP -- 2.30.2