From dac22e121809fcc6dd75946c3e1f0cd9de2e811e Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 15 Nov 2021 21:03:06 -0800 Subject: [PATCH 1/1] MATRIX MGET with factor and splits and TYPE --- doc/matrices.texi | 7 +- src/language/stats/matrix.c | 113 +++++++++------ tests/language/data-io/matrix-data.at | 65 +++++++++ tests/language/stats/matrix.at | 192 +++++++++++++++++++++++++- 4 files changed, 327 insertions(+), 50 deletions(-) diff --git a/doc/matrices.texi b/doc/matrices.texi index f5af6ec4ac..565cdbd257 100644 --- a/doc/matrices.texi +++ b/doc/matrices.texi @@ -2535,9 +2535,10 @@ Vector of counts. @end table @item -If the matrix file has factor variables, @code{F@i{n}}, where @i{n} -is a number identifying a group of factors: @code{F1} for the first -group, @code{F2} for the second, and so on. +If the matrix file has factor variables, @code{F@i{n}}, where @i{n} is +a number identifying a group of factors: @code{F1} for the first +group, @code{F2} for the second, and so on. This part is omitted for +pooled data (where the factors all have the system-missing value). @item If the matrix file has split file variables, @code{S@i{n}}, where diff --git a/src/language/stats/matrix.c b/src/language/stats/matrix.c index 8445e64ac3..a9690a42f1 100644 --- a/src/language/stats/matrix.c +++ b/src/language/stats/matrix.c @@ -6248,17 +6248,44 @@ get_a8_var (const struct dictionary *d, const char *name) } static bool -is_rowtype (const union value *v, const char *rowtype) +var_changed (const struct ccase *ca, const struct ccase *cb, + const struct variable *var) { - struct substring vs = ss_buffer (CHAR_CAST (char *, v->s), 8); - ss_rtrim (&vs, ss_cstr (" ")); - return ss_equals_case (vs, ss_cstr (rowtype)); + return (ca && cb + ? !value_equal (case_data (ca, var), case_data (cb, var), + var_get_width (var)) + : ca || cb); +} + +static bool +vars_changed (const struct ccase *ca, const struct ccase *cb, + const struct dictionary *d, + size_t first_var, size_t n_vars) +{ + for (size_t i = 0; i < n_vars; i++) + { + const struct variable *v = dict_get_var (d, first_var + i); + if (var_changed (ca, cb, v)) + return true; + } + return false; +} + +static bool +vars_all_missing (const struct ccase *c, const struct dictionary *d, + size_t first_var, size_t n_vars) +{ + for (size_t i = 0; i < n_vars; i++) + if (case_num (c, dict_get_var (d, first_var + i)) != SYSMIS) + return false; + return true; } static void matrix_mget_commit_var (struct ccase **rows, size_t n_rows, const struct dictionary *d, const struct variable *rowtype_var, + const struct stringi_set *accepted_rowtypes, struct matrix_state *s, size_t ss, size_t sn, size_t si, size_t fs, size_t fn, size_t fi, @@ -6267,19 +6294,27 @@ matrix_mget_commit_var (struct ccase **rows, size_t n_rows, struct pivot_dimension *var_dimension) { if (!n_rows) - return; + goto exit; + + /* Is this a matrix for pooled data, either where there are no factor + variables or the factor variables are missing? */ + bool pooled = !fn || vars_all_missing (rows[0], d, fs, fn); - const union value *rowtype_ = case_data (rows[0], rowtype_var); - const char *name_prefix = (is_rowtype (rowtype_, "COV") ? "CV" - : is_rowtype (rowtype_, "CORR") ? "CR" - : is_rowtype (rowtype_, "MEAN") ? "MN" - : is_rowtype (rowtype_, "STDDEV") ? "SD" - : is_rowtype (rowtype_, "N") ? "NC" - : "CN"); + struct substring rowtype = case_ss (rows[0], rowtype_var); + ss_rtrim (&rowtype, ss_cstr (" ")); + if (!stringi_set_is_empty (accepted_rowtypes) + && !stringi_set_contains_len (accepted_rowtypes, + rowtype.string, rowtype.length)) + goto exit; struct string name = DS_EMPTY_INITIALIZER; - ds_put_cstr (&name, name_prefix); - if (fi > 0) + ds_put_cstr (&name, (ss_equals_case (rowtype, ss_cstr ("COV")) ? "CV" + : ss_equals_case (rowtype, ss_cstr ("CORR")) ? "CR" + : ss_equals_case (rowtype, ss_cstr ("MEAN")) ? "MN" + : ss_equals_case (rowtype, ss_cstr ("STDDEV")) ? "SD" + : ss_equals_case (rowtype, ss_cstr ("N")) ? "NC" + : "CN")); + if (!pooled) ds_put_format (&name, "F%zu", fi); if (si > 0) ds_put_format (&name, "S%zu", si); @@ -6291,7 +6326,7 @@ matrix_mget_commit_var (struct ccase **rows, size_t n_rows, { msg (SW, _("Matrix data file contains variable with existing name %s."), ds_cstr (&name)); - goto exit; + goto exit_free_name; } gsl_matrix *m = gsl_matrix_alloc (n_rows, cn); @@ -6324,7 +6359,8 @@ matrix_mget_commit_var (struct ccase **rows, size_t n_rows, for (size_t j = 0; j < fn; j++) { struct variable *var = dict_get_var (d, fs + j); - const union value *value = case_data (rows[0], var); + const union value sysmis = { .f = SYSMIS }; + const union value *value = pooled ? &sysmis : case_data (rows[0], var); pivot_table_put2 (pt, j + sn, var_index, pivot_value_new_var_value (var, value)); } @@ -6340,36 +6376,14 @@ matrix_mget_commit_var (struct ccase **rows, size_t n_rows, ds_cstr (&name), n_missing); mv->value = m; -exit: +exit_free_name: ds_destroy (&name); + +exit: for (size_t y = 0; y < n_rows; y++) case_unref (rows[y]); } -static bool -var_changed (const struct ccase *ca, const struct ccase *cb, - const struct variable *var) -{ - return (ca && cb - ? !value_equal (case_data (ca, var), case_data (cb, var), - var_get_width (var)) - : ca || cb); -} - -static bool -vars_changed (const struct ccase *ca, const struct ccase *cb, - const struct dictionary *d, - size_t first_var, size_t n_vars) -{ - for (size_t i = 0; i < n_vars; i++) - { - const struct variable *v = dict_get_var (d, first_var + i); - if (var_changed (ca, cb, v)) - return true; - } - return false; -} - static void matrix_cmd_execute_mget__ (struct mget_command *mget, struct casereader *r, const struct dictionary *d) @@ -6437,7 +6451,7 @@ matrix_cmd_execute_mget__ (struct mget_command *mget, if (fn > 0) { struct pivot_category *factors = pivot_category_create_group ( - attr_dimension->root, N_("Factor Values")); + attr_dimension->root, N_("Factors")); for (size_t i = 0; i < fn; i++) pivot_category_create_leaf (factors, pivot_value_new_variable ( dict_get_var (d, fs + i))); @@ -6453,6 +6467,8 @@ matrix_cmd_execute_mget__ (struct mget_command *mget, struct ccase *c; while ((c = casereader_read (r)) != NULL) { + bool row_has_factors = fn && !vars_all_missing (c, d, fs, fn); + enum { SPLITS_CHANGED, @@ -6468,7 +6484,8 @@ matrix_cmd_execute_mget__ (struct mget_command *mget, if (change != NOTHING_CHANGED) { - matrix_mget_commit_var (rows, n_rows, d, rowtype_, + matrix_mget_commit_var (rows, n_rows, d, + rowtype_, &mget->rowtypes, mget->state, ss, sn, si, fs, fn, fi, @@ -6492,19 +6509,23 @@ matrix_cmd_execute_mget__ (struct mget_command *mget, /* Reset the factor number, if there are factors. */ if (fn) { - fi = 1; + fi = 0; + if (row_has_factors) + fi++; case_unref (fc); fc = case_ref (c); } } else if (change == FACTORS_CHANGED) { - fi++; + if (row_has_factors) + fi++; case_unref (fc); fc = case_ref (c); } } - matrix_mget_commit_var (rows, n_rows, d, rowtype_, + matrix_mget_commit_var (rows, n_rows, d, + rowtype_, &mget->rowtypes, mget->state, ss, sn, si, fs, fn, fi, diff --git a/tests/language/data-io/matrix-data.at b/tests/language/data-io/matrix-data.at index 3c69db6cd4..ca965dd7a2 100644 --- a/tests/language/data-io/matrix-data.at +++ b/tests/language/data-io/matrix-data.at @@ -389,6 +389,71 @@ CORR,.,var04,.7,.5,.4,1.0 ]) AT_CLEANUP +AT_SETUP([MATRIX DATA - factors and splits]) +AT_DATA([matrix-data.sps], [dnl +matrix data + variables = s f rowtype_ var01 var02 var03 + /split=s + /factor=f. + +begin data +8 0 mean 21.4 5.0 72.9 +8 0 sd 6.5 1.6 22.8 +8 0 n 106 106 106 +8 . corr 1 +8 . corr .41 1 +8 . corr -.16 -.22 1 +9 1 mean 11.4 1.0 52.9 +9 1 sd 9.5 8.6 12.8 +9 1 n 10 11 12 +9 . corr 1 +9 . corr .51 1 +9 . corr .36 -.41 1 +end data. + +display dictionary. + +list. +]) +AT_CHECK([pspp matrix-data.sps -O format=csv], [0], [dnl +Table: Variables +Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format +s,1,Scale,Input,8,Right,F4.0,F4.0 +ROWTYPE_,2,Nominal,Input,8,Left,A8,A8 +f,3,Scale,Input,8,Right,F4.0,F4.0 +VARNAME_,4,Nominal,Input,8,Left,A8,A8 +var01,5,Scale,Input,8,Right,F10.4,F10.4 +var02,6,Scale,Input,8,Right,F10.4,F10.4 +var03,7,Scale,Input,8,Right,F10.4,F10.4 + +Table: Split Values +Variable,Value +s,8 + +Table: Data List +s,ROWTYPE_,f,VARNAME_,var01,var02,var03 +8,MEAN,0,,21.4000,5.0000,72.9000 +8,STDDEV,0,,6.5000,1.6000,22.8000 +8,N,0,,106.0000,106.0000,106.0000 +8,CORR,.,var01,1.0000,.4100,-.1600 +8,CORR,.,var02,.4100,1.0000,-.2200 +8,CORR,.,var03,-.1600,-.2200,1.0000 + +Table: Split Values +Variable,Value +s,9 + +Table: Data List +s,ROWTYPE_,f,VARNAME_,var01,var02,var03 +9,MEAN,1,,11.4000,1.0000,52.9000 +9,STDDEV,1,,9.5000,8.6000,12.8000 +9,N,1,,10.0000,11.0000,12.0000 +9,CORR,.,var01,1.0000,.5100,.3600 +9,CORR,.,var02,.5100,1.0000,-.4100 +9,CORR,.,var03,.3600,-.4100,1.0000 +]) +AT_CLEANUP + AT_SETUP([MATRIX DATA - bad ROWTYPE_]) AT_DATA([matrix-data.sps], [dnl matrix data diff --git a/tests/language/stats/matrix.at b/tests/language/stats/matrix.at index bace7412f5..b8e3edaad3 100644 --- a/tests/language/stats/matrix.at +++ b/tests/language/stats/matrix.at @@ -3345,4 +3345,194 @@ CRS2 .51 1.00 -.41 .36 -.41 1.00 ]) -AT_CLEANUP \ No newline at end of file +AT_CLEANUP + +AT_SETUP([MATRIX - MGET with factor variables]) +AT_DATA([matrix.sps], [dnl +MATRIX DATA + VARIABLES=ROWTYPE_ f1 var01 TO var04 + /FACTOR=f1. +BEGIN DATA. +MEAN 0 34 35 36 37 +SD 0 22 11 55 66 +N 0 99 98 99 92 +MEAN 1 44 45 34 39 +SD 1 23 15 51 46 +N 1 98 34 87 23 +CORR . 1 +CORR . .9 1 +CORR . .8 .6 1 +CORR . .7 .5 .4 1 +END DATA. +FORMATS var01 TO var04(F5.1). +SAVE OUTFILE='matrix.sav'. +]) +AT_DATA([matrix2.sps], [dnl +MATRIX. +MGET FILE='matrix.sav'. +PRINT MNF1/FORMAT=F2.0. +PRINT SDF1/FORMAT=F2.0. +PRINT NCF1/FORMAT=F2.0. +PRINT MNF2/FORMAT=F2.0. +PRINT SDF2/FORMAT=F2.0. +PRINT NCF2/FORMAT=F2.0. +PRINT CR/FORMAT=F3.1. +END MATRIX. +]) +AT_CHECK([pspp matrix.sps]) +AT_CHECK([pspp -O format=csv matrix2.sps], [0], [dnl +Table: Matrix Variables Created by MGET +,Factors,Dimensions, +,f1,Rows,Columns +MNF1,0,1,4 +SDF1,0,1,4 +NCF1,0,1,4 +MNF2,1,1,4 +SDF2,1,1,4 +NCF2,1,1,4 +CR,.,4,4 + +MNF1 +34 35 36 37 + +SDF1 +22 11 55 66 + +NCF1 +99 98 99 92 + +MNF2 +44 45 34 39 + +SDF2 +23 15 51 46 + +NCF2 +98 34 87 23 + +CR +1.0 .9 .8 .7 +.9 1.0 .6 .5 +.8 .6 1.0 .4 +.7 .5 .4 1.0 +]) +AT_CLEANUP + +AT_SETUP([MATRIX - MGET with factor and split variables]) +AT_DATA([matrix.sps], [dnl +matrix data + variables = s f rowtype_ var01 var02 var03 + /split=s + /factor=f. + +begin data +8 0 mean 21.4 5.0 72.9 +8 0 sd 6.5 1.6 22.8 +8 0 n 106 106 106 +8 . corr 1 +8 . corr .41 1 +8 . corr -.16 -.22 1 +9 1 mean 11.4 1.0 52.9 +9 1 sd 9.5 8.6 12.8 +9 1 n 10 11 12 +9 . corr 1 +9 . corr .51 1 +9 . corr .36 -.41 1 +end data. + +MATRIX. +MGET. +PRINT MNF1S1/FORMAT=F5.1. +PRINT SDF1S1/FORMAT=F5.1. +PRINT NCF1S1/FORMAT=F5.0. +PRINT CRS1/FORMAT=F5.2. +PRINT MNF1S2/FORMAT=F5.1. +PRINT SDF1S2/FORMAT=F5.1. +PRINT NCF1S2/FORMAT=F5.0. +PRINT CRS2/FORMAT=F5.2. +END MATRIX. +]) +AT_CHECK([pspp matrix.sps -O format=csv], [0], [dnl +Table: Matrix Variables Created by MGET +,Split Values,Factors,Dimensions, +,s,f,Rows,Columns +MNF1S1,8,0,1,3 +SDF1S1,8,0,1,3 +NCF1S1,8,0,1,3 +CRS1,8,.,3,3 +MNF1S2,9,1,1,3 +SDF1S2,9,1,1,3 +NCF1S2,9,1,1,3 +CRS2,9,.,3,3 + +MNF1S1 +21.4 5.0 72.9 + +SDF1S1 +6.5 1.6 22.8 + +NCF1S1 +106 106 106 + +CRS1 +1.00 .41 -.16 +.41 1.00 -.22 +-.16 -.22 1.00 + +MNF1S2 +11.4 1.0 52.9 + +SDF1S2 +9.5 8.6 12.8 + +NCF1S2 +10 11 12 + +CRS2 +1.00 .51 .36 +.51 1.00 -.41 +.36 -.41 1.00 +]) +AT_CLEANUP + +AT_SETUP([MATRIX - MGET with TYPE]) +AT_DATA([matrix.sps], [dnl +MATRIX DATA + VARIABLES=ROWTYPE_ f1 var01 TO var04 + /FACTOR=f1. +BEGIN DATA. +MEAN 0 34 35 36 37 +SD 0 22 11 55 66 +N 0 99 98 99 92 +MEAN 1 44 45 34 39 +SD 1 23 15 51 46 +N 1 98 34 87 23 +CORR . 1 +CORR . .9 1 +CORR . .8 .6 1 +CORR . .7 .5 .4 1 +END DATA. +FORMATS var01 TO var04(F5.1). +SAVE OUTFILE='matrix.sav'. +]) +AT_DATA([matrix2.sps], [dnl +MATRIX. +MGET FILE='matrix.sav'/TYPE=CORR. +PRINT CR/FORMAT=F3.1. +END MATRIX. +]) +AT_CHECK([pspp matrix.sps]) +AT_CHECK([pspp -O format=csv matrix2.sps], [0], [dnl +Table: Matrix Variables Created by MGET +,Factors,Dimensions, +,f1,Rows,Columns +CR,.,4,4 + +CR +1.0 .9 .8 .7 +.9 1.0 .6 .5 +.8 .6 1.0 .4 +.7 .5 .4 1.0 +]) +AT_CLEANUP + -- 2.30.2