From bc37c3f489947cf081a930a5d4e58dbd133eb563 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Mon, 1 May 2017 20:30:45 +0200 Subject: [PATCH] MATRIX DATA: Handle multiple splits properly --- src/language/data-io/matrix-data.c | 85 +++++++++++++++++++++------ tests/language/data-io/matrix-data.at | 55 +++++++++-------- 2 files changed, 98 insertions(+), 42 deletions(-) diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index 7855caf464..fb214fd644 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -72,6 +72,8 @@ struct matrix_format const struct variable *rowtype; const struct variable *varname; int n_continuous_vars; + struct variable **split_vars; + size_t n_split_vars; }; /* @@ -114,16 +116,41 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void struct casewriter *writer; writer = autopaging_writer_create (proto); - double *temp_matrix = - xcalloc (sizeof (*temp_matrix), - mformat->n_continuous_vars * mformat->n_continuous_vars); + double **matrices = NULL; + size_t n_splits = 0; + + const size_t sizeof_matrix = + sizeof (double) * mformat->n_continuous_vars * mformat->n_continuous_vars; + /* Make an initial pass to populate our temporary matrix */ struct casereader *pass0 = casereader_clone (casereader0); struct ccase *c; + unsigned int prev_split_hash = 1; int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0; for (; (c = casereader_read (pass0)) != NULL; case_unref (c)) { + int s; + unsigned int split_hash = 0; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + split_hash = value_hash (sv, var_get_width (svar), split_hash); + } + + if (matrices == NULL || prev_split_hash != split_hash) + { + row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? + 1 : 0; + + n_splits++; + matrices = xrealloc (matrices, sizeof (double*) * n_splits); + matrices[n_splits - 1] = xmalloc (sizeof_matrix); + } + + prev_split_hash = split_hash; + int c_offset = (mformat->triangle == UPPER) ? row : 0; if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL) c_offset++; @@ -137,13 +164,16 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void { const struct variable *var = dict_get_var (dict, - 1 + col - c_offset + var_get_dict_index (mformat->varname)); + 1 + col - c_offset + + var_get_dict_index (mformat->varname)); double e = case_data (c, var)->f; if (e == SYSMIS) continue; - temp_matrix [col + mformat->n_continuous_vars * row] = e; - temp_matrix [row + mformat->n_continuous_vars * col] = e; + + + (matrices[n_splits-1])[col + mformat->n_continuous_vars * row] = e; + (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e; } row++; } @@ -155,8 +185,26 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void const int idx = var_get_dict_index (mformat->varname); row = 0; struct ccase *prev_case = NULL; + prev_split_hash = 1; + n_splits = 0; for (; (c = casereader_read (casereader0)) != NULL; prev_case = c) { + int s; + unsigned int split_hash = 0; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + split_hash = value_hash (sv, var_get_width (svar), split_hash); + } + if (prev_split_hash != split_hash) + { + n_splits++; + row = 0; + } + + prev_split_hash = split_hash; + case_unref (prev_case); struct ccase *outcase = case_create (proto); case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto)); @@ -175,7 +223,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void union value *dest_val = case_data_rw_idx (outcase, 1 + col + var_get_dict_index (mformat->varname)); - dest_val->f = temp_matrix [col + mformat->n_continuous_vars * row]; + dest_val->f = (matrices[n_splits - 1])[col + mformat->n_continuous_vars * row]; if (col == row && mformat->diagonal == NO_DIAGONAL) dest_val->f = 1.0; } @@ -219,7 +267,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void union value *dest_val = case_data_rw_idx (outcase, 1 + col + var_get_dict_index (mformat->varname)); - dest_val->f = temp_matrix [col + mformat->n_continuous_vars * row]; + dest_val->f = (matrices[n_splits - 1]) [col + mformat->n_continuous_vars * row]; if (col == row && mformat->diagonal == NO_DIAGONAL) dest_val->f = 1.0; } @@ -230,7 +278,10 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void if (prev_case) case_unref (prev_case); - free (temp_matrix); + int i; + for (i = 0 ; i < n_splits; ++i) + free (matrices[i]); + free (matrices); struct casereader *reader1 = casewriter_make_reader (writer); casereader_destroy (casereader0); return reader1; @@ -266,6 +317,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) mformat.varname = dict_create_var (dict, "VARNAME_", 8); mformat.n_continuous_vars = 0; + mformat.n_split_vars = 0; if (! lex_force_match_id (lexer, "VARIABLES")) goto error; @@ -364,22 +416,19 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) else if (lex_match_id (lexer, "SPLIT")) { lex_match (lexer, T_EQUALS); - struct variable **split_vars = NULL; - size_t n_split_vars; - if (! parse_variables (lexer, dict, &split_vars, &n_split_vars, 0)) + if (! parse_variables (lexer, dict, &mformat.split_vars, &mformat.n_split_vars, 0)) { - free (split_vars); + free (mformat.split_vars); goto error; } int i; - for (i = 0; i < n_split_vars; ++i) + for (i = 0; i < mformat.n_split_vars; ++i) { const struct fmt_spec fmt = fmt_for_input (FMT_F, 4, 0); - var_set_both_formats (split_vars[i], &fmt); + var_set_both_formats (mformat.split_vars[i], &fmt); } - dict_reorder_vars (dict, split_vars, n_split_vars); - mformat.n_continuous_vars -= n_split_vars; - free (split_vars); + dict_reorder_vars (dict, mformat.split_vars, mformat.n_split_vars); + mformat.n_continuous_vars -= mformat.n_split_vars; } else { diff --git a/tests/language/data-io/matrix-data.at b/tests/language/data-io/matrix-data.at index cba500ff10..3047b1f57f 100644 --- a/tests/language/data-io/matrix-data.at +++ b/tests/language/data-io/matrix-data.at @@ -9,7 +9,7 @@ matrix data /format = lower diagonal /file = 'matrix.dat' . - + list. ]) @@ -186,22 +186,27 @@ corr ,var04,7.0000,5.0000,4.0000,1.0000 AT_CLEANUP -AT_SETUP([Matrix data split]) + +AT_SETUP([Matrix data split data]) AT_DATA([matrix-data.pspp], [dnl matrix data - variables = s1 s2 rowtype_ var01 TO var04 - /split = s1 s2 - /format = full diagonal. + variables = s1 s2 rowtype_ var01 var02 var03 + /split=s1 s2. begin data -1 1 mean 34 35 36 37 -1 1 sd 22 11 55 66 -1 0 n 100 101 102 103 -1 0 corr 1 9 8 7 -0 1 corr 9 1 6 5 -0 1 corr 8 6 1 4 -0 0 corr 7 5 4 1 +8 0 mean 21.4 5.0 72.9 +8 0 sd 6.5 1.6 22.8 +8 0 n 106 106 106 +8 0 corr 1 +8 0 corr .41 1 +8 0 corr -.16 -.22 1 +8 1 mean 11.4 1.0 52.9 +8 1 sd 9.5 8.6 12.8 +8 1 n 10 11 12 +8 1 corr 1 +8 1 corr .51 1 +8 1 corr .36 -.41 1 end data. display dictionary. @@ -209,6 +214,7 @@ display dictionary. list. ]) + AT_CHECK([pspp -O format=csv matrix-data.pspp], [0], [dnl Variable,Description,Position s1,Format: F4.0,1 @@ -218,20 +224,21 @@ VARNAME_,Format: A8,4 var01,Format: F10.4,5 var02,Format: F10.4,6 var03,Format: F10.4,7 -var04,Format: F10.4,8 Table: Data List -s1,s2,ROWTYPE_,VARNAME_,var01,var02,var03,var04 -1,1,mean ,,34.0000,35.0000,36.0000,37.0000 -1,1,STDDEV ,,22.0000,11.0000,55.0000,66.0000 -1,0,n ,,100.0000,101.0000,102.0000,103.0000 -1,0,corr ,var01,1.0000,9.0000,8.0000,7.0000 -0,1,corr ,var02,9.0000,1.0000,6.0000,5.0000 -0,1,corr ,var03,8.0000,6.0000,1.0000,4.0000 -0,0,corr ,var04,7.0000,5.0000,4.0000,1.0000 +s1,s2,ROWTYPE_,VARNAME_,var01,var02,var03 +8,0,mean ,,21.4000,5.0000,72.9000 +8,0,STDDEV ,,6.5000,1.6000,22.8000 +8,0,n ,,106.0000,106.0000,106.0000 +8,0,corr ,var01,1.0000,.4100,-.1600 +8,0,corr ,var02,.4100,1.0000,-.2200 +8,0,corr ,var03,-.1600,-.2200,1.0000 +8,1,mean ,,11.4000,1.0000,52.9000 +8,1,STDDEV ,,9.5000,8.6000,12.8000 +8,1,n ,,10.0000,11.0000,12.0000 +8,1,corr ,var01,1.0000,.5100,.3600 +8,1,corr ,var02,.5100,1.0000,-.4100 +8,1,corr ,var03,.3600,-.4100,1.0000 ]) AT_CLEANUP - - - -- 2.30.2