From: Ben Pfaff Date: Fri, 12 Nov 2021 03:27:50 +0000 (-0800) Subject: Basic MGET tests work. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a2ba6d0e0858f848e7719aa2acee8f44c9276f08;p=pspp Basic MGET tests work. --- diff --git a/doc/matrices.texi b/doc/matrices.texi index 74e17c15ad..f5af6ec4ac 100644 --- a/doc/matrices.texi +++ b/doc/matrices.texi @@ -2476,13 +2476,15 @@ the string columns' variable names on @code{STRINGS}. @end display The @code{MGET} command reads the data from a matrix file -(@pxref{Matrix Files}) into matrix variables. Specify the name or +(@pxref{Matrix Files}) into matrix variables. + +All of @code{MGET}'s subcommands are optional. Specify the name or handle of the matrix file to be read on the @code{FILE} subcommand; if it is omitted, then the command reads the active file. By default, @code{MGET} reads all of the data from the matrix file. -Specify a space-delimited list of matrix types @code{TYPE} to limit the -kinds of data to the one specified: +Specify a space-delimited list of matrix types on @code{TYPE} to limit +the kinds of data to the one specified: @table @code @item COV diff --git a/src/language/stats/matrix.c b/src/language/stats/matrix.c index 32a06914d9..17b66de22e 100644 --- a/src/language/stats/matrix.c +++ b/src/language/stats/matrix.c @@ -3486,6 +3486,7 @@ struct matrix_cmd { struct matrix_state *state; struct file_handle *file; + char *encoding; struct stringi_set rowtypes; } mget; @@ -4319,6 +4320,7 @@ matrix_cmd_execute_display (struct display_command *cmd) for (size_t j = 0; j < sizeof values / sizeof *values; j++) pivot_table_put2 (table, j, i, pivot_value_new_integer (values[j])); } + free (vars); pivot_table_submit (table); } @@ -5598,9 +5600,8 @@ error: } static void -matrix_cmd_execute_get__ (struct get_command *get, - const struct dictionary *dict, - struct casereader *reader) +matrix_cmd_execute_get__ (struct get_command *get, struct casereader *reader, + const struct dictionary *dict) { struct variable **vars; size_t n_vars = 0; @@ -5713,35 +5714,52 @@ matrix_cmd_execute_get__ (struct get_command *get, free (vars); } -static void -matrix_cmd_execute_get (struct get_command *get) +static bool +matrix_open_casereader (const char *command_name, + struct file_handle *file, const char *encoding, + struct dataset *dataset, + struct casereader **readerp, struct dictionary **dictp) { - struct dictionary *dict; - struct casereader *reader; - if (get->file) + if (file) { - reader = any_reader_open_and_decode (get->file, get->encoding, - &dict, NULL); - if (!reader) - return; + *readerp = any_reader_open_and_decode (file, encoding, dictp, NULL); + return *readerp != NULL; } else { - if (dict_get_var_cnt (dataset_dict (get->dataset)) == 0) + if (dict_get_var_cnt (dataset_dict (dataset)) == 0) { - msg (ME, _("GET cannot read empty active file.")); - return; + msg (ME, _("The %s command cannot read empty active file."), + command_name); + return false; } - reader = proc_open (get->dataset); - dict = dict_ref (dataset_dict (get->dataset)); + *readerp = proc_open (dataset); + *dictp = dict_ref (dataset_dict (dataset)); + return true; } +} - matrix_cmd_execute_get__ (get, dict, reader); - +static void +matrix_close_casereader (struct file_handle *file, struct dataset *dataset, + struct casereader *reader, struct dictionary *dict) +{ dict_unref (dict); casereader_destroy (reader); - if (!get->file) - proc_commit (get->dataset); + if (!file) + proc_commit (dataset); +} + +static void +matrix_cmd_execute_get (struct get_command *get) +{ + struct casereader *r; + struct dictionary *d; + if (matrix_open_casereader ("GET", get->file, get->encoding, get->dataset, + &r, &d)) + { + matrix_cmd_execute_get__ (get, r, d); + matrix_close_casereader (get->file, get->dataset, r, d); + } } static const char * @@ -6149,11 +6167,17 @@ static struct matrix_cmd * matrix_parse_mget (struct matrix_state *s) { struct matrix_cmd *cmd = xmalloc (sizeof *cmd); - *cmd = (struct matrix_cmd) { .type = MCMD_MGET, .mget = { .state = s } }; + *cmd = (struct matrix_cmd) { + .type = MCMD_MGET, + .mget = { + .state = s, + .rowtypes = STRINGI_SET_INITIALIZER (cmd->mget.rowtypes), + }, + }; struct mget_command *mget = &cmd->mget; - for (;;) + while (lex_token (s->lexer) != T_ENDCMD) { if (lex_match_id (s->lexer, "FILE")) { @@ -6164,6 +6188,17 @@ matrix_parse_mget (struct matrix_state *s) if (!mget->file) goto error; } + else if (lex_match_id (s->lexer, "ENCODING")) + { + lex_match (s->lexer, T_EQUALS); + if (!lex_force_string (s->lexer)) + goto error; + + free (mget->encoding); + mget->encoding = ss_xstrdup (lex_tokss (s->lexer)); + + lex_get (s->lexer); + } else if (lex_match_id (s->lexer, "TYPE")) { lex_match (s->lexer, T_EQUALS); @@ -6182,11 +6217,7 @@ matrix_parse_mget (struct matrix_state *s) lex_error_expecting (s->lexer, "FILE", "TYPE"); goto error; } - if (lex_token (s->lexer) == T_ENDCMD) - break; - - if (!lex_force_match (s->lexer, T_SLASH)) - goto error; + lex_match (s->lexer, T_SLASH); } return cmd; @@ -6227,7 +6258,9 @@ matrix_mget_commit_var (struct ccase **rows, size_t n_rows, const struct dictionary *d, const struct variable *rowtype_var, struct matrix_state *s, size_t si, size_t fi, - size_t cs, size_t cn) + size_t cs, size_t cn, + struct pivot_table *pt, + struct pivot_dimension *var_dimension) { if (!n_rows) return; @@ -6274,6 +6307,12 @@ matrix_mget_commit_var (struct ccase **rows, size_t n_rows, } } + int var_index = pivot_category_create_leaf ( + var_dimension->root, pivot_value_new_user_text (ds_cstr (&name), SIZE_MAX)); + double values[] = { n_rows, cn }; + for (size_t j = 0; j < sizeof values / sizeof *values; j++) + pivot_table_put2 (pt, j, var_index, pivot_value_new_integer (values[j])); + if (n_missing) msg (SE, ngettext ("Matrix data file variable %s contains a missing " "value, which was treated as zero.", @@ -6292,8 +6331,10 @@ static bool var_changed (const struct ccase *ca, const struct ccase *cb, const struct variable *var) { - return !value_equal (case_data (ca, var), case_data (cb, var), - var_get_width (var)); + return (ca && cb + ? !value_equal (case_data (ca, var), case_data (cb, var), + var_get_width (var)) + : ca || cb); } static bool @@ -6311,28 +6352,25 @@ vars_changed (const struct ccase *ca, const struct ccase *cb, } static void -matrix_cmd_execute_mget (struct mget_command *mget) +matrix_cmd_execute_mget__ (struct mget_command *mget, + struct casereader *r, const struct dictionary *d, + struct pivot_table *pt, + struct pivot_dimension *var_dimension) { - struct dictionary *d; - struct casereader *r = any_reader_open_and_decode (mget->file, "UTF-8", - &d, NULL); - if (!r) - return; - const struct variable *rowtype_ = get_a8_var (d, "ROWTYPE_"); const struct variable *varname_ = get_a8_var (d, "VARNAME_"); if (!rowtype_ || !varname_) - goto exit; + return; if (var_get_dict_index (rowtype_) >= var_get_dict_index (varname_)) { msg (SE, _("ROWTYPE_ must precede VARNAME_ in matrix data file.")); - goto exit; + return; } if (var_get_dict_index (varname_) + 1 >= dict_get_var_cnt (d)) { msg (SE, _("Matrix data file contains no continuous variables.")); - goto exit; + return; } for (size_t i = 0; i < dict_get_var_cnt (d); i++) @@ -6343,7 +6381,7 @@ matrix_cmd_execute_mget (struct mget_command *mget) msg (SE, _("Matrix data file contains unexpected string variable %s."), var_get_name (v)); - goto exit; + return; } } @@ -6372,25 +6410,24 @@ matrix_cmd_execute_mget (struct mget_command *mget) struct ccase *c; while ((c = casereader_read (r)) != NULL) { - bool sd = vars_changed (sc, c, d, ss, sn); - bool fd = sd || vars_changed (fc, c, d, fs, fn); - bool md = fd || !cc || var_changed (cc, c, rowtype_) || var_changed (cc, c, varname_); - if (sd) - { - si++; - case_unref (sc); - sc = case_ref (c); - } - if (fd) + enum { - fi++; - case_unref (fc); - fc = case_ref (c); + SPLITS_CHANGED, + FACTORS_CHANGED, + ROWTYPE_CHANGED, + NOTHING_CHANGED } - if (md) + change + = (sn && (!sc || vars_changed (sc, c, d, ss, sn)) ? SPLITS_CHANGED + : fn && (!fc || vars_changed (fc, c, d, fs, fn)) ? FACTORS_CHANGED + : !cc || var_changed (cc, c, rowtype_) ? ROWTYPE_CHANGED + : NOTHING_CHANGED); + + if (change != NOTHING_CHANGED) { matrix_mget_commit_var (rows, n_rows, d, rowtype_, - mget->state, si, fi, cs, cn); + mget->state, si, fi, cs, cn, + pt, var_dimension); n_rows = 0; case_unref (cc); cc = case_ref (c); @@ -6399,13 +6436,60 @@ matrix_cmd_execute_mget (struct mget_command *mget) if (n_rows >= allocated_rows) rows = x2nrealloc (rows, &allocated_rows, sizeof *rows); rows[n_rows++] = c; + + if (change == SPLITS_CHANGED) + { + si++; + case_unref (sc); + sc = case_ref (c); + + /* Reset the factor number, if there are factors. */ + if (fn) + { + fi = 1; + case_unref (fc); + fc = case_ref (c); + } + } + else if (change == FACTORS_CHANGED) + { + fi++; + case_unref (fc); + fc = case_ref (c); + } } matrix_mget_commit_var (rows, n_rows, d, rowtype_, - mget->state, si, fi, cs, cn); + mget->state, si, fi, cs, cn, + pt, var_dimension); free (rows); -exit: - casereader_destroy (r); + case_unref (sc); + case_unref (fc); + case_unref (cc); +} + +static void +matrix_cmd_execute_mget (struct mget_command *mget) +{ + struct casereader *r; + struct dictionary *d; + if (matrix_open_casereader ("MGET", mget->file, mget->encoding, + mget->state->dataset, &r, &d)) + { + struct pivot_table *pt = pivot_table_create ( + N_("Matrix Variables Created by MGET")); + pivot_dimension_create (pt, PIVOT_AXIS_COLUMN, N_("Dimension"), + N_("Rows"), N_("Columns")); + struct pivot_dimension *var_dimension = pivot_dimension_create ( + pt, PIVOT_AXIS_ROW, N_("Variable")); + matrix_cmd_execute_mget__ (mget, r, d, pt, var_dimension); + if (var_dimension->n_leaves) + pivot_table_submit (pt); + else + pivot_table_unref (pt); + + matrix_close_casereader (mget->file, mget->state->dataset, r, d); + } } static bool diff --git a/tests/language/stats/matrix.at b/tests/language/stats/matrix.at index 0db26d6eb4..abfdde3e4f 100644 --- a/tests/language/stats/matrix.at +++ b/tests/language/stats/matrix.at @@ -3213,4 +3213,97 @@ error: The SAVE command STRINGS subcommand specifies an unknown variable a. error: The SAVE command STRINGS subcommand specifies 2 unknown variables, including a. ]) +AT_CLEANUP + +AT_SETUP([MATRIX - MGET]) +AT_DATA([matrix.sps], [dnl +MATRIX DATA + VARIABLES=ROWTYPE_ var01 TO var08. +BEGIN DATA. +MEAN 24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7 +SD 5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5 +N 92 92 92 92 92 92 92 92 +CORR 1.00 +CORR .18 1.00 +CORR -.22 -.17 1.00 +CORR .36 .31 -.14 1.00 +CORR .27 .16 -.12 .22 1.00 +CORR .33 .15 -.17 .24 .21 1.00 +CORR .50 .29 -.20 .32 .12 .38 1.00 +CORR .17 .29 -.05 .20 .27 .20 .04 1.00 +END DATA. + +MATRIX. +MGET. +DISPLAY. +PRINT MN/FORMAT=F5.1. +PRINT SD/FORMAT=F5.1. +PRINT NC/FORMAT=F5.0. +PRINT CR/FORMAT=F5.2. +END MATRIX. +]) +AT_CHECK([pspp matrix.sps -O format=csv], [0], [dnl +Table: Matrix Variables +,Rows,Columns,Size (kB) +CR,8,8,0 +MN,1,8,0 +NC,1,8,0 +SD,1,8,0 + +MN +24.3 5.4 69.7 20.1 13.4 2.7 27.9 3.7 + +SD +5.7 1.5 23.5 5.8 2.8 4.5 5.4 1.5 + +NC +92 92 92 92 92 92 92 92 + +CR +1.00 .18 -.22 .36 .27 .33 .50 .17 +.18 1.00 -.17 .31 .16 .15 .29 .29 +-.22 -.17 1.00 -.14 -.12 -.17 -.20 -.05 +.36 .31 -.14 1.00 .22 .24 .32 .20 +.27 .16 -.12 .22 1.00 .21 .12 .27 +.33 .15 -.17 .24 .21 1.00 .38 .20 +.50 .29 -.20 .32 .12 .38 1.00 .04 +.17 .29 -.05 .20 .27 .20 .04 1.00 +]) +AT_CLEANUP + +AT_SETUP([MATRIX - MGET with split variables]) +AT_DATA([matrix.sps], [dnl +matrix data + variables = s1 s2 rowtype_ var01 var02 var03 + /split=s1 s2. + +begin data +8 0 mean 21.4 5.0 72.9 +8 0 sd 6.5 1.6 22.8 +8 0 n 106 106 106 +8 0 corr 1 +8 0 corr .41 1 +8 0 corr -.16 -.22 1 +8 1 mean 11.4 1.0 52.9 +8 1 sd 9.5 8.6 12.8 +8 1 n 10 11 12 +8 1 corr 1 +8 1 corr .51 1 +8 1 corr .36 -.41 1 +end data. + +MATRIX. +MGET. +PRINT MNS1/FORMAT=F5.1. +PRINT SDS1/FORMAT=F5.1. +PRINT NCS1/FORMAT=F5.0. +PRINT CRS1/FORMAT=F5.2. +PRINT MNS2/FORMAT=F5.1. +PRINT SDS2/FORMAT=F5.1. +PRINT NCS2/FORMAT=F5.0. +PRINT CRS2/FORMAT=F5.2. +END MATRIX. +]) +AT_CHECK([pspp -O format=csv matrix.sps], [dnl +]) AT_CLEANUP \ No newline at end of file