X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fmatrix-data.c;h=609e5eb1fd5156365093ac9c70851ab2dbcfce5b;hb=b6d66ec3f328d0e8bf35b71f29332695121f7173;hp=fb214fd644a399c517652e854ad2c2ba31be804b;hpb=bc37c3f489947cf081a930a5d4e58dbd133eb563;p=pspp diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index fb214fd644..609e5eb1fd 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -34,6 +34,7 @@ #include "language/lexer/variable-parser.h" #include "libpspp/i18n.h" #include "libpspp/message.h" +#include "libpspp/misc.h" #include "gl/xsize.h" #include "gl/xalloc.h" @@ -65,6 +66,8 @@ enum triangle FULL }; +static const int ROWTYPE_WIDTH = 8; + struct matrix_format { enum triangle triangle; @@ -74,6 +77,7 @@ struct matrix_format int n_continuous_vars; struct variable **split_vars; size_t n_split_vars; + long n; }; /* @@ -94,19 +98,27 @@ valid rowtype_ values: PROX. */ -/* Sets the value of OUTCASE which corresponds to MFORMAT's varname variable - to the string STR. VAR must be of type string. +/* Sets the value of OUTCASE which corresponds to VNAME + to the value STR. VNAME must be of type string. */ static void -set_varname_column (struct ccase *outcase, const struct matrix_format *mformat, - const char *str, int len) +set_varname_column (struct ccase *outcase, const struct variable *vname, + const char *str) { - const struct variable *var = mformat->varname; - uint8_t *s = value_str_rw (case_data_rw (outcase, var), len); + int len = var_get_width (vname); + uint8_t *s = value_str_rw (case_data_rw (outcase, vname), len); strncpy ((char *) s, str, len); } +static void +blank_varname_column (struct ccase *outcase, const struct variable *vname) +{ + int len = var_get_width (vname); + uint8_t *s = value_str_rw (case_data_rw (outcase, vname), len); + + memset (s, ' ', len); +} static struct casereader * preprocess (struct casereader *casereader0, const struct dictionary *dict, void *aux) @@ -115,7 +127,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void const struct caseproto *proto = casereader_get_proto (casereader0); struct casewriter *writer; writer = autopaging_writer_create (proto); - + struct ccase *prev_case = NULL; double **matrices = NULL; size_t n_splits = 0; @@ -155,10 +167,17 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL) c_offset++; const union value *v = case_data (c, mformat->rowtype); - const char *val = (const char *) value_str (v, 8); - if (0 == strncasecmp (val, "corr ", 8) || - 0 == strncasecmp (val, "cov ", 8)) + const char *val = (const char *) value_str (v, ROWTYPE_WIDTH); + if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) || + 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH)) { + if (row >= mformat->n_continuous_vars) + { + msg (SE, + _("There are %d variable declared but the data has at least %d matrix rows."), + mformat->n_continuous_vars, row + 1); + goto error; + } int col; for (col = c_offset; col < mformat->n_continuous_vars; ++col) { @@ -171,9 +190,12 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void if (e == SYSMIS) continue; - + /* Fill in the lower triangle */ (matrices[n_splits-1])[col + mformat->n_continuous_vars * row] = e; - (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e; + + if (mformat->triangle != FULL) + /* Fill in the upper triangle */ + (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e; } row++; } @@ -184,7 +206,25 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void temporary matrix */ const int idx = var_get_dict_index (mformat->varname); row = 0; - struct ccase *prev_case = NULL; + + if (mformat->n >= 0) + { + int col; + struct ccase *outcase = case_create (proto); + union value *v = case_data_rw (outcase, mformat->rowtype); + uint8_t *n = value_str_rw (v, ROWTYPE_WIDTH); + strncpy ((char *) n, "N ", ROWTYPE_WIDTH); + blank_varname_column (outcase, mformat->varname); + for (col = 0; col < mformat->n_continuous_vars; ++col) + { + union value *dest_val = + case_data_rw_idx (outcase, + 1 + col + var_get_dict_index (mformat->varname)); + dest_val->f = mformat->n; + } + casewriter_write (writer, outcase); + } + prev_split_hash = 1; n_splits = 0; for (; (c = casereader_read (casereader0)) != NULL; prev_case = c) @@ -204,19 +244,30 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void } prev_split_hash = split_hash; - case_unref (prev_case); + const union value *v = case_data (c, mformat->rowtype); + const char *val = (const char *) value_str (v, ROWTYPE_WIDTH); + if (mformat->n >= 0) + { + if (0 == strncasecmp (val, "n ", ROWTYPE_WIDTH) || + 0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH)) + { + msg (SW, + _("The N subcommand was specified, but a N record was also found in the data. The N record will be ignored.")); + continue; + } + } + struct ccase *outcase = case_create (proto); case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto)); - const union value *v = case_data (c, mformat->rowtype); - const char *val = (const char *) value_str (v, 8); - if (0 == strncasecmp (val, "corr ", 8) || - 0 == strncasecmp (val, "cov ", 8)) + + if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) || + 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH)) { int col; const struct variable *var = dict_get_var (dict, idx + 1 + row); - set_varname_column (outcase, mformat, var_get_name (var), 8); - value_copy (case_data_rw (outcase, mformat->rowtype), v, 8); + set_varname_column (outcase, mformat->varname, var_get_name (var)); + value_copy (case_data_rw (outcase, mformat->rowtype), v, ROWTYPE_WIDTH); for (col = 0; col < mformat->n_continuous_vars; ++col) { @@ -231,18 +282,18 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void } else { - set_varname_column (outcase, mformat, " ", 8); + blank_varname_column (outcase, mformat->varname); } /* Special case for SD and N_VECTOR: Rewrite as STDDEV and N respectively */ - if (0 == strncasecmp (val, "sd ", 8)) + if (0 == strncasecmp (val, "sd ", ROWTYPE_WIDTH)) { - value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), 8, + value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH, (uint8_t *) "STDDEV", 6, ' '); } - else if (0 == strncasecmp (val, "n_vector", 8)) + else if (0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH)) { - value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), 8, + value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH, (uint8_t *) "N", 1, ' '); } @@ -258,9 +309,8 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void if (prev_case) case_copy (outcase, 0, prev_case, 0, caseproto_get_n_widths (proto)); - const struct variable *var = dict_get_var (dict, idx + 1 + row); - set_varname_column (outcase, mformat, var_get_name (var), 8); + set_varname_column (outcase, mformat->varname, var_get_name (var)); for (col = 0; col < mformat->n_continuous_vars; ++col) { @@ -275,6 +325,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void casewriter_write (writer, outcase); } + if (prev_case) case_unref (prev_case); @@ -285,6 +336,17 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void struct casereader *reader1 = casewriter_make_reader (writer); casereader_destroy (casereader0); return reader1; + + +error: + if (prev_case) + case_unref (prev_case); + + for (i = 0 ; i < n_splits; ++i) + free (matrices[i]); + free (matrices); + casereader_destroy (casereader0); + return NULL; } int @@ -302,6 +364,9 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) mformat.triangle = LOWER; mformat.diagonal = DIAGONAL; + mformat.n_split_vars = 0; + mformat.split_vars = NULL; + mformat.n = -1; dict = (in_input_program () ? dataset_dict (ds) @@ -313,8 +378,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) data_parser_set_warn_missing_fields (parser, false); data_parser_set_span (parser, false); - mformat.rowtype = dict_create_var (dict, "ROWTYPE_", 8); - mformat.varname = dict_create_var (dict, "VARNAME_", 8); + mformat.rowtype = dict_create_var (dict, "ROWTYPE_", ROWTYPE_WIDTH); mformat.n_continuous_vars = 0; mformat.n_split_vars = 0; @@ -324,7 +388,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) lex_match (lexer, T_EQUALS); - if (! parse_mixed_vars (lexer, dict, &names, &n_names, 0)) + if (! parse_mixed_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE)) { int i; for (i = 0; i < n_names; ++i) @@ -333,6 +397,15 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) goto error; } + int longest_name = 0; + for (i = 0; i < n_names; ++i) + { + maximize_int (&longest_name, strlen (names[i])); + } + + mformat.varname = dict_create_var (dict, "VARNAME_", + 8 * DIV_RND_UP (longest_name, 8)); + for (i = 0; i < n_names; ++i) { if (0 == strcasecmp (names[i], "ROWTYPE_")) @@ -364,7 +437,22 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) if (! lex_force_match (lexer, T_SLASH)) goto error; - if (lex_match_id (lexer, "FORMAT")) + if (lex_match_id (lexer, "N")) + { + lex_match (lexer, T_EQUALS); + + if (! lex_force_int (lexer)) + goto error; + + mformat.n = lex_integer (lexer); + if (mformat.n < 0) + { + msg (SE, _("%s must not be negative."), "N"); + goto error; + } + lex_get (lexer); + } + else if (lex_match_id (lexer, "FORMAT")) { lex_match (lexer, T_EQUALS); @@ -475,6 +563,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) fh_unref (fh); free (encoding); + free (mformat.split_vars); return CMD_DATA_LIST; @@ -484,6 +573,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) dict_destroy (dict); fh_unref (fh); free (encoding); + free (mformat.split_vars); return CMD_CASCADING_FAILURE; }