X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fmatrix-data.c;h=50a34642be79cebb749cc07ff0cc7932618d3f50;hb=0fc606c52d7cec253af9b7463b15baabfbc9a33a;hp=db7c9d05bdd29941640027151b89daabc1965828;hpb=4de373154a9c151264dce1394d00abf95272a7a9;p=pspp diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index db7c9d05bd..50a34642be 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -34,6 +34,7 @@ #include "language/lexer/variable-parser.h" #include "libpspp/i18n.h" #include "libpspp/message.h" +#include "libpspp/misc.h" #include "gl/xsize.h" #include "gl/xalloc.h" @@ -65,6 +66,8 @@ enum triangle FULL }; +static const int ROWTYPE_WIDTH = 8; + struct matrix_format { enum triangle triangle; @@ -74,6 +77,7 @@ struct matrix_format int n_continuous_vars; struct variable **split_vars; size_t n_split_vars; + long n; }; /* @@ -94,28 +98,35 @@ valid rowtype_ values: PROX. */ -/* Sets the value of OUTCASE which corresponds to MFORMAT's varname variable - to the string STR. VAR must be of type string. +/* Sets the value of OUTCASE which corresponds to VNAME + to the value STR. VNAME must be of type string. */ static void -set_varname_column (struct ccase *outcase, const struct matrix_format *mformat, - const char *str, int len) +set_varname_column (struct ccase *outcase, const struct variable *vname, + const char *str) { - const struct variable *var = mformat->varname; - uint8_t *s = value_str_rw (case_data_rw (outcase, var), len); + int len = var_get_width (vname); + uint8_t *s = case_str_rw (outcase, vname); - strncpy ((char *) s, str, len); + strncpy (CHAR_CAST (char *, s), str, len); } +static void +blank_varname_column (struct ccase *outcase, const struct variable *vname) +{ + int len = var_get_width (vname); + uint8_t *s = case_str_rw (outcase, vname); + + memset (s, ' ', len); +} static struct casereader * preprocess (struct casereader *casereader0, const struct dictionary *dict, void *aux) { struct matrix_format *mformat = aux; const struct caseproto *proto = casereader_get_proto (casereader0); - struct casewriter *writer; - writer = autopaging_writer_create (proto); - + struct casewriter *writer = autopaging_writer_create (proto); + struct ccase *prev_case = NULL; double **matrices = NULL; size_t n_splits = 0; @@ -126,20 +137,30 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void /* Make an initial pass to populate our temporary matrix */ struct casereader *pass0 = casereader_clone (casereader0); struct ccase *c; - unsigned int prev_split_hash = 1; + union value *prev_values = XCALLOC (mformat->n_split_vars, union value); int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0; + bool first_case = true; for (; (c = casereader_read (pass0)) != NULL; case_unref (c)) { int s; - unsigned int split_hash = 0; - for (s = 0; s < mformat->n_split_vars; ++s) + bool match = false; + if (!first_case) { - const struct variable *svar = mformat->split_vars[s]; - const union value *sv = case_data (c, svar); - split_hash = value_hash (sv, var_get_width (svar), split_hash); + match = true; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + if (! value_equal (prev_values + s, sv, var_get_width (svar))) + { + match = false; + break; + } + } } + first_case = false; - if (matrices == NULL || prev_split_hash != split_hash) + if (matrices == NULL || ! match) { row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0; @@ -149,16 +170,31 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void matrices[n_splits - 1] = xmalloc (sizeof_matrix); } - prev_split_hash = split_hash; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + value_clone (prev_values + s, sv, var_get_width (svar)); + } int c_offset = (mformat->triangle == UPPER) ? row : 0; if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL) c_offset++; const union value *v = case_data (c, mformat->rowtype); - const char *val = (const char *) value_str (v, 8); - if (0 == strncasecmp (val, "corr ", 8) || - 0 == strncasecmp (val, "cov ", 8)) + const char *val = CHAR_CAST (const char *, v->s); + if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) || + 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH)) { + if (row >= mformat->n_continuous_vars) + { + msg (SE, + _("There are %d variable declared but the data has at least %d matrix rows."), + mformat->n_continuous_vars, row + 1); + case_unref (c); + casereader_destroy (pass0); + free (prev_values); + goto error; + } int col; for (col = c_offset; col < mformat->n_continuous_vars; ++col) { @@ -171,52 +207,103 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void if (e == SYSMIS) continue; - + /* Fill in the lower triangle */ (matrices[n_splits-1])[col + mformat->n_continuous_vars * row] = e; - (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e; + + if (mformat->triangle != FULL) + /* Fill in the upper triangle */ + (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e; } row++; } } casereader_destroy (pass0); + free (prev_values); + + if (!matrices) + goto error; /* Now make a second pass to fill in the other triangle from our temporary matrix */ const int idx = var_get_dict_index (mformat->varname); row = 0; - struct ccase *prev_case = NULL; - prev_split_hash = 1; + + if (mformat->n >= 0) + { + int col; + struct ccase *outcase = case_create (proto); + union value *v = case_data_rw (outcase, mformat->rowtype); + memcpy (v->s, "N ", ROWTYPE_WIDTH); + blank_varname_column (outcase, mformat->varname); + for (col = 0; col < mformat->n_continuous_vars; ++col) + { + union value *dest_val = + case_data_rw_idx (outcase, + 1 + col + var_get_dict_index (mformat->varname)); + dest_val->f = mformat->n; + } + casewriter_write (writer, outcase); + } + n_splits = 0; + prev_values = xcalloc (mformat->n_split_vars, sizeof *prev_values); + first_case = true; for (; (c = casereader_read (casereader0)) != NULL; prev_case = c) { int s; - unsigned int split_hash = 0; - for (s = 0; s < mformat->n_split_vars; ++s) + bool match = false; + if (!first_case) { - const struct variable *svar = mformat->split_vars[s]; - const union value *sv = case_data (c, svar); - split_hash = value_hash (sv, var_get_width (svar), split_hash); + match = true; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + if (! value_equal (prev_values + s, sv, var_get_width (svar))) + { + match = false; + break; + } + } } - if (prev_split_hash != split_hash) + first_case = false; + if (! match) { n_splits++; row = 0; } - prev_split_hash = split_hash; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + value_clone (prev_values + s, sv, var_get_width (svar)); + } case_unref (prev_case); + const union value *v = case_data (c, mformat->rowtype); + const char *val = CHAR_CAST (const char *, v->s); + if (mformat->n >= 0) + { + if (0 == strncasecmp (val, "n ", ROWTYPE_WIDTH) || + 0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH)) + { + msg (SW, + _("The N subcommand was specified, but a N record was also found in the data. The N record will be ignored.")); + continue; + } + } + struct ccase *outcase = case_create (proto); case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto)); - const union value *v = case_data (c, mformat->rowtype); - const char *val = (const char *) value_str (v, 8); - if (0 == strncasecmp (val, "corr ", 8) || - 0 == strncasecmp (val, "cov ", 8)) + + if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) || + 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH)) { int col; const struct variable *var = dict_get_var (dict, idx + 1 + row); - set_varname_column (outcase, mformat, var_get_name (var), 8); - value_copy (case_data_rw (outcase, mformat->rowtype), v, 8); + set_varname_column (outcase, mformat->varname, var_get_name (var)); + value_copy (case_data_rw (outcase, mformat->rowtype), v, ROWTYPE_WIDTH); for (col = 0; col < mformat->n_continuous_vars; ++col) { @@ -231,18 +318,18 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void } else { - set_varname_column (outcase, mformat, " ", 8); + blank_varname_column (outcase, mformat->varname); } /* Special case for SD and N_VECTOR: Rewrite as STDDEV and N respectively */ - if (0 == strncasecmp (val, "sd ", 8)) + if (0 == strncasecmp (val, "sd ", ROWTYPE_WIDTH)) { - value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), 8, + value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH, (uint8_t *) "STDDEV", 6, ' '); } - else if (0 == strncasecmp (val, "n_vector", 8)) + else if (0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH)) { - value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), 8, + value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH, (uint8_t *) "N", 1, ' '); } @@ -258,9 +345,8 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void if (prev_case) case_copy (outcase, 0, prev_case, 0, caseproto_get_n_widths (proto)); - const struct variable *var = dict_get_var (dict, idx + 1 + row); - set_varname_column (outcase, mformat, var_get_name (var), 8); + set_varname_column (outcase, mformat->varname, var_get_name (var)); for (col = 0; col < mformat->n_continuous_vars; ++col) { @@ -274,6 +360,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void casewriter_write (writer, outcase); } + free (prev_values); if (prev_case) case_unref (prev_case); @@ -285,6 +372,19 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void struct casereader *reader1 = casewriter_make_reader (writer); casereader_destroy (casereader0); return reader1; + + +error: + if (prev_case) + case_unref (prev_case); + + if (matrices) + for (i = 0 ; i < n_splits; ++i) + free (matrices[i]); + free (matrices); + casereader_destroy (casereader0); + casewriter_destroy (writer); + return NULL; } int @@ -304,6 +404,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) mformat.diagonal = DIAGONAL; mformat.n_split_vars = 0; mformat.split_vars = NULL; + mformat.n = -1; dict = (in_input_program () ? dataset_dict (ds) @@ -315,8 +416,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) data_parser_set_warn_missing_fields (parser, false); data_parser_set_span (parser, false); - mformat.rowtype = dict_create_var (dict, "ROWTYPE_", 8); - mformat.varname = dict_create_var (dict, "VARNAME_", 8); + mformat.rowtype = dict_create_var (dict, "ROWTYPE_", ROWTYPE_WIDTH); mformat.n_continuous_vars = 0; mformat.n_split_vars = 0; @@ -335,6 +435,15 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) goto error; } + int longest_name = 0; + for (i = 0; i < n_names; ++i) + { + maximize_int (&longest_name, strlen (names[i])); + } + + mformat.varname = dict_create_var (dict, "VARNAME_", + 8 * DIV_RND_UP (longest_name, 8)); + for (i = 0; i < n_names; ++i) { if (0 == strcasecmp (names[i], "ROWTYPE_")) @@ -366,7 +475,22 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) if (! lex_force_match (lexer, T_SLASH)) goto error; - if (lex_match_id (lexer, "FORMAT")) + if (lex_match_id (lexer, "N")) + { + lex_match (lexer, T_EQUALS); + + if (! lex_force_int (lexer)) + goto error; + + mformat.n = lex_integer (lexer); + if (mformat.n < 0) + { + msg (SE, _("%s must not be negative."), "N"); + goto error; + } + lex_get (lexer); + } + else if (lex_match_id (lexer, "FORMAT")) { lex_match (lexer, T_EQUALS); @@ -472,7 +596,8 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) } else { - data_parser_make_active_file (parser, ds, reader, dict, preprocess, &mformat); + data_parser_make_active_file (parser, ds, reader, dict, preprocess, + &mformat); } fh_unref (fh); @@ -484,7 +609,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) error: data_parser_destroy (parser); if (!in_input_program ()) - dict_destroy (dict); + dict_unref (dict); fh_unref (fh); free (encoding); free (mformat.split_vars); @@ -540,4 +665,3 @@ data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED) return retval; } -