X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fmatrix-data.c;h=7676bb1de0157d45c0c74ead6f2a18c387378cc6;hb=a49b940e58f148bf111c647d9b4822025636ff80;hp=752a0be3d26105c5930aa8e1b15b0b093ffcebd1;hpb=b4e3d932f4dfbdf3e51c81b78daabb40e23528b2;p=pspp diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index 752a0be3d2..7676bb1de0 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -77,6 +77,7 @@ struct matrix_format int n_continuous_vars; struct variable **split_vars; size_t n_split_vars; + long n; }; /* @@ -105,16 +106,16 @@ set_varname_column (struct ccase *outcase, const struct variable *vname, const char *str) { int len = var_get_width (vname); - uint8_t *s = value_str_rw (case_data_rw (outcase, vname), len); + uint8_t *s = case_str_rw (outcase, vname); - strncpy ((char *) s, str, len); + strncpy (CHAR_CAST (char *, s), str, len); } static void blank_varname_column (struct ccase *outcase, const struct variable *vname) { int len = var_get_width (vname); - uint8_t *s = value_str_rw (case_data_rw (outcase, vname), len); + uint8_t *s = case_str_rw (outcase, vname); memset (s, ' ', len); } @@ -124,8 +125,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void { struct matrix_format *mformat = aux; const struct caseproto *proto = casereader_get_proto (casereader0); - struct casewriter *writer; - writer = autopaging_writer_create (proto); + struct casewriter *writer = autopaging_writer_create (proto); struct ccase *prev_case = NULL; double **matrices = NULL; size_t n_splits = 0; @@ -137,20 +137,30 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void /* Make an initial pass to populate our temporary matrix */ struct casereader *pass0 = casereader_clone (casereader0); struct ccase *c; - unsigned int prev_split_hash = 1; + union value *prev_values = xcalloc (mformat->n_split_vars, sizeof *prev_values); int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0; + bool first_case = true; for (; (c = casereader_read (pass0)) != NULL; case_unref (c)) { int s; - unsigned int split_hash = 0; - for (s = 0; s < mformat->n_split_vars; ++s) + bool match = false; + if (!first_case) { - const struct variable *svar = mformat->split_vars[s]; - const union value *sv = case_data (c, svar); - split_hash = value_hash (sv, var_get_width (svar), split_hash); + match = true; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + if (! value_equal (prev_values + s, sv, var_get_width (svar))) + { + match = false; + break; + } + } } + first_case = false; - if (matrices == NULL || prev_split_hash != split_hash) + if (matrices == NULL || ! match) { row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0; @@ -160,13 +170,18 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void matrices[n_splits - 1] = xmalloc (sizeof_matrix); } - prev_split_hash = split_hash; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + value_clone (prev_values + s, sv, var_get_width (svar)); + } int c_offset = (mformat->triangle == UPPER) ? row : 0; if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL) c_offset++; const union value *v = case_data (c, mformat->rowtype); - const char *val = (const char *) value_str (v, ROWTYPE_WIDTH); + const char *val = CHAR_CAST (const char *, v->s); if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) || 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH)) { @@ -175,6 +190,9 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void msg (SE, _("There are %d variable declared but the data has at least %d matrix rows."), mformat->n_continuous_vars, row + 1); + case_unref (c); + casereader_destroy (pass0); + free (prev_values); goto error; } int col; @@ -200,37 +218,82 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void } } casereader_destroy (pass0); + free (prev_values); /* Now make a second pass to fill in the other triangle from our temporary matrix */ const int idx = var_get_dict_index (mformat->varname); row = 0; - prev_split_hash = 1; + if (mformat->n >= 0) + { + int col; + struct ccase *outcase = case_create (proto); + union value *v = case_data_rw (outcase, mformat->rowtype); + memcpy (v->s, "N ", ROWTYPE_WIDTH); + blank_varname_column (outcase, mformat->varname); + for (col = 0; col < mformat->n_continuous_vars; ++col) + { + union value *dest_val = + case_data_rw_idx (outcase, + 1 + col + var_get_dict_index (mformat->varname)); + dest_val->f = mformat->n; + } + casewriter_write (writer, outcase); + } + n_splits = 0; + prev_values = xcalloc (mformat->n_split_vars, sizeof *prev_values); + first_case = true; for (; (c = casereader_read (casereader0)) != NULL; prev_case = c) { int s; - unsigned int split_hash = 0; - for (s = 0; s < mformat->n_split_vars; ++s) + bool match = false; + if (!first_case) { - const struct variable *svar = mformat->split_vars[s]; - const union value *sv = case_data (c, svar); - split_hash = value_hash (sv, var_get_width (svar), split_hash); + match = true; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + if (! value_equal (prev_values + s, sv, var_get_width (svar))) + { + match = false; + break; + } + } } - if (prev_split_hash != split_hash) + first_case = false; + if (! match) { n_splits++; row = 0; } - prev_split_hash = split_hash; + for (s = 0; s < mformat->n_split_vars; ++s) + { + const struct variable *svar = mformat->split_vars[s]; + const union value *sv = case_data (c, svar); + value_clone (prev_values + s, sv, var_get_width (svar)); + } case_unref (prev_case); + const union value *v = case_data (c, mformat->rowtype); + const char *val = CHAR_CAST (const char *, v->s); + if (mformat->n >= 0) + { + if (0 == strncasecmp (val, "n ", ROWTYPE_WIDTH) || + 0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH)) + { + msg (SW, + _("The N subcommand was specified, but a N record was also found in the data. The N record will be ignored.")); + continue; + } + } + struct ccase *outcase = case_create (proto); case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto)); - const union value *v = case_data (c, mformat->rowtype); - const char *val = (const char *) value_str (v, ROWTYPE_WIDTH); + if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) || 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH)) { @@ -294,7 +357,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void casewriter_write (writer, outcase); } - + free (prev_values); if (prev_case) case_unref (prev_case); @@ -316,6 +379,7 @@ error: free (matrices[i]); free (matrices); casereader_destroy (casereader0); + casewriter_destroy (writer); return NULL; } @@ -336,6 +400,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) mformat.diagonal = DIAGONAL; mformat.n_split_vars = 0; mformat.split_vars = NULL; + mformat.n = -1; dict = (in_input_program () ? dataset_dict (ds) @@ -406,7 +471,22 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) if (! lex_force_match (lexer, T_SLASH)) goto error; - if (lex_match_id (lexer, "FORMAT")) + if (lex_match_id (lexer, "N")) + { + lex_match (lexer, T_EQUALS); + + if (! lex_force_int (lexer)) + goto error; + + mformat.n = lex_integer (lexer); + if (mformat.n < 0) + { + msg (SE, _("%s must not be negative."), "N"); + goto error; + } + lex_get (lexer); + } + else if (lex_match_id (lexer, "FORMAT")) { lex_match (lexer, T_EQUALS); @@ -512,7 +592,8 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) } else { - data_parser_make_active_file (parser, ds, reader, dict, preprocess, &mformat); + data_parser_make_active_file (parser, ds, reader, dict, preprocess, + &mformat); } fh_unref (fh); @@ -524,7 +605,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds) error: data_parser_destroy (parser); if (!in_input_program ()) - dict_destroy (dict); + dict_unref (dict); fh_unref (fh); free (encoding); free (mformat.split_vars); @@ -580,4 +661,3 @@ data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED) return retval; } -