X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fmatrix-reader.c;h=c0cee0474310b5b1ad3e9a47c792d6e316baad34;hb=35717813005e999b6b807fc3f4bd6bb2d770f301;hp=fa7dcf481927605e44881a45b1c10d54ed34ab0c;hpb=a9ea9305a20963994fd26debd866720c019ba0d5;p=pspp diff --git a/src/language/data-io/matrix-reader.c b/src/language/data-io/matrix-reader.c index fa7dcf4819..c0cee04743 100644 --- a/src/language/data-io/matrix-reader.c +++ b/src/language/data-io/matrix-reader.c @@ -20,6 +20,7 @@ #include +#include #include #include #include @@ -76,7 +77,6 @@ s_0 ROWTYPE_ VARNAME_ v_0 v_1 v_2 struct matrix_reader { - const struct dictionary *dict; const struct variable *varname; const struct variable *rowtype; struct casegrouper *grouper; @@ -84,9 +84,6 @@ struct matrix_reader gsl_matrix *n_vectors; gsl_matrix *mean_vectors; gsl_matrix *var_vectors; - - gsl_matrix *correlation; - gsl_matrix *covariance; }; struct matrix_reader * @@ -95,7 +92,6 @@ create_matrix_reader_from_case_reader (const struct dictionary *dict, struct cas { struct matrix_reader *mr = xzalloc (sizeof *mr); - mr->dict = dict; mr->varname = dict_lookup_var (dict, "varname_"); if (mr->varname == NULL) { @@ -104,6 +100,14 @@ create_matrix_reader_from_case_reader (const struct dictionary *dict, struct cas return NULL; } + if (!var_is_alpha (mr->varname)) + { + msg (ME, _("Matrix dataset variable %s should be of string type."), + "VARNAME_"); + free (mr); + return NULL; + } + mr->rowtype = dict_lookup_var (dict, "rowtype_"); if (mr->rowtype == NULL) { @@ -112,6 +116,14 @@ create_matrix_reader_from_case_reader (const struct dictionary *dict, struct cas return NULL; } + if (!var_is_alpha (mr->rowtype)) + { + msg (ME, _("Matrix dataset variable %s should be of string type."), + "ROWTYPE_"); + free (mr); + return NULL; + } + size_t dvarcnt; const struct variable **dvars = NULL; dict_get_vars (dict, &dvars, &dvarcnt, DC_SCRATCH); @@ -149,6 +161,30 @@ destroy_matrix_reader (struct matrix_reader *mr) } +/* + Allocates MATRIX if necessary, + and populates row MROW, from the data in C corresponding to + variables in VARS. N_VARS is the length of VARS. +*/ +static void +matrix_fill_row (gsl_matrix **matrix, + const struct ccase *c, int mrow, + const struct variable **vars, size_t n_vars) +{ + int col; + if (*matrix == NULL) + *matrix = gsl_matrix_alloc (n_vars, n_vars); + + for (col = 0; col < n_vars; ++col) + { + const struct variable *cv = vars [col]; + double x = case_data (c, cv)->f; + assert (col < (*matrix)->size2); + assert (mrow < (*matrix)->size1); + gsl_matrix_set (*matrix, mrow, col, x); + } +} + bool next_matrix_from_reader (struct matrix_material *mm, struct matrix_reader *mr, @@ -156,11 +192,11 @@ next_matrix_from_reader (struct matrix_material *mm, { struct casereader *group; + assert (vars); + gsl_matrix_free (mr->n_vectors); gsl_matrix_free (mr->mean_vectors); gsl_matrix_free (mr->var_vectors); - gsl_matrix_free (mr->correlation); - gsl_matrix_free (mr->covariance); if (!casegrouper_get_next_group (mr->grouper, &group)) return false; @@ -173,19 +209,28 @@ next_matrix_from_reader (struct matrix_material *mm, mm->mean_matrix = mr->mean_vectors; mm->var_matrix = mr->var_vectors; - mr->correlation = NULL; - mr->covariance = NULL; + // FIXME: Make this into a hash table. + unsigned long *table = xmalloc (sizeof (*table) * n_vars); + int i; + for (i = 0; i < n_vars; ++i) + { + const int w = var_get_width (mr->varname); + char s[w]; + memset (s, 0, w); + const char *name = var_get_name (vars[i]); + strncpy (s, name, w); + unsigned long h = hash_bytes (s, w, 0); + table[i] = h; + } struct ccase *c; - int crow = 0; for ( ; (c = casereader_read (group) ); case_unref (c)) { const union value *uv = case_data (c, mr->rowtype); int col, row; for (col = 0; col < n_vars; ++col) { - const struct variable *cv - = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col); + const struct variable *cv = vars[col]; double x = case_data (c, cv)->f; if (0 == strncasecmp ((char *)value_str (uv, 8), "N ", 8)) for (row = 0; row < n_vars; ++row) @@ -197,38 +242,38 @@ next_matrix_from_reader (struct matrix_material *mm, for (row = 0; row < n_vars; ++row) gsl_matrix_set (mr->var_vectors, row, col, x * x); } - if (0 == strncasecmp ((char *) value_str (uv, 8), "CORR ", 8)) + + const union value *uvv = case_data (c, mr->varname); + const uint8_t *vs = value_str (uvv, var_get_width (mr->varname)); + int w = var_get_width (mr->varname); + unsigned long h = hash_bytes (vs, w, 0); + + int mrow = -1; + for (i = 0; i < n_vars; ++i) { - if (mr->correlation == NULL) - mr->correlation = gsl_matrix_alloc (n_vars, n_vars); - for (col = 0; col < n_vars; ++col) + if (table[i] == h) { - const struct variable *cv - = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col); - double x = case_data (c, cv)->f; - gsl_matrix_set (mr->correlation, crow, col, x); + mrow = i; + break; } - crow++; + } + + if (mrow == -1) + continue; + + if (0 == strncasecmp ((char *) value_str (uv, 8), "CORR ", 8)) + { + matrix_fill_row (&mm->corr, c, mrow, vars, n_vars); } else if (0 == strncasecmp ((char *) value_str (uv, 8), "COV ", 8)) { - if (mr->covariance == NULL) - mr->covariance = gsl_matrix_alloc (n_vars, n_vars); - for (col = 0; col < n_vars; ++col) - { - const struct variable *cv - = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col); - double x = case_data (c, cv)->f; - gsl_matrix_set (mr->covariance, crow, col, x); - } - crow++; + matrix_fill_row (&mm->cov, c, mrow, vars, n_vars); } } casereader_destroy (group); - mm->cov = mr->covariance; - mm->corr = mr->correlation; + free (table); return true; }