X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fmatrix-reader.c;h=f1dbae184facb82ecd97826cdf465b03d0a252a7;hb=e333a444bba7a84682b26a68c0894a8ca4f353a4;hp=fa7dcf481927605e44881a45b1c10d54ed34ab0c;hpb=a9ea9305a20963994fd26debd866720c019ba0d5;p=pspp diff --git a/src/language/data-io/matrix-reader.c b/src/language/data-io/matrix-reader.c index fa7dcf4819..f1dbae184f 100644 --- a/src/language/data-io/matrix-reader.c +++ b/src/language/data-io/matrix-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2017 Free Software Foundation, Inc. + Copyright (C) 2017, 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,10 +21,13 @@ #include #include +#include #include #include #include #include +#include +#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -84,9 +87,6 @@ struct matrix_reader gsl_matrix *n_vectors; gsl_matrix *mean_vectors; gsl_matrix *var_vectors; - - gsl_matrix *correlation; - gsl_matrix *covariance; }; struct matrix_reader * @@ -95,8 +95,8 @@ create_matrix_reader_from_case_reader (const struct dictionary *dict, struct cas { struct matrix_reader *mr = xzalloc (sizeof *mr); - mr->dict = dict; mr->varname = dict_lookup_var (dict, "varname_"); + mr->dict = dict; if (mr->varname == NULL) { msg (ME, _("Matrix dataset lacks a variable called %s."), "VARNAME_"); @@ -104,6 +104,14 @@ create_matrix_reader_from_case_reader (const struct dictionary *dict, struct cas return NULL; } + if (!var_is_alpha (mr->varname)) + { + msg (ME, _("Matrix dataset variable %s should be of string type."), + "VARNAME_"); + free (mr); + return NULL; + } + mr->rowtype = dict_lookup_var (dict, "rowtype_"); if (mr->rowtype == NULL) { @@ -112,6 +120,14 @@ create_matrix_reader_from_case_reader (const struct dictionary *dict, struct cas return NULL; } + if (!var_is_alpha (mr->rowtype)) + { + msg (ME, _("Matrix dataset variable %s should be of string type."), + "ROWTYPE_"); + free (mr); + return NULL; + } + size_t dvarcnt; const struct variable **dvars = NULL; dict_get_vars (dict, &dvars, &dvarcnt, DC_SCRATCH); @@ -122,7 +138,7 @@ create_matrix_reader_from_case_reader (const struct dictionary *dict, struct cas if (vars) { int i; - *vars = xcalloc (sizeof (struct variable **), *n_vars); + *vars = xcalloc (*n_vars, sizeof (struct variable **)); for (i = 0; i < *n_vars; ++i) { @@ -149,6 +165,30 @@ destroy_matrix_reader (struct matrix_reader *mr) } +/* + Allocates MATRIX if necessary, + and populates row MROW, from the data in C corresponding to + variables in VARS. N_VARS is the length of VARS. +*/ +static void +matrix_fill_row (gsl_matrix **matrix, + const struct ccase *c, int mrow, + const struct variable **vars, size_t n_vars) +{ + int col; + if (*matrix == NULL) + *matrix = gsl_matrix_alloc (n_vars, n_vars); + + for (col = 0; col < n_vars; ++col) + { + const struct variable *cv = vars [col]; + double x = case_data (c, cv)->f; + assert (col < (*matrix)->size2); + assert (mrow < (*matrix)->size1); + gsl_matrix_set (*matrix, mrow, col, x); + } +} + bool next_matrix_from_reader (struct matrix_material *mm, struct matrix_reader *mr, @@ -156,11 +196,11 @@ next_matrix_from_reader (struct matrix_material *mm, { struct casereader *group; + assert (vars); + gsl_matrix_free (mr->n_vectors); gsl_matrix_free (mr->mean_vectors); gsl_matrix_free (mr->var_vectors); - gsl_matrix_free (mr->correlation); - gsl_matrix_free (mr->covariance); if (!casegrouper_get_next_group (mr->grouper, &group)) return false; @@ -173,62 +213,72 @@ next_matrix_from_reader (struct matrix_material *mm, mm->mean_matrix = mr->mean_vectors; mm->var_matrix = mr->var_vectors; - mr->correlation = NULL; - mr->covariance = NULL; + struct substring *var_names = XCALLOC (n_vars, struct substring); + for (int i = 0; i < n_vars; ++i) + { + ss_alloc_substring (var_names + i, ss_cstr (var_get_name (vars[i]))); + } struct ccase *c; - int crow = 0; - for ( ; (c = casereader_read (group) ); case_unref (c)) + for (; (c = casereader_read (group)); case_unref (c)) { - const union value *uv = case_data (c, mr->rowtype); + const union value *uv = case_data (c, mr->rowtype); + const char *row_type = CHAR_CAST (const char *, uv->s); int col, row; for (col = 0; col < n_vars; ++col) { - const struct variable *cv - = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col); + const struct variable *cv = vars[col]; double x = case_data (c, cv)->f; - if (0 == strncasecmp ((char *)value_str (uv, 8), "N ", 8)) + if (0 == strncasecmp (row_type, "N ", 8)) for (row = 0; row < n_vars; ++row) gsl_matrix_set (mr->n_vectors, row, col, x); - else if (0 == strncasecmp ((char *) value_str (uv, 8), "MEAN ", 8)) + else if (0 == strncasecmp (row_type, "MEAN ", 8)) for (row = 0; row < n_vars; ++row) gsl_matrix_set (mr->mean_vectors, row, col, x); - else if (0 == strncasecmp ((char *) value_str (uv, 8), "STDDEV ", 8)) + else if (0 == strncasecmp (row_type, "STDDEV ", 8)) for (row = 0; row < n_vars; ++row) gsl_matrix_set (mr->var_vectors, row, col, x * x); } - if (0 == strncasecmp ((char *) value_str (uv, 8), "CORR ", 8)) + + const char *enc = dict_get_encoding (mr->dict); + + const union value *uvv = case_data (c, mr->varname); + int w = var_get_width (mr->varname); + + struct fmt_spec fmt = {FMT_A, 0, 0}; + fmt.w = w; + char *vname = data_out (uvv, enc, &fmt); + struct substring the_name = ss_cstr (vname); + + int mrow = -1; + for (int i = 0; i < n_vars; ++i) { - if (mr->correlation == NULL) - mr->correlation = gsl_matrix_alloc (n_vars, n_vars); - for (col = 0; col < n_vars; ++col) + if (ss_equals (var_names[i], the_name)) { - const struct variable *cv - = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col); - double x = case_data (c, cv)->f; - gsl_matrix_set (mr->correlation, crow, col, x); + mrow = i; + break; } - crow++; } - else if (0 == strncasecmp ((char *) value_str (uv, 8), "COV ", 8)) + free (vname); + + if (mrow == -1) + continue; + + if (0 == strncasecmp (row_type, "CORR ", 8)) + { + matrix_fill_row (&mm->corr, c, mrow, vars, n_vars); + } + else if (0 == strncasecmp (row_type, "COV ", 8)) { - if (mr->covariance == NULL) - mr->covariance = gsl_matrix_alloc (n_vars, n_vars); - for (col = 0; col < n_vars; ++col) - { - const struct variable *cv - = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col); - double x = case_data (c, cv)->f; - gsl_matrix_set (mr->covariance, crow, col, x); - } - crow++; + matrix_fill_row (&mm->cov, c, mrow, vars, n_vars); } } casereader_destroy (group); - mm->cov = mr->covariance; - mm->corr = mr->correlation; + for (int i = 0; i < n_vars; ++i) + ss_dealloc (var_names + i); + free (var_names); return true; }