/* PSPP - a program for statistical analysis.
- Copyright (C) 2017 Free Software Foundation, Inc.
+ Copyright (C) 2017, 2019 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <stdbool.h>
#include <libpspp/message.h>
+#include <libpspp/str.h>
#include <data/casegrouper.h>
#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/variable.h>
+#include <data/data-out.h>
+#include <data/format.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
m >= 0; n >= 0
-The variables ROWTYPE_ and VARNAME_ are of type A8,
-the variables s_x are of type F4.0 (although this reader accepts any type),
+The ROWTYPE_ variable is of type A8.
+The VARNAME_ variable is a string type whose width is not predetermined.
+The variables s_x are of type F4.0 (although this reader accepts any type),
and v_x are of any numeric type.
The values of the ROWTYPE_ variable are in the set {MEAN, STDDEV, N, CORR, COV}
gsl_matrix *n_vectors;
gsl_matrix *mean_vectors;
gsl_matrix *var_vectors;
-
- gsl_matrix *correlation;
- gsl_matrix *covariance;
};
struct matrix_reader *
{
struct matrix_reader *mr = xzalloc (sizeof *mr);
- mr->dict = dict;
mr->varname = dict_lookup_var (dict, "varname_");
+ mr->dict = dict;
if (mr->varname == NULL)
{
msg (ME, _("Matrix dataset lacks a variable called %s."), "VARNAME_");
return NULL;
}
+ if (!var_is_alpha (mr->varname))
+ {
+ msg (ME, _("Matrix dataset variable %s should be of string type."),
+ "VARNAME_");
+ free (mr);
+ return NULL;
+ }
+
mr->rowtype = dict_lookup_var (dict, "rowtype_");
if (mr->rowtype == NULL)
{
return NULL;
}
+ if (!var_is_alpha (mr->rowtype))
+ {
+ msg (ME, _("Matrix dataset variable %s should be of string type."),
+ "ROWTYPE_");
+ free (mr);
+ return NULL;
+ }
+
size_t dvarcnt;
const struct variable **dvars = NULL;
dict_get_vars (dict, &dvars, &dvarcnt, DC_SCRATCH);
}
+/*
+   Copies one row of a square matrix out of case C.
+
+   If *MATRIX is NULL, an N_VARS x N_VARS matrix is allocated first and
+   every element is set to SYSMIS, so that rows which are never supplied
+   by the input do not contain indeterminate values.  The caller owns the
+   matrix and must release it with gsl_matrix_free.
+
+   Row MROW of *MATRIX is then populated: column I receives the numeric
+   value which C holds for VARS[I].  N_VARS is the length of VARS.
+   MROW must be a valid (non-negative) row index of *MATRIX.
+*/
+static void
+matrix_fill_row (gsl_matrix **matrix,
+                 const struct ccase *c, int mrow,
+                 const struct variable **vars, size_t n_vars)
+{
+  if (*matrix == NULL)
+    {
+      *matrix = gsl_matrix_alloc (n_vars, n_vars);
+      /* gsl_matrix_alloc does not initialise its elements. */
+      gsl_matrix_set_all (*matrix, SYSMIS);
+    }
+
+  for (size_t col = 0; col < n_vars; ++col)
+    {
+      const struct variable *cv = vars[col];
+      double x = case_data (c, cv)->f;
+      /* A pre-existing matrix may have been allocated with a different
+         size, so check both indices against its actual dimensions. */
+      assert (col < (*matrix)->size2);
+      assert (mrow >= 0 && (size_t) mrow < (*matrix)->size1);
+      gsl_matrix_set (*matrix, mrow, col, x);
+    }
+}
+
+
bool
next_matrix_from_reader (struct matrix_material *mm,
struct matrix_reader *mr,
{
struct casereader *group;
+ assert (vars);
+
gsl_matrix_free (mr->n_vectors);
gsl_matrix_free (mr->mean_vectors);
gsl_matrix_free (mr->var_vectors);
- gsl_matrix_free (mr->correlation);
- gsl_matrix_free (mr->covariance);
if (!casegrouper_get_next_group (mr->grouper, &group))
return false;
mm->mean_matrix = mr->mean_vectors;
mm->var_matrix = mr->var_vectors;
- mr->correlation = NULL;
- mr->covariance = NULL;
+ struct substring *var_names = xcalloc (n_vars, sizeof *var_names);
+ for (int i = 0; i < n_vars; ++i)
+ {
+ ss_alloc_substring (var_names + i, ss_cstr (var_get_name (vars[i])));
+ }
struct ccase *c;
- int crow = 0;
for ( ; (c = casereader_read (group) ); case_unref (c))
{
- const union value *uv = case_data (c, mr->rowtype);
+ const union value *uv = case_data (c, mr->rowtype);
+ const char *row_type = CHAR_CAST (const char *, uv->s);
int col, row;
for (col = 0; col < n_vars; ++col)
{
- const struct variable *cv
- = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col);
+ const struct variable *cv = vars[col];
double x = case_data (c, cv)->f;
- if (0 == strncasecmp ((char *)value_str (uv, 8), "N ", 8))
+ if (0 == strncasecmp (row_type, "N ", 8))
for (row = 0; row < n_vars; ++row)
gsl_matrix_set (mr->n_vectors, row, col, x);
- else if (0 == strncasecmp ((char *) value_str (uv, 8), "MEAN ", 8))
+ else if (0 == strncasecmp (row_type, "MEAN ", 8))
for (row = 0; row < n_vars; ++row)
gsl_matrix_set (mr->mean_vectors, row, col, x);
- else if (0 == strncasecmp ((char *) value_str (uv, 8), "STDDEV ", 8))
+ else if (0 == strncasecmp (row_type, "STDDEV ", 8))
for (row = 0; row < n_vars; ++row)
gsl_matrix_set (mr->var_vectors, row, col, x * x);
}
- if (0 == strncasecmp ((char *) value_str (uv, 8), "CORR ", 8))
+
+ const char *enc = dict_get_encoding (mr->dict);
+
+ const union value *uvv = case_data (c, mr->varname);
+ int w = var_get_width (mr->varname);
+
+ struct fmt_spec fmt = {FMT_A, 0, 0};
+ fmt.w = w;
+ char *vname = data_out (uvv, enc, &fmt);
+ struct substring the_name = ss_cstr (vname);
+
+ int mrow = -1;
+ for (int i = 0; i < n_vars; ++i)
{
- if (mr->correlation == NULL)
- mr->correlation = gsl_matrix_alloc (n_vars, n_vars);
- for (col = 0; col < n_vars; ++col)
+ if (ss_equals (var_names[i], the_name))
{
- const struct variable *cv
- = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col);
- double x = case_data (c, cv)->f;
- gsl_matrix_set (mr->correlation, crow, col, x);
+ mrow = i;
+ break;
}
- crow++;
}
- else if (0 == strncasecmp ((char *) value_str (uv, 8), "COV ", 8))
+ free (vname);
+
+ if (mrow == -1)
+ continue;
+
+ if (0 == strncasecmp (row_type, "CORR ", 8))
+ {
+ matrix_fill_row (&mm->corr, c, mrow, vars, n_vars);
+ }
+ else if (0 == strncasecmp (row_type, "COV ", 8))
{
- if (mr->covariance == NULL)
- mr->covariance = gsl_matrix_alloc (n_vars, n_vars);
- for (col = 0; col < n_vars; ++col)
- {
- const struct variable *cv
- = vars ? vars[col] : dict_get_var (mr->dict, var_get_dict_index (mr->varname) + 1 + col);
- double x = case_data (c, cv)->f;
- gsl_matrix_set (mr->covariance, crow, col, x);
- }
- crow++;
+ matrix_fill_row (&mm->cov, c, mrow, vars, n_vars);
}
}
casereader_destroy (group);
- mm->cov = mr->covariance;
- mm->corr = mr->correlation;
+ for (int i = 0; i < n_vars; ++i)
+ ss_dealloc (var_names + i);
+ free (var_names);
return true;
}