gsl_matrix_free (mm->var_matrix);
}
\f
-struct matrix_reader
+/* Looks up the variable called NAME in DICT and checks that it is a string
+   variable.  Returns the variable on success.  If DICT has no such variable,
+   or the variable is not a string variable, reports an error with msg() and
+   returns NULL. */
+static const struct variable *
+find_matrix_string_var (const struct dictionary *dict, const char *name)
{
- const struct dictionary *dict;
- const struct variable *varname;
- const struct variable *rowtype;
- struct casegrouper *grouper;
-};
-
-struct matrix_reader *
-create_matrix_reader_from_case_reader (const struct dictionary *dict, struct casereader *in_reader,
- const struct variable ***vars, size_t *n_vars)
-{
- struct matrix_reader *mr = xzalloc (sizeof *mr);
-
- mr->varname = dict_lookup_var (dict, "varname_");
- mr->dict = dict;
- if (mr->varname == NULL)
+ const struct variable *var = dict_lookup_var (dict, name);
+ if (var == NULL)
{
- msg (ME, _("Matrix dataset lacks a variable called %s."), "VARNAME_");
- free (mr);
+ msg (ME, _("Matrix dataset lacks a variable called %s."), name);
return NULL;
}
-
- if (!var_is_alpha (mr->varname))
+ if (!var_is_alpha (var))
{
- msg (ME, _("Matrix dataset variable %s should be of string type."),
- "VARNAME_");
- free (mr);
+ msg (ME, _("Matrix dataset variable %s should be of string type."), name);
return NULL;
}
+ return var;
+}
- mr->rowtype = dict_lookup_var (dict, "rowtype_");
- if (mr->rowtype == NULL)
- {
- msg (ME, _("Matrix dataset lacks a variable called %s."), "ROWTYPE_");
- free (mr);
- return NULL;
- }
+/* Creates and returns a matrix reader for a matrix dataset described by DICT
+   whose cases come from IN_READER.
+
+   DICT must contain string variables VARNAME_ and ROWTYPE_, every other
+   variable must be numeric, and at least one variable must follow VARNAME_.
+   The variables after VARNAME_ become the reader's continuous variables
+   (copied into the new reader); the variables before ROWTYPE_ are passed to
+   casegrouper_create_vars() as the variables that split the cases into
+   groups.
+
+   On any validation failure an error is reported with msg() and NULL is
+   returned; IN_READER is only handed on (to casegrouper_create_vars()) when
+   creation succeeds. */
+struct matrix_reader *
+matrix_reader_create (const struct dictionary *dict,
+ struct casereader *in_reader)
+{
+ /* Both lookups run before the check below, so a problem with either
+    variable gets its own message. */
+ const struct variable *varname = find_matrix_string_var (dict, "VARNAME_");
+ const struct variable *rowtype = find_matrix_string_var (dict, "ROWTYPE_");
+ if (!varname || !rowtype)
+ return NULL;
- if (!var_is_alpha (mr->rowtype))
+ for (size_t i = 0; i < dict_get_var_cnt (dict); i++)
{
- msg (ME, _("Matrix dataset variable %s should be of string type."),
- "ROWTYPE_");
- free (mr);
- return NULL;
+ const struct variable *v = dict_get_var (dict, i);
+ if (!var_is_numeric (v) && v != rowtype && v != varname)
+ {
+ msg (ME, _("Matrix dataset variable %s should be numeric."),
+ var_get_name (v));
+ return NULL;
+ }
}
size_t dvarcnt;
const struct variable **dvars = NULL;
dict_get_vars (dict, &dvars, &dvarcnt, DC_SCRATCH);
- if (n_vars)
- *n_vars = dvarcnt - var_get_dict_index (mr->varname) - 1;
-
- if (vars)
+ /* Continuous variables and split variables. */
+ const struct variable **cvars = dvars + var_get_dict_index (varname) + 1;
+ size_t n_cvars = dvarcnt - var_get_dict_index (varname) - 1;
+ const struct variable **svars = dvars;
+ size_t n_svars = var_get_dict_index (rowtype);
+ if (!n_cvars)
{
- int i;
- *vars = xcalloc (*n_vars, sizeof (struct variable **));
-
- for (i = 0; i < *n_vars; ++i)
- {
- (*vars)[i] = dvars[i + var_get_dict_index (mr->varname) + 1];
- }
+ msg (ME, _("Matrix dataset does not have any continuous variables."));
+ free (dvars);
+ return NULL;
}
- /* All the variables before ROWTYPE_ (if any) are split variables */
- mr->grouper = casegrouper_create_vars (in_reader, dvars, var_get_dict_index (mr->rowtype));
-
+ /* CVARS points into DVARS, which is freed below, so the reader keeps its
+    own copy of that part of the array. */
+ struct matrix_reader *mr = xmalloc (sizeof *mr);
+ *mr = (struct matrix_reader) {
+ .n_cvars = n_cvars,
+ .cvars = xmemdup (cvars, n_cvars * sizeof *cvars),
+ .rowtype = rowtype,
+ .varname = varname,
+ .dict = dict,
+ .grouper = casegrouper_create_vars (in_reader, svars, n_svars)
+ };
free (dvars);
return mr;
}
bool
-destroy_matrix_reader (struct matrix_reader *mr)
+matrix_reader_destroy (struct matrix_reader *mr)
{
if (mr == NULL)
return false;
return -1;
}
-bool
-next_matrix_from_reader (struct matrix_material *mm,
- struct matrix_reader *mr,
- const struct variable **vars, int n_vars)
+/* Returns the value of string variable VAR in case C as a substring with
+   trailing CC_SPACES characters trimmed off.  The substring comes straight
+   from case_ss(); presumably it refers to C's own data rather than a copy,
+   so it should not outlive C -- TODO confirm against case_ss(). */
+struct substring
+matrix_reader_get_string (const struct ccase *c, const struct variable *var)
{
- struct casereader *group;
+ struct substring s = case_ss (c, var);
+ ss_rtrim (&s, ss_cstr (CC_SPACES));
+ return s;
+}
- assert (vars);
+/* Stores SRC into string variable VAR of case C.  The destination is the
+   substring that case_ss() yields for VAR: SRC is copied in from the left,
+   cut off at the destination's length if it is longer, and whatever room
+   is left over is filled with spaces. */
+void
+matrix_reader_set_string (struct ccase *c, const struct variable *var,
+                          struct substring src)
+{
+  struct substring dst = case_ss (c, var);
+  size_t n_copy = src.length < dst.length ? src.length : dst.length;
+
+  size_t pos = 0;
+  /* Copy as much of SRC as fits... */
+  for (; pos < n_copy; pos++)
+    dst.string[pos] = src.string[pos];
+  /* ...then blank-pad the tail. */
+  for (; pos < dst.length; pos++)
+    dst.string[pos] = ' ';
+}
+bool
+matrix_reader_next (struct matrix_material *mm, struct matrix_reader *mr,
+ struct casereader **groupp)
+{
+ struct casereader *group;
if (!casegrouper_get_next_group (mr->grouper, &group))
{
*mm = (struct matrix_material) MATRIX_MATERIAL_INIT;
+ if (groupp)
+ *groupp = NULL;
return false;
}
+ if (groupp)
+ *groupp = casereader_clone (group);
+
+ const struct variable **vars = mr->cvars;
+ size_t n_vars = mr->n_cvars;
+
*mm = (struct matrix_material) {
.n = gsl_matrix_calloc (n_vars, n_vars),
.mean_matrix = gsl_matrix_calloc (n_vars, n_vars),
struct ccase *c;
for (; (c = casereader_read (group)); case_unref (c))
{
- struct substring rowtype = case_ss (c, mr->rowtype);
- ss_rtrim (&rowtype, ss_cstr (CC_SPACES));
+ struct substring rowtype = matrix_reader_get_string (c, mr->rowtype);
gsl_matrix *v
= (ss_equals_case (rowtype, ss_cstr ("N")) ? mm->n
for (size_t i = 0; i < N_MATRICES; i++)
if (matrices[i].good_rows && matrices[i].good_rows != n_vars)
- msg (SW, _("%s matrix has %d columns but %zu rows named variables "
+ msg (SW, _("%s matrix has %zu columns but %zu rows named variables "
"to be analyzed (and %zu rows named unknown variables)."),
matrices[i].name, n_vars, matrices[i].good_rows,
matrices[i].bad_rows);
int
cmd_debug_matrix_read (struct lexer *lexer UNUSED, struct dataset *ds)
{
- const struct variable **vars;
- size_t n_vars;
- struct matrix_reader *mr = create_matrix_reader_from_case_reader (
- dataset_dict (ds), proc_open (ds), &vars, &n_vars);
+ struct matrix_reader *mr = matrix_reader_create (dataset_dict (ds),
+ proc_open (ds));
if (!mr)
return CMD_FAILURE;
if (!i)
pivot_category_create_leaf_rc (d->root, pivot_value_new_text ("Value"),
PIVOT_RC_CORRELATION);
- for (size_t j = 0; j < n_vars; j++)
+ for (size_t j = 0; j < mr->n_cvars; j++)
pivot_category_create_leaf_rc (
- d->root, pivot_value_new_variable (vars[j]), PIVOT_RC_CORRELATION);
+ d->root, pivot_value_new_variable (mr->cvars[j]),
+ PIVOT_RC_CORRELATION);
}
struct pivot_dimension *stat = pivot_dimension_create (pt, PIVOT_AXIS_ROW,
int split_num = 0;
struct matrix_material mm = MATRIX_MATERIAL_INIT;
- while (next_matrix_from_reader (&mm, mr, vars, n_vars))
+ while (matrix_reader_next (&mm, mr, NULL))
{
pivot_category_create_leaf (split->root,
pivot_value_new_integer (split_num + 1));
{
if (i == MM_COV || i == MM_CORR)
{
- for (size_t y = 0; y < n_vars; y++)
- for (size_t x = 0; x < n_vars; x++)
+ for (size_t y = 0; y < mr->n_cvars; y++)
+ for (size_t x = 0; x < mr->n_cvars; x++)
pivot_table_put4 (
pt, y + 1, x, i, split_num,
pivot_value_new_number (gsl_matrix_get (m[i], y, x)));
}
else
- for (size_t x = 0; x < n_vars; x++)
+ for (size_t x = 0; x < mr->n_cvars; x++)
{
double n = gsl_matrix_get (m[i], 0, x);
if (i == MM_STDDEV)
proc_commit (ds);
- destroy_matrix_reader (mr);
- free (vars);
+ matrix_reader_destroy (mr);
return CMD_SUCCESS;
}
#include <config.h>
+#include <math.h>
+
+#include "data/any-reader.h"
+#include "data/any-writer.h"
+#include "data/casereader.h"
+#include "data/casewriter.h"
#include "data/dataset.h"
+#include "data/dictionary.h"
#include "language/data-io/file-handle.h"
+#include "language/data-io/matrix-reader.h"
#include "language/lexer/lexer.h"
#include "language/command.h"
+/* Parses and executes MCONVERT: reads the matrix file IN, copies every case
+   to OUT and, for each split group, appends the matrix that the group
+   lacks: CORR rows plus a STDDEV row derived from COV, or COV rows derived
+   from CORR and the group's variances.
+
+   Returns CMD_SUCCESS on success, CMD_FAILURE otherwise. */
int
cmd_mconvert (struct lexer *lexer, struct dataset *ds)
{
- bool append = false;
+  bool append UNUSED = false;
struct file_handle *in = NULL;
struct file_handle *out = NULL;
+  /* Declared and nulled before the first "goto error" so that the shared
+     cleanup at the end never sees an indeterminate pointer. */
+  struct dictionary *d = NULL;
while (lex_token (lexer) != T_ENDCMD)
goto error;
}
- /* XXX */
+  assert (in);
+  assert (out);
+
+  struct casereader *cr = any_reader_open_and_decode (in, NULL, &d, NULL);
+  if (!cr)
+    goto error;
+
+  struct matrix_reader *mr = matrix_reader_create (d, cr);
+  if (!mr)
+    {
+      /* D is released exactly once, at "error"; unreferencing it here as
+         well would drop the reference twice. */
+      casereader_destroy (cr);
+      goto error;
+    }
+
+  struct casewriter *cw = any_writer_open (out, d);
+  if (!cw)
+    {
+      matrix_reader_destroy (mr);
+      /* NOTE(review): CR was handed to matrix_reader_create(), which passes
+         it to casegrouper_create_vars().  If matrix_reader_destroy() already
+         destroys that reader, this call is a double destroy -- confirm and
+         drop it if so. */
+      casereader_destroy (cr);
+      goto error;
+    }
+
+  for (;;)
+    {
+      struct matrix_material mm;
+      struct casereader *group;
+      if (!matrix_reader_next (&mm, mr, &group))
+        break;
+
+      /* Pass the group through unchanged, remembering its first COV or
+         CORR row as a template for the rows synthesized below. */
+      struct ccase *model = NULL;
+      for (;;)
+        {
+          struct ccase *c = casereader_read (group);
+          if (!c)
+            break;
+
+          if (!model)
+            {
+              struct substring rowtype
+                = matrix_reader_get_string (c, mr->rowtype);
+              if (ss_equals_case (rowtype, ss_cstr ("COV"))
+                  || ss_equals_case (rowtype, ss_cstr ("CORR")))
+                model = case_ref (c);
+            }
+          casewriter_write (cw, c);
+        }
+
+      /* GROUP is a clone that matrix_reader_next() made for this caller, so
+         it has to be destroyed here once it has been read. */
+      casereader_destroy (group);
+
+      /* NOTE(review): matrix_reader_next() allocates MM's matrices with
+         gsl_matrix_calloc(), but nothing in this loop releases them, on
+         this path or at the bottom of the loop -- confirm which call is
+         meant to free them. */
+      if (!model)
+        continue;
+
+      if (mm.cov && !mm.corr)
+        {
+          assert (mm.cov->size1 == mr->n_cvars);
+          assert (mm.cov->size2 == mr->n_cvars);
+
+          /* One CORR row per variable: corr(y,x) = cov(y,x) / (sd_x * sd_y). */
+          for (size_t y = 0; y < mr->n_cvars; y++)
+            {
+              struct ccase *row = case_clone (model);
+              double var_y = gsl_matrix_get (mm.cov, y, y);
+              for (size_t x = 0; x < mr->n_cvars; x++)
+                {
+                  double var_x = gsl_matrix_get (mm.cov, x, x);
+                  double cov = gsl_matrix_get (mm.cov, y, x);
+                  *case_num_rw (row, mr->cvars[x])
+                    = cov / sqrt (var_x * var_y);
+                }
+              matrix_reader_set_string (row, mr->rowtype, ss_cstr ("CORR"));
+              matrix_reader_set_string (row, mr->varname,
+                                        ss_cstr (var_get_name (mr->cvars[y])));
+              casewriter_write (cw, row);
+            }
+
+          /* A single STDDEV row taken from the diagonal of COV. */
+          struct ccase *row = case_clone (model);
+          for (size_t x = 0; x < mr->n_cvars; x++)
+            {
+              double variance = gsl_matrix_get (mm.cov, x, x);
+              *case_num_rw (row, mr->cvars[x]) = sqrt (variance);
+            }
+          matrix_reader_set_string (row, mr->rowtype, ss_cstr ("STDDEV"));
+          matrix_reader_set_string (row, mr->varname, ss_empty ());
+          casewriter_write (cw, row);
+        }
+
+      /* Covariances can only be rebuilt from correlations when the
+         variances are available, hence the extra check on var_matrix. */
+      if (mm.corr && !mm.cov && mm.var_matrix)
+        {
+          assert (mm.corr->size1 == mr->n_cvars);
+          assert (mm.corr->size2 == mr->n_cvars);
+
+          for (size_t y = 0; y < mr->n_cvars; y++)
+            {
+              struct ccase *row = case_clone (model);
+              double var_y = gsl_matrix_get (mm.var_matrix, y, y);
+              for (size_t x = 0; x < mr->n_cvars; x++)
+                {
+                  double var_x = gsl_matrix_get (mm.var_matrix, x, x);
+                  double corr = gsl_matrix_get (mm.corr, y, x);
+                  *case_num_rw (row, mr->cvars[x])
+                    = corr * sqrt (var_x * var_y);
+                }
+              /* The template is a CORR row here, so relabel the clone;
+                 otherwise the covariances would be written out under the
+                 template's CORR row type and variable name. */
+              matrix_reader_set_string (row, mr->rowtype, ss_cstr ("COV"));
+              matrix_reader_set_string (row, mr->varname,
+                                        ss_cstr (var_get_name (mr->cvars[y])));
+              casewriter_write (cw, row);
+            }
+        }
+
+      case_unref (model);
+    }
+
+  matrix_reader_destroy (mr);
+  casewriter_destroy (cw);
+  fh_unref (in);
+  fh_unref (out);
+  dict_unref (d);
+  return CMD_SUCCESS;
error:
fh_unref (in);
fh_unref (out);
+  /* D is still NULL when the failure happened before the input file was
+     opened. */
+  if (d)
+    dict_unref (d);
return CMD_FAILURE;
}