#include "language/lexer/variable-parser.h"
#include "libpspp/i18n.h"
#include "libpspp/message.h"
+#include "libpspp/misc.h"
#include "gl/xsize.h"
#include "gl/xalloc.h"
FULL
};
+static const int ROWTYPE_WIDTH = 8; /* Width, in bytes, of the ROWTYPE_ string variable. */
+
struct matrix_format
{
enum triangle triangle;
const struct variable *rowtype;
const struct variable *varname;
int n_continuous_vars;
+ struct variable **split_vars; /* Variables named on SPLIT, filled in by parse_variables; NULL when there is no SPLIT.  Freed by the command on exit. */
+ size_t n_split_vars; /* Number of entries in split_vars; 0 when there is no SPLIT. */
};
/*
PROX.
*/
-/* Sets the value of OUTCASE which corresponds to MFORMAT's varname variable
- to the string STR. VAR must be of type string.
+/* Sets the value of OUTCASE which corresponds to VNAME
+ to the value STR. VNAME must be of type string.
+
+ At most var_get_width (VNAME) bytes of STR are copied, so a STR longer
+ than the variable's width is truncated to that width.
+
+ NOTE(review): the copy is done with strncpy, which fills any remaining
+ bytes with NULs rather than spaces when STR is shorter than the width,
+ whereas blank_varname_column fills the value with spaces -- confirm that
+ readers of this column accept NUL padding.
*/
static void
-set_varname_column (struct ccase *outcase, const struct matrix_format *mformat,
- const char *str, int len)
+set_varname_column (struct ccase *outcase, const struct variable *vname,
+ const char *str)
{
- const struct variable *var = mformat->varname;
- uint8_t *s = value_str_rw (case_data_rw (outcase, var), len);
+ int len = var_get_width (vname); /* copy limit: the full width of VNAME */
+ uint8_t *s = value_str_rw (case_data_rw (outcase, vname), len);
strncpy ((char *) s, str, len);
}
+/* Overwrites the value of OUTCASE which corresponds to VNAME with spaces,
+ across the full width of VNAME. VNAME must be of type string.
+*/
+static void
+blank_varname_column (struct ccase *outcase, const struct variable *vname)
+{
+ const int width = var_get_width (vname);
+ union value *cell = case_data_rw (outcase, vname);
+
+ memset (value_str_rw (cell, width), ' ', width);
+}
static struct casereader *
preprocess (struct casereader *casereader0, const struct dictionary *dict, void *aux)
struct casewriter *writer;
writer = autopaging_writer_create (proto);
- double *temp_matrix =
- xcalloc (sizeof (*temp_matrix),
- mformat->n_continuous_vars * mformat->n_continuous_vars);
+ double **matrices = NULL;
+ size_t n_splits = 0;
+
+ const size_t sizeof_matrix =
+ sizeof (double) * mformat->n_continuous_vars * mformat->n_continuous_vars;
+
/* Make an initial pass to populate our temporary matrix */
struct casereader *pass0 = casereader_clone (casereader0);
struct ccase *c;
+ unsigned int prev_split_hash = 1;
int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0;
for (; (c = casereader_read (pass0)) != NULL; case_unref (c))
{
+ int s;
+ unsigned int split_hash = 0;
+ for (s = 0; s < mformat->n_split_vars; ++s)
+ {
+ const struct variable *svar = mformat->split_vars[s];
+ const union value *sv = case_data (c, svar);
+ split_hash = value_hash (sv, var_get_width (svar), split_hash);
+ }
+
+ if (matrices == NULL || prev_split_hash != split_hash)
+ {
+ row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ?
+ 1 : 0;
+
+ n_splits++;
+ matrices = xrealloc (matrices, sizeof (double*) * n_splits);
+ matrices[n_splits - 1] = xmalloc (sizeof_matrix);
+ }
+
+ prev_split_hash = split_hash;
+
int c_offset = (mformat->triangle == UPPER) ? row : 0;
if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL)
c_offset++;
const union value *v = case_data (c, mformat->rowtype);
- const char *val = (const char *) value_str (v, 8);
- if (0 == strncasecmp (val, "corr ", 8) ||
- 0 == strncasecmp (val, "cov ", 8))
+ const char *val = (const char *) value_str (v, ROWTYPE_WIDTH);
+ if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) ||
+ 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH))
{
int col;
for (col = c_offset; col < mformat->n_continuous_vars; ++col)
{
const struct variable *var =
dict_get_var (dict,
- 1 + col - c_offset + var_get_dict_index (mformat->varname));
+ 1 + col - c_offset +
+ var_get_dict_index (mformat->varname));
double e = case_data (c, var)->f;
if (e == SYSMIS)
continue;
- temp_matrix [col + mformat->n_continuous_vars * row] = e;
- temp_matrix [row + mformat->n_continuous_vars * col] = e;
+
+
+ (matrices[n_splits-1])[col + mformat->n_continuous_vars * row] = e;
+ (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e;
}
row++;
}
const int idx = var_get_dict_index (mformat->varname);
row = 0;
struct ccase *prev_case = NULL;
+ prev_split_hash = 1;
+ n_splits = 0;
for (; (c = casereader_read (casereader0)) != NULL; prev_case = c)
{
+ int s;
+ unsigned int split_hash = 0;
+ for (s = 0; s < mformat->n_split_vars; ++s)
+ {
+ const struct variable *svar = mformat->split_vars[s];
+ const union value *sv = case_data (c, svar);
+ split_hash = value_hash (sv, var_get_width (svar), split_hash);
+ }
+ if (prev_split_hash != split_hash)
+ {
+ n_splits++;
+ row = 0;
+ }
+
+ prev_split_hash = split_hash;
+
case_unref (prev_case);
struct ccase *outcase = case_create (proto);
case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto));
const union value *v = case_data (c, mformat->rowtype);
- const char *val = (const char *) value_str (v, 8);
- if (0 == strncasecmp (val, "corr ", 8) ||
- 0 == strncasecmp (val, "cov ", 8))
+ const char *val = (const char *) value_str (v, ROWTYPE_WIDTH);
+ if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) ||
+ 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH))
{
int col;
const struct variable *var = dict_get_var (dict, idx + 1 + row);
- set_varname_column (outcase, mformat, var_get_name (var), 8);
- value_copy (case_data_rw (outcase, mformat->rowtype), v, 8);
+ set_varname_column (outcase, mformat->varname, var_get_name (var));
+ value_copy (case_data_rw (outcase, mformat->rowtype), v, ROWTYPE_WIDTH);
for (col = 0; col < mformat->n_continuous_vars; ++col)
{
union value *dest_val =
case_data_rw_idx (outcase,
1 + col + var_get_dict_index (mformat->varname));
- dest_val->f = temp_matrix [col + mformat->n_continuous_vars * row];
+ dest_val->f = (matrices[n_splits - 1])[col + mformat->n_continuous_vars * row];
if (col == row && mformat->diagonal == NO_DIAGONAL)
dest_val->f = 1.0;
}
}
else
{
- set_varname_column (outcase, mformat, " ", 8);
+ blank_varname_column (outcase, mformat->varname);
+ }
+
+ /* Special case for SD and N_VECTOR: Rewrite as STDDEV and N respectively */
+ if (0 == strncasecmp (val, "sd ", ROWTYPE_WIDTH))
+ {
+ value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH,
+ (uint8_t *) "STDDEV", 6, ' ');
+ }
+ else if (0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH))
+ {
+ value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH,
+ (uint8_t *) "N", 1, ' ');
}
casewriter_write (writer, outcase);
const struct variable *var = dict_get_var (dict, idx + 1 + row);
- set_varname_column (outcase, mformat, var_get_name (var), 8);
+ set_varname_column (outcase, mformat->varname, var_get_name (var));
for (col = 0; col < mformat->n_continuous_vars; ++col)
{
union value *dest_val =
case_data_rw_idx (outcase, 1 + col +
var_get_dict_index (mformat->varname));
- dest_val->f = temp_matrix [col + mformat->n_continuous_vars * row];
+ dest_val->f = (matrices[n_splits - 1]) [col + mformat->n_continuous_vars * row];
if (col == row && mformat->diagonal == NO_DIAGONAL)
dest_val->f = 1.0;
}
if (prev_case)
case_unref (prev_case);
- free (temp_matrix);
+ int i;
+ for (i = 0 ; i < n_splits; ++i)
+ free (matrices[i]);
+ free (matrices);
struct casereader *reader1 = casewriter_make_reader (writer);
casereader_destroy (casereader0);
return reader1;
mformat.triangle = LOWER;
mformat.diagonal = DIAGONAL;
+ mformat.n_split_vars = 0;
+ mformat.split_vars = NULL;
dict = (in_input_program ()
? dataset_dict (ds)
data_parser_set_warn_missing_fields (parser, false);
data_parser_set_span (parser, false);
- mformat.rowtype = dict_create_var (dict, "ROWTYPE_", 8);
- mformat.varname = dict_create_var (dict, "VARNAME_", 8);
+ mformat.rowtype = dict_create_var (dict, "ROWTYPE_", ROWTYPE_WIDTH);
mformat.n_continuous_vars = 0;
+ mformat.n_split_vars = 0;
if (! lex_force_match_id (lexer, "VARIABLES"))
goto error;
lex_match (lexer, T_EQUALS);
- if (! parse_mixed_vars (lexer, dict, &names, &n_names, 0))
+ if (! parse_mixed_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE))
{
int i;
for (i = 0; i < n_names; ++i)
goto error;
}
+ int longest_name = 0;
+ for (i = 0; i < n_names; ++i)
+ {
+ maximize_int (&longest_name, strlen (names[i]));
+ }
+
+ mformat.varname = dict_create_var (dict, "VARNAME_",
+ 8 * DIV_RND_UP (longest_name, 8));
+
for (i = 0; i < n_names; ++i)
{
if (0 == strcasecmp (names[i], "ROWTYPE_"))
else if (lex_match_id (lexer, "SPLIT"))
{
lex_match (lexer, T_EQUALS);
- struct variable **split_vars = NULL;
- size_t n_split_vars;
- if (! parse_variables (lexer, dict, &split_vars, &n_split_vars, 0))
+ if (! parse_variables (lexer, dict, &mformat.split_vars, &mformat.n_split_vars, 0))
{
- free (split_vars);
+ free (mformat.split_vars);
goto error;
}
int i;
- for (i = 0; i < n_split_vars; ++i)
+ for (i = 0; i < mformat.n_split_vars; ++i)
{
const struct fmt_spec fmt = fmt_for_input (FMT_F, 4, 0);
- var_set_both_formats (split_vars[i], &fmt);
+ var_set_both_formats (mformat.split_vars[i], &fmt);
}
- dict_reorder_vars (dict, split_vars, n_split_vars);
- mformat.n_continuous_vars -= n_split_vars;
- free (split_vars);
+ dict_reorder_vars (dict, mformat.split_vars, mformat.n_split_vars);
+ mformat.n_continuous_vars -= mformat.n_split_vars;
}
else
{
fh_unref (fh);
free (encoding);
+ free (mformat.split_vars);
return CMD_DATA_LIST;
dict_destroy (dict);
fh_unref (fh);
free (encoding);
+ free (mformat.split_vars);
return CMD_CASCADING_FAILURE;
}