#include "language/lexer/variable-parser.h"
#include "libpspp/i18n.h"
#include "libpspp/message.h"
+#include "libpspp/misc.h"
#include "gl/xsize.h"
#include "gl/xalloc.h"
FULL
};
/* Width in bytes of the ROWTYPE_ string variable.  The same width is
   used when comparing or copying row-type keywords such as "corr",
   "cov", "sd" and "n_vector". */
+static const int ROWTYPE_WIDTH = 8;
+
/* Description of the matrix layout, passed to preprocess as its AUX
   argument. */
struct matrix_format
{
/* Which triangle the data supplies; compared against LOWER, UPPER
   and FULL. */
enum triangle triangle;
/* Number of continuous variables; also used as the row and column
   count of each temporary matrix. */
int n_continuous_vars;
/* Split variables.  A change in the value of any of them between
   consecutive cases starts a new split group. */
struct variable **split_vars;
/* Number of elements in SPLIT_VARS. */
size_t n_split_vars;
/* Value given on the N subcommand, or -1 if the subcommand was not
   given.  When non-negative, preprocess writes an "N" row carrying
   this value and ignores N rows found in the data. */
+ long n;
};
/*
PROX.
*/
-/* Sets the value of OUTCASE which corresponds to MFORMAT's varname variable
- to the string STR. VAR must be of type string.
+/* Copies STR into the value of OUTCASE which corresponds to VNAME.
+ At most var_get_width (VNAME) bytes are written.  If STR is shorter
+ than that, strncpy fills the remaining bytes with NULs; if STR is
+ that long or longer, no terminating NUL is stored.
+ VNAME must be of type string.
*/
static void
-set_varname_column (struct ccase *outcase, const struct matrix_format *mformat,
- const char *str, int len)
+set_varname_column (struct ccase *outcase, const struct variable *vname,
+ const char *str)
{
- const struct variable *var = mformat->varname;
- uint8_t *s = value_str_rw (case_data_rw (outcase, var), len);
+ int len = var_get_width (vname);
+ uint8_t *s = case_str_rw (outcase, vname);
/* NOTE(review): a short STR leaves the tail of the field NUL-filled,
   whereas blank_varname_column fills the same field with spaces.
   Confirm whether readers of VARNAME_ expect space padding. */
- strncpy ((char *) s, str, len);
+ strncpy (CHAR_CAST (char *, s), str, len);
}
/* Fills the value of OUTCASE which corresponds to VNAME with spaces,
   over the full width reported by var_get_width (VNAME).  VNAME is
   accessed through case_str_rw, so it is presumably required to be a
   string variable -- confirm. */
+static void
+blank_varname_column (struct ccase *outcase, const struct variable *vname)
+{
+ int len = var_get_width (vname);
+ uint8_t *s = case_str_rw (outcase, vname);
+
+ memset (s, ' ', len);
+}
/* Rewrites the cases read from CASEREADER0 and returns a reader over
   the rewritten cases.  AUX points to the struct matrix_format that
   describes the layout.

   A first pass over a clone of CASEREADER0 gathers the "corr" and
   "cov" rows into temporary matrices.  A second pass over CASEREADER0
   itself writes the output cases.

   CASEREADER0 is destroyed on both the normal and the error path.
   Returns NULL, after issuing an error message, if the data holds
   more matrix rows than there are continuous variables. */
static struct casereader *
preprocess (struct casereader *casereader0, const struct dictionary *dict, void *aux)
{
struct matrix_format *mformat = aux;
const struct caseproto *proto = casereader_get_proto (casereader0);
- struct casewriter *writer;
- writer = autopaging_writer_create (proto);
-
+ struct casewriter *writer = autopaging_writer_create (proto);
+ struct ccase *prev_case = NULL;
double **matrices = NULL;
size_t n_splits = 0;
/* Make an initial pass to populate our temporary matrix */
struct casereader *pass0 = casereader_clone (casereader0);
struct ccase *c;
/* Split-variable values of the previous case, compared against each
   new case to detect the start of a new split group. */
- unsigned int prev_split_hash = 1;
+ union value *prev_values = xcalloc (mformat->n_split_vars, sizeof *prev_values);
int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0;
+ bool first_case = true;
for (; (c = casereader_read (pass0)) != NULL; case_unref (c))
{
int s;
/* MATCH stays false for the first case, so the first case always
   opens a new split group. */
- unsigned int split_hash = 0;
- for (s = 0; s < mformat->n_split_vars; ++s)
+ bool match = false;
+ if (!first_case)
{
- const struct variable *svar = mformat->split_vars[s];
- const union value *sv = case_data (c, svar);
- split_hash = value_hash (sv, var_get_width (svar), split_hash);
+ match = true;
+ for (s = 0; s < mformat->n_split_vars; ++s)
+ {
+ const struct variable *svar = mformat->split_vars[s];
+ const union value *sv = case_data (c, svar);
+ if (! value_equal (prev_values + s, sv, var_get_width (svar)))
+ {
+ match = false;
+ break;
+ }
+ }
}
+ first_case = false;
- if (matrices == NULL || prev_split_hash != split_hash)
+ if (matrices == NULL || ! match)
{
row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ?
1 : 0;
matrices[n_splits - 1] = xmalloc (sizeof_matrix);
}
/* Remember this case's split values for the comparison on the next
   case.
   NOTE(review): value_clone runs for every case and the visible code
   never releases the previous contents of PREV_VALUES.  If
   value_clone allocates storage for string widths this leaks once
   per case -- confirm against its definition. */
- prev_split_hash = split_hash;
+ for (s = 0; s < mformat->n_split_vars; ++s)
+ {
+ const struct variable *svar = mformat->split_vars[s];
+ const union value *sv = case_data (c, svar);
+ value_clone (prev_values + s, sv, var_get_width (svar));
+ }
int c_offset = (mformat->triangle == UPPER) ? row : 0;
if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL)
c_offset++;
const union value *v = case_data (c, mformat->rowtype);
- const char *val = (const char *) value_str (v, 8);
- if (0 == strncasecmp (val, "corr ", 8) ||
- 0 == strncasecmp (val, "cov ", 8))
+ const char *val = CHAR_CAST (const char *, v->s);
+ if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) ||
+ 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH))
{
/* ROW indexes a matrix with n_continuous_vars rows, so a further
   row would be stored out of bounds.  The goto bypasses the loop
   header's case_unref, hence the explicit releases below.
   NOTE(review): the message reads "variable declared"; the plural
   was probably intended. */
+ if (row >= mformat->n_continuous_vars)
+ {
+ msg (SE,
+ _("There are %d variable declared but the data has at least %d matrix rows."),
+ mformat->n_continuous_vars, row + 1);
+ case_unref (c);
+ casereader_destroy (pass0);
+ free (prev_values);
+ goto error;
+ }
int col;
for (col = c_offset; col < mformat->n_continuous_vars; ++col)
{
if (e == SYSMIS)
continue;
-
+ /* Fill in the lower triangle */
(matrices[n_splits-1])[col + mformat->n_continuous_vars * row] = e;
- (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e;
+
+ if (mformat->triangle != FULL)
+ /* Fill in the upper triangle */
+ (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e;
}
row++;
}
}
casereader_destroy (pass0);
+ free (prev_values);
/* Now make a second pass to fill in the other triangle from our
temporary matrix */
const int idx = var_get_dict_index (mformat->varname);
row = 0;
- struct ccase *prev_case = NULL;
- prev_split_hash = 1;
+
/* The N subcommand was given: write a synthetic "N" row with a blank
   VARNAME_ column and N stored in each of the n_continuous_vars
   values that follow VARNAME_ by dictionary index.  This row is
   written once, before any input case is read.
   NOTE(review): it is not repeated per split group -- confirm that
   is intended. */
+ if (mformat->n >= 0)
+ {
+ int col;
+ struct ccase *outcase = case_create (proto);
+ union value *v = case_data_rw (outcase, mformat->rowtype);
+ memcpy (v->s, "N ", ROWTYPE_WIDTH);
+ blank_varname_column (outcase, mformat->varname);
+ for (col = 0; col < mformat->n_continuous_vars; ++col)
+ {
+ union value *dest_val =
+ case_data_rw_idx (outcase,
+ 1 + col + var_get_dict_index (mformat->varname));
+ dest_val->f = mformat->n;
+ }
+ casewriter_write (writer, outcase);
+ }
+
n_splits = 0;
/* PREV_VALUES was freed after the first pass; allocate it afresh and
   restart split-group detection. */
+ prev_values = xcalloc (mformat->n_split_vars, sizeof *prev_values);
+ first_case = true;
/* The loop header hands the current case over to PREV_CASE; each
   iteration releases the case it inherited. */
for (; (c = casereader_read (casereader0)) != NULL; prev_case = c)
{
int s;
- unsigned int split_hash = 0;
- for (s = 0; s < mformat->n_split_vars; ++s)
+ bool match = false;
+ if (!first_case)
{
- const struct variable *svar = mformat->split_vars[s];
- const union value *sv = case_data (c, svar);
- split_hash = value_hash (sv, var_get_width (svar), split_hash);
+ match = true;
+ for (s = 0; s < mformat->n_split_vars; ++s)
+ {
+ const struct variable *svar = mformat->split_vars[s];
+ const union value *sv = case_data (c, svar);
+ if (! value_equal (prev_values + s, sv, var_get_width (svar)))
+ {
+ match = false;
+ break;
+ }
+ }
}
- if (prev_split_hash != split_hash)
+ first_case = false;
+ if (! match)
{
n_splits++;
row = 0;
}
- prev_split_hash = split_hash;
+ for (s = 0; s < mformat->n_split_vars; ++s)
+ {
+ const struct variable *svar = mformat->split_vars[s];
+ const union value *sv = case_data (c, svar);
+ value_clone (prev_values + s, sv, var_get_width (svar));
+ }
case_unref (prev_case);
+ const union value *v = case_data (c, mformat->rowtype);
+ const char *val = CHAR_CAST (const char *, v->s);
/* An explicit N subcommand takes precedence: N and N_VECTOR rows in
   the data are dropped with a warning. */
+ if (mformat->n >= 0)
+ {
+ if (0 == strncasecmp (val, "n ", ROWTYPE_WIDTH) ||
+ 0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH))
+ {
+ msg (SW,
+ _("The N subcommand was specified, but a N record was also found in the data. The N record will be ignored."));
+ continue;
+ }
+ }
+
struct ccase *outcase = case_create (proto);
case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto));
- const union value *v = case_data (c, mformat->rowtype);
- const char *val = (const char *) value_str (v, 8);
- if (0 == strncasecmp (val, "corr ", 8) ||
- 0 == strncasecmp (val, "cov ", 8))
+
+ if (0 == strncasecmp (val, "corr ", ROWTYPE_WIDTH) ||
+ 0 == strncasecmp (val, "cov ", ROWTYPE_WIDTH))
{
int col;
/* Matrix rows take their VARNAME_ from the variable located ROW + 1
   positions after VARNAME_ in the dictionary. */
const struct variable *var = dict_get_var (dict, idx + 1 + row);
- set_varname_column (outcase, mformat, var_get_name (var), 8);
- value_copy (case_data_rw (outcase, mformat->rowtype), v, 8);
+ set_varname_column (outcase, mformat->varname, var_get_name (var));
+ value_copy (case_data_rw (outcase, mformat->rowtype), v, ROWTYPE_WIDTH);
for (col = 0; col < mformat->n_continuous_vars; ++col)
{
}
else
{
- set_varname_column (outcase, mformat, " ", 8);
+ blank_varname_column (outcase, mformat->varname);
}
/* Special case for SD and N_VECTOR: Rewrite as STDDEV and N respectively */
- if (0 == strncasecmp (val, "sd ", 8))
+ if (0 == strncasecmp (val, "sd ", ROWTYPE_WIDTH))
{
- value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), 8,
+ value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH,
(uint8_t *) "STDDEV", 6, ' ');
}
- else if (0 == strncasecmp (val, "n_vector", 8))
+ else if (0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH))
{
- value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), 8,
+ value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH,
(uint8_t *) "N", 1, ' ');
}
if (prev_case)
case_copy (outcase, 0, prev_case, 0, caseproto_get_n_widths (proto));
-
const struct variable *var = dict_get_var (dict, idx + 1 + row);
- set_varname_column (outcase, mformat, var_get_name (var), 8);
+ set_varname_column (outcase, mformat->varname, var_get_name (var));
for (col = 0; col < mformat->n_continuous_vars; ++col)
{
casewriter_write (writer, outcase);
}
+ free (prev_values);
if (prev_case)
case_unref (prev_case);
/* Hand the written cases back as a reader; the input reader is no
   longer needed. */
struct casereader *reader1 = casewriter_make_reader (writer);
casereader_destroy (casereader0);
return reader1;
+
+
/* Error exit: release the temporary matrices, the input reader and
   the partially written output, then report failure with NULL. */
+error:
+ if (prev_case)
+ case_unref (prev_case);
+
+ for (i = 0 ; i < n_splits; ++i)
+ free (matrices[i]);
+ free (matrices);
+ casereader_destroy (casereader0);
+ casewriter_destroy (writer);
+ return NULL;
}
int
mformat.diagonal = DIAGONAL;
mformat.n_split_vars = 0;
mformat.split_vars = NULL;
+ mformat.n = -1;
dict = (in_input_program ()
? dataset_dict (ds)
data_parser_set_warn_missing_fields (parser, false);
data_parser_set_span (parser, false);
- mformat.rowtype = dict_create_var (dict, "ROWTYPE_", 8);
- mformat.varname = dict_create_var (dict, "VARNAME_", 8);
+ mformat.rowtype = dict_create_var (dict, "ROWTYPE_", ROWTYPE_WIDTH);
mformat.n_continuous_vars = 0;
mformat.n_split_vars = 0;
goto error;
}
+ int longest_name = 0;
+ for (i = 0; i < n_names; ++i)
+ {
+ maximize_int (&longest_name, strlen (names[i]));
+ }
+
+ mformat.varname = dict_create_var (dict, "VARNAME_",
+ 8 * DIV_RND_UP (longest_name, 8));
+
for (i = 0; i < n_names; ++i)
{
if (0 == strcasecmp (names[i], "ROWTYPE_"))
if (! lex_force_match (lexer, T_SLASH))
goto error;
- if (lex_match_id (lexer, "FORMAT"))
+ if (lex_match_id (lexer, "N"))
+ {
+ lex_match (lexer, T_EQUALS);
+
+ if (! lex_force_int (lexer))
+ goto error;
+
+ mformat.n = lex_integer (lexer);
+ if (mformat.n < 0)
+ {
+ msg (SE, _("%s must not be negative."), "N");
+ goto error;
+ }
+ lex_get (lexer);
+ }
+ else if (lex_match_id (lexer, "FORMAT"))
{
lex_match (lexer, T_EQUALS);
}
else
{
- data_parser_make_active_file (parser, ds, reader, dict, preprocess, &mformat);
+ data_parser_make_active_file (parser, ds, reader, dict, preprocess,
+ &mformat);
}
fh_unref (fh);
error:
data_parser_destroy (parser);
if (!in_input_program ())
- dict_destroy (dict);
+ dict_unref (dict);
fh_unref (fh);
free (encoding);
free (mformat.split_vars);
return retval;
}
-