struct hmap values;
};
+struct pivot_splits_dup          /* One combination of split values already seen. */
+ {
+ struct hmap_node hmap_node;     /* Node in the 'dups' hmap of struct pivot_splits. */
+ union value *values;            /* Array of PS->n values, one per split variable, cloned from a case. */
+ };
+
struct pivot_splits
{
struct pivot_splits_var *vars;
size_t n;
char *encoding;
+ struct hmap dups;
+
size_t dindexes[MAX_SPLITS];
int warnings_left;
.vars = psvars,
.n = n,
.encoding = xstrdup (dict_get_encoding (dict)),
+ .dups = HMAP_INITIALIZER (ps->dups),
.dindexes = { [0] = SIZE_MAX },
.warnings_left = 5,
};
"split values.", -ps->warnings_left),
-ps->warnings_left);
+ struct pivot_splits_dup *dup, *next_dup;
+ HMAP_FOR_EACH_SAFE (dup, next_dup, struct pivot_splits_dup, hmap_node,
+ &ps->dups)
+ {
+ for (size_t i = 0; i < ps->n; i++)
+ value_destroy (&dup->values[i], ps->vars[i].width);
+ free (dup->values);
+ free (dup);
+ }
+ hmap_destroy (&ps->dups);
+
for (size_t i = 0; i < ps->n; i++)
{
struct pivot_splits_var *psvar = &ps->vars[i];
hmap_destroy (&psvar->values);
}
free (ps->vars);
+
free (ps->encoding);
free (ps);
}
return NULL;
}
+/* Checks whether the combination of split variable values found in EXAMPLE
+   has already been recorded in PS->dups.
+
+   Returns true if an equal combination is already present.  Otherwise,
+   stores a clone of EXAMPLE's split values in PS->dups (released later along
+   with the rest of PS) and returns false. */
+static bool
+pivot_splits_find_dup (struct pivot_splits *ps, const struct ccase *example)
+{
+  /* Fold the value of every split variable into one hash. */
+  unsigned int hash = 0;
+  for (size_t i = 0; i < ps->n; i++)
+    hash = value_hash (case_data (example, ps->vars[i].var),
+                       ps->vars[i].width, hash);
+
+  /* Look through the entries that share this hash for an exact match. */
+  struct pivot_splits_dup *dup;
+  HMAP_FOR_EACH_WITH_HASH (dup, struct pivot_splits_dup, hmap_node, hash,
+                           &ps->dups)
+    {
+      /* Count leading values that match; stop at the first mismatch. */
+      size_t n_equal = 0;
+      while (n_equal < ps->n
+             && value_equal (case_data (example, ps->vars[n_equal].var),
+                             &dup->values[n_equal],
+                             ps->vars[n_equal].width))
+        n_equal++;
+
+      if (n_equal == ps->n)
+        return true;
+    }
+
+  /* Not seen before: keep a private copy of the values for later lookups. */
+  union value *copy = xmalloc (ps->n * sizeof *copy);
+  for (size_t i = 0; i < ps->n; i++)
+    value_clone (&copy[i], case_data (example, ps->vars[i].var),
+                 ps->vars[i].width);
+
+  dup = xmalloc (sizeof *dup);
+  dup->values = copy;
+  hmap_insert (&ps->dups, &dup->hmap_node, hash);
+  return false;
+}
+
/* Begins adding data for a new split file group to the pivot table associated
with PS. EXAMPLE should be a case from the new split file group.
if (!ps)
return;
- size_t n_changed = 0;
for (size_t i = 0; i < ps->n; i++)
{
struct pivot_splits_var *psvar = &ps->vars[i];
pivot_value_new_var_value (psvar->var, value));
}
- if (ps->dindexes[i] != psval->leaf)
- {
- ps->dindexes[i] = psval->leaf;
- n_changed++;
- }
+ ps->dindexes[i] = psval->leaf;
}
- if (!n_changed)
+ if (pivot_splits_find_dup (ps, example))
{
if (ps->warnings_left-- > 0)
{