#include "libpspp/pool.h"
#include "libpspp/str.h"
#include "libpspp/string-array.h"
+#include "libpspp/ll.h"
#include "gl/intprops.h"
#include "gl/minmax.h"
/* A dictionary. */
struct dictionary
{
+ int ref_cnt;
struct vardict_info *var; /* Variables. */
size_t var_cnt, var_cap; /* Number of variables, capacity. */
struct caseproto *proto; /* Prototype for dictionary cases
struct mrset **mrsets; /* Multiple response sets. */
size_t n_mrsets; /* Number of multiple response sets. */
+ /* Whether variable names must be valid identifiers. Normally, this is
+ true, but sometimes a dictionary is prepared for external use
+ (e.g. output to a CSV file) where names don't have to be valid. */
+ bool names_must_be_ids;
+
char *encoding; /* Character encoding of string data */
const struct dict_callbacks *callbacks; /* Callbacks on dictionary
dict_id_is_valid (const struct dictionary *dict, const char *id,
bool issue_error)
{
- return id_is_valid (id, dict->encoding, issue_error);
+ return (!dict->names_must_be_ids
+ || id_is_valid (id, dict->encoding, issue_error));
}
void
dict_dump (const struct dictionary *d)
{
int i;
- for (i = 0 ; i < d->var_cnt ; ++i )
+ for (i = 0 ; i < d->var_cnt ; ++i)
{
const struct variable *v = d->var[i].var;
printf ("Name: %s;\tdict_idx: %zu; case_idx: %zu\n",
struct dictionary *d = xzalloc (sizeof *d);
d->encoding = xstrdup (encoding);
+ d->names_must_be_ids = true;
hmap_init (&d->name_map);
attrset_init (&d->attributes);
+ d->ref_cnt = 1;
return d;
}
size_t i;
d = dict_create (s->encoding);
+ dict_set_names_must_be_ids (d, dict_get_names_must_be_ids (s));
for (i = 0; i < s->var_cnt; i++)
{
d->split_cnt = s->split_cnt;
if (d->split_cnt > 0)
{
- d->split = xnmalloc (d->split_cnt, sizeof *d->split);
+ d->split = xnmalloc (d->split_cnt, sizeof *d->split);
for (i = 0; i < d->split_cnt; i++)
d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i]));
}
{
/* FIXME? Should we really clear case_limit, label, documents?
Others are necessarily cleared by deleting all the variables.*/
- while (d->var_cnt > 0 )
+ while (d->var_cnt > 0)
{
dict_delete_var (d, d->var[d->var_cnt - 1].var);
}
}
/* Clears a dictionary and destroys it. */
+static void
+_dict_destroy (struct dictionary *d)
+{
+ /* Frees D together with everything it owns, without looking at D's
+ reference count; dict_unref() calls this once the count drops to zero.
+
+ In general, we don't want callbacks occurring if the dictionary
+ is being destroyed, so they are detached before anything is torn down.
+
+ NOTE(review): only d->callbacks is cleared here; the separate
+ d->changed hook is left in place and may still fire -- confirm that
+ this is intended.
+
+ NOTE(review): a file-scope identifier that starts with an underscore
+ is reserved by ISO C (C11 7.1.3); consider renaming this helper. */
+ d->callbacks = NULL ;
+
+ dict_clear (d);
+ string_array_destroy (&d->documents);
+ hmap_destroy (&d->name_map);
+ attrset_destroy (&d->attributes);
+ dict_clear_mrsets (d);
+ free (d->encoding); /* Allocated with xstrdup() when D was created. */
+ free (d);
+}
+
+struct dictionary *
+dict_ref (struct dictionary *d)
+{ /* Takes one more reference to D and returns D itself.  Every reference
+ is given back with dict_unref().  Unlike dict_unref(), this function
+ does not accept a null D: D is dereferenced unconditionally. */
+ d->ref_cnt++;
+ return d;
+}
+
void
-dict_destroy (struct dictionary *d)
+dict_unref (struct dictionary *d) /* Releases one reference to D. */
{
- if (d != NULL)
- {
- /* In general, we don't want callbacks occurring, if the dictionary
- is being destroyed */
- d->callbacks = NULL ;
-
- dict_clear (d);
- string_array_destroy (&d->documents);
- hmap_destroy (&d->name_map);
- attrset_destroy (&d->attributes);
- dict_clear_mrsets (d);
- free (d->encoding);
- free (d);
- }
+ if (d == NULL)
+ return; /* Unreferencing a null dictionary is a no-op. */
+ d->ref_cnt--;
+ assert (d->ref_cnt >= 0); /* Negative: more unrefs than references. */
+ if (d->ref_cnt == 0)
+ _dict_destroy (d); /* That was the last reference: free D. */
}
/* Returns the number of variables in D. */
vardict->case_index = case_index;
var_set_vardict (v, vardict);
- if ( d->changed ) d->changed (d, d->changed_data);
- if ( d->callbacks && d->callbacks->var_added )
+ if (d->changed) d->changed (d, d->changed_data);
+ if (d->callbacks && d->callbacks->var_added)
d->callbacks->var_added (d, var_get_dict_index (v), d->cb_data);
invalidate_proto (d);
hmap_insert_fast (&d->name_map, &vardict->name_node,
vardict->name_node.hash);
- if ( d->changed ) d->changed (d, d->changed_data);
+ if (d->changed) d->changed (d, d->changed_data);
if (old)
{
d->callbacks->var_changed (d, var_get_dict_index (var), VAR_TRAIT_POSITION, old, d->cb_data);
/* Free memory. */
var_clear_vardict (v);
- if ( d->changed ) d->changed (d, d->changed_data);
+ if (d->changed) d->changed (d, d->changed_data);
invalidate_proto (d);
- if (d->callbacks && d->callbacks->var_deleted )
+ if (d->callbacks && d->callbacks->var_deleted)
d->callbacks->var_deleted (d, v, dict_index, case_index, d->cb_data);
var_destroy (v);
/* Deletes the COUNT variables in D starting at index IDX. This
is unsafe; see the comment on dict_delete_var() for
- details. */
+ details. Deleting consecutive vars results in fewer callbacks
+ than iterating over dict_delete_var().
+ A simple while loop over dict_delete_var() will
+ produce (d->var_cnt - IDX) * COUNT variable changed callbacks
+ plus COUNT variable delete callbacks.
+ This function produces d->var_cnt - IDX variable changed callbacks
+ plus COUNT variable delete callbacks. */
void
dict_delete_consecutive_vars (struct dictionary *d, size_t idx, size_t count)
{
- /* FIXME: this can be done in O(count) time, but this algorithm
- is O(count**2). */
assert (idx + count <= d->var_cnt);
- while (count-- > 0)
- dict_delete_var (d, d->var[idx].var);
+ /* We need to remember each variable and its case index for the
+ delete callbacks issued at the end, because the vardict that holds
+ the case index is gone by then.  They are kept in a linked list.
+
+ LL must stay the first member of struct delvar: the node popped
+ from the list below is converted back with a plain pointer cast. */
+ struct delvar {
+ struct ll ll;
+ struct variable *var;
+ int case_index;
+ };
+ struct ll_list list = LL_INITIALIZER (list);
+
+ /* Pass 1: detach every doomed variable from the split variables, the
+ multiple response sets, the weight and the filter, and record it. */
+ for (size_t i = idx; i < idx + count; i++)
+ {
+ /* No null check: as elsewhere in this file, the result of an
+ x*alloc() call is used directly. */
+ struct delvar *dv = xmalloc (sizeof *dv);
+ struct variable *v = d->var[i].var;
+
+ dict_unset_split_var (d, v);
+ dict_unset_mrset_var (d, v);
+
+ if (d->weight == v)
+ dict_set_weight (d, NULL);
+
+ if (d->filter == v)
+ dict_set_filter (d, NULL);
+
+ dv->var = v;
+ dv->case_index = var_get_case_index (v);
+ ll_push_tail (&list, &dv->ll);
+ }
+
+ dict_clear_vectors (d);
+
+ /* Remove variables from var array. */
+ unindex_vars (d, idx, d->var_cnt);
+ remove_range (d->var, d->var_cnt, sizeof *d->var, idx, count);
+ d->var_cnt -= count;
+
+ /* Reindexing results in variable-changed callbacks for the
+ variables that moved down. */
+ reindex_vars (d, idx, d->var_cnt);
+
+ invalidate_proto (d);
+ if (d->changed) d->changed (d, d->changed_data);
+
+ /* Pass 2: now issue the variable delete callbacks and delete
+ the variables. The vardict is not valid at this point
+ anymore. That is the reason why we stored the
+ case index before reindexing.  The list was filled in index
+ order, so popping from the head pairs each variable with the
+ dictionary index VI that it used to have. */
+ for (size_t vi = idx; vi < idx + count; vi++)
+ {
+ /* Valid because LL is the first member of struct delvar. */
+ struct delvar *dv = (struct delvar *) ll_pop_head (&list);
+ var_clear_vardict (dv->var);
+ if (d->callbacks && d->callbacks->var_deleted)
+ d->callbacks->var_deleted (d, dv->var, vi, dv->case_index, d->cb_data);
+ var_destroy (dv->var);
+ free (dv);
+ }
}
/* Deletes scratch variables from dictionary D. */
/* FIXME: this can be done in O(count) time, but this algorithm
is O(count**2). */
- for (i = 0; i < d->var_cnt; )
+ for (i = 0; i < d->var_cnt;)
if (var_get_dict_class (d->var[i].var) == DC_SCRATCH)
dict_delete_var (d, d->var[i].var);
else
var_set_vardict (v, vardict);
}
-/* Changes the name of V in D to name NEW_NAME. Assert-fails if
- a variable named NEW_NAME is already in D, except that
- NEW_NAME may be the same as V's existing name. */
-void
-dict_rename_var (struct dictionary *d, struct variable *v,
- const char *new_name)
+/* Tries to change the name of V in D to NEW_NAME. Returns true if
+ successful, false if a variable (other than V) with the given name already
+ exists in D.  On failure nothing is modified and no callback is invoked. */
+bool
+dict_try_rename_var (struct dictionary *d, struct variable *v,
+ const char *new_name)
{
- struct variable *old = var_clone (v);
- assert (!utf8_strcasecmp (var_get_name (v), new_name)
- || dict_lookup_var (d, new_name) == NULL);
+ struct variable *conflict = dict_lookup_var (d, new_name);
+ if (conflict && v != conflict)
+ return false; /* NEW_NAME already belongs to a different variable. */
+ struct variable *old = var_clone (v); /* Snapshot of V before the rename,
+ passed to the var_changed callback and destroyed below.  It is taken
+ only after the conflict check, so the early return has nothing to
+ free. */
unindex_var (d, var_get_vardict (v));
rename_var (v, new_name);
reindex_var (d, var_get_vardict (v));
if (settings_get_algorithm () == ENHANCED)
var_clear_short_names (v);
- if ( d->changed ) d->changed (d, d->changed_data);
- if ( d->callbacks && d->callbacks->var_changed )
+ if (d->changed) d->changed (d, d->changed_data);
+ if (d->callbacks && d->callbacks->var_changed)
d->callbacks->var_changed (d, var_get_dict_index (v), VAR_TRAIT_NAME, old, d->cb_data);
var_destroy (old);
+
+ return true;
+}
+
+/* Changes the name of V in D to name NEW_NAME. Assert-fails if
+ a variable named NEW_NAME is already in D, except that
+ NEW_NAME may be the same as V's existing name.
+
+ This is a thin wrapper around dict_try_rename_var() for callers that
+ already know that the new name is free. */
+void
+dict_rename_var (struct dictionary *d, struct variable *v,
+ const char *new_name)
+{
+ bool ok UNUSED = dict_try_rename_var (d, v, new_name); /* The call is kept
+ outside assert() so that the rename still happens when assertions
+ are compiled out; UNUSED presumably silences the resulting
+ unused-variable warning. */
+ assert (ok);
}
/* Renames COUNT variables specified in VARS to the names given
return make_numeric_name (dict, num_start);
}
+/* Returns whether variable names must be valid identifiers. Normally, this is
+ true, but sometimes a dictionary is prepared for external use (e.g. output
+ to a CSV file) where names don't have to be valid.
+
+ While this is false, dict_id_is_valid() accepts every name without
+ examining it. */
+bool
+dict_get_names_must_be_ids (const struct dictionary *d)
+{
+ return d->names_must_be_ids;
+}
+
+/* Sets whether variable names must be valid identifiers. Normally, this is
+ true, but sometimes a dictionary is prepared for external use (e.g. output
+ to a CSV file) where names don't have to be valid.
+
+ Changing this setting from false to true doesn't make the dictionary check
+ all the existing variable names, so it can cause an invariant violation.
+ The setting is read by dict_id_is_valid() each time it is called, so a
+ change affects only names that are validated afterwards. */
+void
+dict_set_names_must_be_ids (struct dictionary *d, bool names_must_be_ids)
+{
+ d->names_must_be_ids = names_must_be_ids;
+}
+
/* Returns the weighting variable in dictionary D, or a null
pointer if the dictionary is unweighted. */
struct variable *
}
}
+/* Returns the format to use for weights: the print format of D's weighting
+ variable or, if D is unweighted, a pointer to F_8_0. */
+const struct fmt_spec *
+dict_get_weight_format (const struct dictionary *d)
+{
+ return d->weight ? var_get_print_format (d->weight) : &F_8_0;
+}
+
/* Sets the weighting variable of D to V, or turning off
weighting if V is a null pointer. */
void
d->weight = v;
if (d->changed) d->changed (d, d->changed_data);
- if ( d->callbacks && d->callbacks->weight_changed )
+ if (d->callbacks && d->callbacks->weight_changed)
d->callbacks->weight_changed (d,
v ? var_get_dict_index (v) : -1,
d->cb_data);
d->filter = v;
if (d->changed) d->changed (d, d->changed_data);
- if ( d->callbacks && d->callbacks->filter_changed )
+ if (d->callbacks && d->callbacks->filter_changed)
d->callbacks->filter_changed (d,
v ? var_get_dict_index (v) : -1,
d->cb_data);
assert (cnt == 0 || split != NULL);
d->split_cnt = cnt;
- if ( cnt > 0 )
+ if (cnt > 0)
{
d->split = xnrealloc (d->split, cnt, sizeof *d->split) ;
memcpy (d->split, split, cnt * sizeof *d->split);
}
if (d->changed) d->changed (d, d->changed_data);
- if ( d->callbacks && d->callbacks->split_changed )
+ if (d->callbacks && d->callbacks->split_changed)
d->callbacks->split_changed (d, d->cb_data);
}
const char *s;
dict_clear_documents (d);
- for (s = new_docs; *s != '\0'; )
+ for (s = new_docs; *s != '\0';)
{
size_t len = strcspn (s, "\n");
char *line = xmemdup0 (s, len);
assert (dict_contains_var (dict, var));
- for (i = 0; i < dict->n_mrsets; )
+ for (i = 0; i < dict->n_mrsets;)
{
struct mrset *mrset = dict->mrsets[i];
size_t j;
- for (j = 0; j < mrset->n_vars; )
+ for (j = 0; j < mrset->n_vars;)
if (mrset->vars[j] == var)
remove_element (mrset->vars, mrset->n_vars--,
sizeof *mrset->vars, j);
void
dict_var_changed (const struct variable *v, unsigned int what, struct variable *oldvar)
{
- if ( var_has_vardict (v))
+ if (var_has_vardict (v))
{
const struct vardict_info *vardict = var_get_vardict (v);
struct dictionary *d = vardict->dict;
- if ( NULL == d)
+ if (NULL == d)
return;
- if (d->changed ) d->changed (d, d->changed_data);
- if ( d->callbacks && d->callbacks->var_changed )
+ if (d->changed) d->changed (d, d->changed_data);
+ if (d->callbacks && d->callbacks->var_changed)
d->callbacks->var_changed (d, var_get_dict_index (v), what, oldvar, d->cb_data);
}
var_destroy (oldvar);
valgrind --leak-check --show-reachable won't show internal_dict. */
if (dict_get_var_cnt (internal_dict) == 0)
{
- dict_destroy (internal_dict);
+ dict_unref (internal_dict);
internal_dict = NULL;
}
}