You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
-#include "error.h"
+#include "dictionary.h"
#include <stdlib.h>
+#include <ctype.h>
#include "algorithm.h"
#include "alloc.h"
#include "case.h"
+#include "cat.h"
+#include "error.h"
#include "hash.h"
#include "misc.h"
+#include "settings.h"
#include "str.h"
#include "value-labels.h"
#include "var.h"
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
/* A dictionary. */
struct dictionary
{
d->var = NULL;
d->var_cnt = d->var_cap = 0;
- d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL);
+ d->name_tab = hsh_create (8, compare_var_names, hash_var_name, NULL, NULL);
d->next_value_idx = 0;
d->split = NULL;
d->split_cnt = 0;
{
struct dictionary *d;
size_t i;
-
+
assert (s != NULL);
-
+
d = dict_create ();
- for (i = 0; i < s->var_cnt; i++)
- dict_clone_var (d, s->var[i], s->var[i]->name);
+
+ for (i = 0; i < s->var_cnt; i++)
+ {
+ struct variable *sv = s->var[i];
+ struct variable *dv = dict_clone_var_assert (d, sv, sv->name);
+ var_set_short_name (dv, sv->short_name);
+ }
+
d->next_value_idx = s->next_value_idx;
d->split_cnt = s->split_cnt;
if (d->split_cnt > 0)
{
- d->split = xmalloc (d->split_cnt * sizeof *d->split);
+ d->split = xnmalloc (d->split_cnt, sizeof *d->split);
for (i = 0; i < d->split_cnt; i++)
d->split[i] = dict_lookup_var_assert (d, s->split[i]->name);
}
dict_set_label (d, dict_get_label (s));
dict_set_documents (d, dict_get_documents (s));
+ d->vector_cnt = s->vector_cnt;
+ d->vector = xnmalloc (d->vector_cnt, sizeof *d->vector);
for (i = 0; i < s->vector_cnt; i++)
- dict_create_vector (d, s->vector[i]->name,
- s->vector[i]->var, s->vector[i]->cnt);
+ {
+ struct vector *sv = s->vector[i];
+ struct vector *dv = d->vector[i] = xmalloc (sizeof *dv);
+ int j;
+
+ dv->idx = i;
+ strcpy (dv->name, sv->name);
+ dv->cnt = sv->cnt;
+ dv->var = xnmalloc (dv->cnt, sizeof *dv->var);
+ for (j = 0; j < dv->cnt; j++)
+ dv->var[j] = d->var[sv->var[j]->index];
+ }
return d;
}
for (i = 0; i < d->var_cnt; i++)
{
struct variable *v = d->var[i];
+ var_clear_aux (v);
val_labs_destroy (v->val_labs);
free (v->label);
free (v);
dict_clear_vectors (d);
}
+/* Destroys the aux data for every variable in D, by calling
+   var_clear_aux() on each element of D's variable array in
+   index order.  D must not be null. */
+void
+dict_clear_aux (struct dictionary *d)
+{
+  size_t i;
+
+  assert (d != NULL);
+
+  /* The index is a size_t, like the indexes the other
+     dictionary functions compare against d->var_cnt. */
+  for (i = 0; i < d->var_cnt; i++)
+    var_clear_aux (d->var[i]);
+}
+
/* Clears a dictionary and destroys it. */
void
dict_destroy (struct dictionary *d)
if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
count++;
- *vars = xmalloc (count * sizeof **vars);
+ *vars = xnmalloc (count, sizeof **vars);
*cnt = 0;
for (i = 0; i < d->var_cnt; i++)
if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
assert (*cnt == count);
}
+
/* Creates and returns a new variable in D with the given NAME
and WIDTH. Returns a null pointer if the given NAME would
duplicate that of an existing variable in the dictionary. */
struct variable *
-dict_create_var (struct dictionary *d, const char *name, int width)
+dict_create_var (struct dictionary *d, const char *name, int width)
{
struct variable *v;
assert (d != NULL);
assert (name != NULL);
- assert (strlen (name) >= 1 && strlen (name) <= 8);
+
assert (width >= 0 && width < 256);
+ assert (var_is_valid_name(name,0));
+
/* Make sure there's not already a variable by that name. */
if (dict_lookup_var (d, name) != NULL)
return NULL;
/* Allocate and initialize variable. */
v = xmalloc (sizeof *v);
- strncpy (v->name, name, sizeof v->name);
- v->name[8] = '\0';
- v->index = d->var_cnt;
+ str_copy_trunc (v->name, sizeof v->name, name);
v->type = width == 0 ? NUMERIC : ALPHA;
v->width = width;
v->fv = d->next_value_idx;
v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
v->init = 1;
- v->reinit = dict_class_from_id (name) != DC_SCRATCH;
- v->miss_type = MISSING_NONE;
+ v->reinit = dict_class_from_id (v->name) != DC_SCRATCH;
+ v->index = d->var_cnt;
+ mv_init (&v->miss, width);
if (v->type == NUMERIC)
{
- v->print.type = FMT_F;
- v->print.w = 8;
- v->print.d = 2;
+ v->print = f8_2;
+ v->alignment = ALIGN_RIGHT;
+ v->display_width = 8;
+ v->measure = MEASURE_SCALE;
}
else
{
- v->print.type = FMT_A;
- v->print.w = v->width;
- v->print.d = 0;
+ v->print = make_output_format (FMT_A, v->width, 0);
+ v->alignment = ALIGN_LEFT;
+ v->display_width = 8;
+ v->measure = MEASURE_NOMINAL;
}
v->write = v->print;
v->val_labs = val_labs_create (v->width);
v->label = NULL;
+ var_clear_short_name (v);
+ v->aux = NULL;
+ v->aux_dtor = NULL;
+ v->obs_vals = NULL;
/* Update dictionary. */
if (d->var_cnt >= d->var_cap)
{
d->var_cap = 8 + 2 * d->var_cap;
- d->var = xrealloc (d->var, d->var_cap * sizeof *d->var);
+ d->var = xnrealloc (d->var, d->var_cap, sizeof *d->var);
}
d->var[v->index] = v;
d->var_cnt++;
hsh_force_insert (d->name_tab, v);
+
d->next_value_idx += v->nv;
return v;
and WIDTH. Assert-fails if the given NAME would duplicate
that of an existing variable in the dictionary. */
struct variable *
-dict_create_var_assert (struct dictionary *d, const char *name, int width)
+dict_create_var_assert (struct dictionary *d, const char *name, int width)
{
struct variable *v = dict_create_var (d, name, width);
+ /* dict_create_var() returns a null pointer when NAME is already taken. */
assert (v != NULL);
return v;
}
-/* Creates a new variable in D named NAME, as a copy of existing
- variable OV, which need not be in D or in any dictionary. */
+/* Creates and returns a new variable in D with name NAME, as a
+ copy of existing variable OV, which need not be in D or in any
+ dictionary. Returns a null pointer if the given NAME would
+ duplicate that of an existing variable in the dictionary. */
struct variable *
dict_clone_var (struct dictionary *d, const struct variable *ov,
const char *name)
assert (d != NULL);
assert (ov != NULL);
assert (name != NULL);
- assert (strlen (name) >= 1 && strlen (name) <= 8);
+
+ assert (strlen (name) >= 1);
+ assert (strlen (name) <= LONG_NAME_LEN);
nv = dict_create_var (d, name, ov->width);
if (nv == NULL)
return NULL;
+ /* Copy most members not copied via dict_create_var().
+ short_name[] is intentionally not copied, because there is
+ no reason to give a new variable with potentially a new name
+ the same short name. */
nv->init = 1;
nv->reinit = ov->reinit;
- nv->miss_type = ov->miss_type;
- memcpy (nv->missing, ov->missing, sizeof nv->missing);
+ mv_copy (&nv->miss, &ov->miss);
nv->print = ov->print;
nv->write = ov->write;
val_labs_destroy (nv->val_labs);
nv->val_labs = val_labs_copy (ov->val_labs);
if (ov->label != NULL)
nv->label = xstrdup (ov->label);
+ nv->measure = ov->measure;
+ nv->display_width = ov->display_width;
+ nv->alignment = ov->alignment;
return nv;
}
-/* Changes the name of V in D to name NEW_NAME. Assert-fails if
- a variable named NEW_NAME is already in D, except that
- NEW_NAME may be the same as V's existing name. */
-void
-dict_rename_var (struct dictionary *d, struct variable *v,
- const char *new_name)
+/* Creates and returns a new variable in D with name NAME, as a
+ copy of existing variable OV, which need not be in D or in any
+ dictionary. Assert-fails if the given NAME would duplicate
+ that of an existing variable in the dictionary.
+
+ This is a call to dict_clone_var() followed by an assertion
+ that its result is not a null pointer; the result is then
+ returned unchanged. */
+struct variable *
+dict_clone_var_assert (struct dictionary *d, const struct variable *ov,
+ const char *name)
{
- assert (d != NULL);
+ struct variable *v = dict_clone_var (d, ov, name);
assert (v != NULL);
- assert (new_name != NULL);
- assert (strlen (new_name) >= 1 && strlen (new_name) <= 8);
- assert (dict_contains_var (d, v));
-
- if (!strcmp (v->name, new_name))
- return;
-
- assert (dict_lookup_var (d, new_name) == NULL);
-
- hsh_force_delete (d->name_tab, v);
- strncpy (v->name, new_name, sizeof v->name);
- v->name[8] = '\0';
- hsh_force_insert (d->name_tab, v);
+ return v;
}
/* Returns the variable named NAME in D, or a null pointer if no
assert (d != NULL);
assert (name != NULL);
- assert (strlen (name) >= 1 && strlen (name) <= 8);
-
- strncpy (v.name, name, sizeof v.name);
- v.name[8] = '\0';
+ str_copy_trunc (v.name, sizeof v.name, name);
return hsh_find (d->name_tab, &v);
}
/* Compares two double pointers to variables, which should point
to elements of a struct dictionary's `var' member array. */
static int
-compare_variable_dblptrs (const void *a_, const void *b_, void *aux UNUSED)
+compare_var_ptrs (const void *a_, const void *b_, void *aux UNUSED)
{
struct variable *const *a = a_;
struct variable *const *b = b_;
- if (a > b)
- return 1;
- else if (a < b)
- return -1;
- else
- return 0;
+ return *a < *b ? -1 : *a > *b; /* Orders by *A vs. *B: -1, 0, or 1. */
}
/* Deletes variable V from dictionary D and frees V.
assert (d != NULL);
assert (v != NULL);
assert (dict_contains_var (d, v));
- assert (d->var[v->index] == v);
- /* Remove v from splits, weight, filter variables. */
+ /* Delete aux data. */
+ var_clear_aux (v);
+
+ /* Remove V from splits, weight, filter variables. */
d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
- &v,
- compare_variable_dblptrs, NULL);
+ &v, compare_var_ptrs, NULL);
if (d->weight == v)
d->weight = NULL;
if (d->filter == v)
d->filter = NULL;
dict_clear_vectors (d);
- /* Remove v from var array. */
+ /* Remove V from var array. */
+ remove_element (d->var, d->var_cnt, sizeof *d->var, v->index);
d->var_cnt--;
- memmove (d->var + v->index, d->var + v->index + 1,
- (d->var_cnt - v->index) * sizeof *d->var);
/* Update index. */
for (i = v->index; i < d->var_cnt; i++)
/* Free memory. */
val_labs_destroy (v->val_labs);
+ cat_stored_values_destroy (v);
free (v->label);
free (v);
}
dict_delete_var (d, *vars++);
}
+/* Deletes scratch variables from dictionary D.  Every variable
+   whose name dict_class_from_id() classifies as DC_SCRATCH is
+   removed with dict_delete_var(); all other variables are
+   kept.  D must not be null. */
+void
+dict_delete_scratch_vars (struct dictionary *d)
+{
+  size_t i;
+
+  /* FIXME: this can be done in O(count) time, but this algorithm
+     is O(count**2). */
+  assert (d != NULL);
+
+  /* I is deliberately not advanced after a deletion:
+     dict_delete_var() removes the element from d->var and
+     decrements d->var_cnt, so the next candidate is then found
+     at the same index. */
+  for (i = 0; i < d->var_cnt; )
+    {
+      struct variable *v = d->var[i];
+
+      if (dict_class_from_id (v->name) == DC_SCRATCH)
+        dict_delete_var (d, v);
+      else
+        i++;
+    }
+}
+
+/* Relocates V so that it occupies 0-based position NEW_INDEX
+   within D.  The remaining variables in D, if any, keep their
+   order relative to one another.  The running time is linear in
+   the distance that V moves. */
+void
+dict_reorder_var (struct dictionary *d, struct variable *v,
+                  size_t new_index)
+{
+  size_t old_index;
+  size_t first, last;
+  size_t pos;
+
+  assert (d != NULL);
+  assert (v != NULL);
+  assert (dict_contains_var (d, v));
+  assert (new_index < d->var_cnt);
+
+  /* V's index member keeps its old value until the renumbering
+     loop below reaches it. */
+  old_index = v->index;
+  move_element (d->var, d->var_cnt, sizeof *d->var, old_index, new_index);
+
+  /* Renumber every slot from the lower to the higher of the two
+     positions, inclusive. */
+  if (old_index < new_index)
+    {
+      first = old_index;
+      last = new_index;
+    }
+  else
+    {
+      first = new_index;
+      last = old_index;
+    }
+  for (pos = first; pos <= last; pos++)
+    d->var[pos]->index = pos;
+}
+
/* Reorders the variables in D, placing the COUNT variables
listed in ORDER in that order at the beginning of D. The
other variables in D, if any, retain their relative
assert (count == 0 || order != NULL);
assert (count <= d->var_cnt);
- new_var = xmalloc (d->var_cnt * sizeof *new_var);
+ new_var = xnmalloc (d->var_cnt, sizeof *new_var);
memcpy (new_var, order, count * sizeof *new_var);
for (i = 0; i < count; i++)
{
d->var = new_var;
}
+/* Changes the name of V in D to name NEW_NAME. Assert-fails if
+ a variable named NEW_NAME is already in D, except that
+ NEW_NAME may be the same as V's existing name.
+
+ V is removed from D's name table, renamed, and reinserted.
+ If get_algorithm() returns ENHANCED, V's short name is then
+ cleared with var_clear_short_name().
+
+ NOTE(review): compare_var_names() is also the comparison
+ function given to hsh_create() for D's name table, whose
+ entries are variables, but here it is passed two name
+ strings; presumably that relies on how struct variable is
+ laid out -- confirm. */
+void
+dict_rename_var (struct dictionary *d, struct variable *v,
+ const char *new_name)
+{
+ assert (d != NULL);
+ assert (v != NULL);
+ assert (new_name != NULL);
+ assert (var_is_valid_name (new_name, false));
+ assert (dict_contains_var (d, v));
+ assert (!compare_var_names (v->name, new_name, NULL)
+ || dict_lookup_var (d, new_name) == NULL);
+
+ hsh_force_delete (d->name_tab, v);
+ str_copy_trunc (v->name, sizeof v->name, new_name);
+ hsh_force_insert (d->name_tab, v);
+
+ if (get_algorithm () == ENHANCED)
+ var_clear_short_name (v);
+}
+
/* Renames COUNT variables specified in VARS to the names given
in NEW_NAMES within dictionary D. If the renaming would
result in a duplicate variable name, returns zero and stores a
assert (count == 0 || vars != NULL);
assert (count == 0 || new_names != NULL);
- old_names = xmalloc (count * sizeof *old_names);
+ /* Remove the variables to be renamed from the name hash,
+ save their names, and rename them. */
+ old_names = xnmalloc (count, sizeof *old_names);
for (i = 0; i < count; i++)
{
assert (d->var[vars[i]->index] == vars[i]);
+ assert (var_is_valid_name (new_names[i], false));
hsh_force_delete (d->name_tab, vars[i]);
old_names[i] = xstrdup (vars[i]->name);
+ strcpy (vars[i]->name, new_names[i]);
}
-
+
+ /* Add the renamed variables back into the name hash,
+ checking for conflicts. */
for (i = 0; i < count; i++)
{
assert (new_names[i] != NULL);
assert (*new_names[i] != '\0');
- assert (strlen (new_names[i]) < 9);
- strcpy (vars[i]->name, new_names[i]);
- if (hsh_insert (d->name_tab, vars[i]) != NULL)
+ assert (strlen (new_names[i]) >= 1);
+ assert (strlen (new_names[i]) <= LONG_NAME_LEN);
+
+ if (hsh_insert (d->name_tab, vars[i]) != NULL)
{
+ /* There is a name conflict.
+ Back out all the name changes that have already
+ taken place, and indicate failure. */
size_t fail_idx = i;
if (err_name != NULL)
*err_name = new_names[i];
}
success = 0;
- break;
+ goto done;
}
}
+ /* Clear short names. */
+ if (get_algorithm () == ENHANCED)
+ for (i = 0; i < count; i++)
+ var_clear_short_name (vars[i]);
+
+ done:
+ /* Free the old names we kept around. */
for (i = 0; i < count; i++)
free (old_names[i]);
free (old_names);
else
{
double w = case_num (c, d->weight->fv);
- if ( w < 0.0 || w == SYSMIS || is_num_user_missing(w, d->weight) )
+ if (w < 0.0 || mv_is_num_missing (&d->weight->miss, w))
w = 0.0;
if ( w == 0.0 && *warn_on_invalid ) {
*warn_on_invalid = 0;
i++;
}
else
- dict_delete_var (default_dict, v);
+ dict_delete_var (d, v);
+ }
+}
+
+/* Transfers values out of SRC, a case laid out according to
+   dictionary D, into DST, a case laid out according to the
+   dictionary that dict_compact_values(D) will produce.  Values
+   of scratch variables are left out; values of the remaining
+   variables are placed in DST one after another, in D's
+   variable order. */
+void
+dict_compact_case (const struct dictionary *d,
+                   struct ccase *dst, const struct ccase *src)
+{
+  size_t dst_idx = 0;
+  size_t var_idx;
+
+  for (var_idx = 0; var_idx < d->var_cnt; var_idx++)
+    {
+      struct variable *var = d->var[var_idx];
+
+      /* Scratch variables contribute nothing to DST. */
+      if (dict_class_from_id (var->name) == DC_SCRATCH)
+        continue;
+
+      case_copy (dst, dst_idx, src, var->fv, var->nv);
+      dst_idx += var->nv;
}
}
size_t next_value_idx;
int *idx_to_fv;
- idx_to_fv = xmalloc (d->var_cnt * sizeof *idx_to_fv);
+ idx_to_fv = xnmalloc (d->var_cnt, sizeof *idx_to_fv);
next_value_idx = 0;
for (i = 0; i < d->var_cnt; i++)
{
assert (cnt == 0 || split != NULL);
d->split_cnt = cnt;
- d->split = xrealloc (d->split, cnt * sizeof *d->split);
+ d->split = xnrealloc (d->split, cnt, sizeof *d->split);
memcpy (d->split, split, cnt * sizeof *d->split);
}
struct variable **var, size_t cnt)
{
struct vector *vector;
+ size_t i;
assert (d != NULL);
assert (name != NULL);
- assert (strlen (name) > 0 && strlen (name) < 9);
+ assert (var_is_valid_name (name, false));
assert (var != NULL);
assert (cnt > 0);
if (dict_lookup_vector (d, name) != NULL)
return 0;
- d->vector = xrealloc (d->vector, (d->vector_cnt + 1) * sizeof *d->vector);
+ d->vector = xnrealloc (d->vector, d->vector_cnt + 1, sizeof *d->vector);
vector = d->vector[d->vector_cnt] = xmalloc (sizeof *vector);
vector->idx = d->vector_cnt++;
- strncpy (vector->name, name, 8);
- vector->name[8] = '\0';
- vector->var = xmalloc (cnt * sizeof *var);
- memcpy (vector->var, var, cnt * sizeof *var);
+ str_copy_trunc (vector->name, sizeof vector->name, name);
+ vector->var = xnmalloc (cnt, sizeof *var);
+ for (i = 0; i < cnt; i++)
+ {
+ assert (dict_contains_var (d, var[i]));
+ vector->var[i] = var[i];
+ }
vector->cnt = cnt;
return 1;
assert (name != NULL);
for (i = 0; i < d->vector_cnt; i++)
- if (!strcmp (d->vector[i]->name, name))
+ if (!strcasecmp (d->vector[i]->name, name))
return d->vector[i];
return NULL;
}
d->vector = NULL;
d->vector_cnt = 0;
}
+
+/* Compares strings A and B with strcmp() and returns its
+   result.  AUX is ignored. */
+static int
+compare_strings (const void *a, const void *b, void *aux UNUSED)
+{
+  const char *lhs = a;
+  const char *rhs = b;
+
+  return strcmp (lhs, rhs);
+}
+
+/* Returns the hash of string S as computed by
+   hsh_hash_string().  AUX is ignored. */
+static unsigned
+hash_string (const void *s, void *aux UNUSED)
+{
+  const char *string = s;
+
+  return hsh_hash_string (string);
+}
+
+/* Gives every variable in D a valid short_name[] that no other
+   variable in D shares.  Priority for a given short name goes
+   first to a variable whose actual name is already short.
+   Next come variables that already carry a short_name[]; if
+   that name is taken, the variable is handled as though its
+   short_name[] had been empty.  Otherwise, long names are
+   truncated to form short names, and if that causes conflicts
+   the variables are renamed as PREFIX_A, PREFIX_B, and so
+   on. */
+void
+dict_assign_short_names (struct dictionary *d)
+{
+  struct hsh_table *taken;
+  size_t idx;
+
+  /* Pass 1: a variable whose name is short gets that name as
+     its short name; any other variable loses a short_name[]
+     that matches the name of a variable in D. */
+  for (idx = 0; idx < d->var_cnt; idx++)
+    {
+      struct variable *var = d->var[idx];
+
+      if (strlen (var->name) > SHORT_NAME_LEN)
+        {
+          if (dict_lookup_var (d, var->short_name) != NULL)
+            var_clear_short_name (var);
+        }
+      else
+        var_set_short_name (var, var->name);
+    }
+
+  /* Pass 2: each variable that still has a short_name[] keeps
+     it, unless an earlier variable already claimed the same
+     one. */
+  taken = hsh_create (d->var_cnt, compare_strings, hash_string,
+                      NULL, NULL);
+  for (idx = 0; idx < d->var_cnt; idx++)
+    {
+      struct variable *var = d->var[idx];
+
+      if (var->short_name[0] == '\0')
+        continue;
+      if (hsh_insert (taken, var->short_name) != NULL)
+        var_clear_short_name (var);
+    }
+
+  /* Pass 3: assign short names to the variables left without
+     one. */
+  for (idx = 0; idx < d->var_cnt; idx++)
+    {
+      struct variable *var = d->var[idx];
+      int sfx;
+
+      if (var->short_name[0] != '\0')
+        continue;
+
+      /* Form initial short_name. */
+      var_set_short_name (var, var->name);
+
+      /* Try _A, _B, ... _AA, _AB, etc., if needed. */
+      sfx = 0;
+      while (hsh_insert (taken, var->short_name) != NULL)
+        {
+          var_set_short_name_suffix (var, var->name, sfx);
+          sfx++;
+        }
+    }
+
+  /* The table was only needed to detect collisions. */
+  hsh_destroy (taken);
+}