X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdictionary.c;h=6fbd5dfa575783efe2447225682fc6cbe1c055c6;hb=3dc1b25c7117a9f963a95d67397f5582b92356a2;hp=f2ac187a1dcf1805a61096b7b2ba713800c61e15;hpb=bb7fdfcf1c9c380dafc165c556123094ad816825;p=pspp-builds.git diff --git a/src/dictionary.c b/src/dictionary.c index f2ac187a..6fbd5dfa 100644 --- a/src/dictionary.c +++ b/src/dictionary.c @@ -14,16 +14,20 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include -#include "error.h" +#include "dictionary.h" #include +#include #include "algorithm.h" #include "alloc.h" +#include "case.h" +#include "error.h" #include "hash.h" #include "misc.h" +#include "settings.h" #include "str.h" #include "value-labels.h" #include "var.h" @@ -54,7 +58,7 @@ dict_create (void) d->var = NULL; d->var_cnt = d->var_cap = 0; - d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL); + d->name_tab = hsh_create (8, compare_var_names, hash_var_name, NULL, NULL); d->next_value_idx = 0; d->split = NULL; d->split_cnt = 0; @@ -76,12 +80,18 @@ dict_clone (const struct dictionary *s) { struct dictionary *d; size_t i; - + assert (s != NULL); - + d = dict_create (); - for (i = 0; i < s->var_cnt; i++) - dict_clone_var (d, s->var[i], s->var[i]->name); + + for (i = 0; i < s->var_cnt; i++) + { + struct variable *sv = s->var[i]; + struct variable *dv = dict_clone_var_assert (d, sv, sv->name); + var_set_short_name (dv, sv->short_name); + } + d->next_value_idx = s->next_value_idx; d->split_cnt = s->split_cnt; @@ -102,9 +112,21 @@ dict_clone (const struct dictionary *s) dict_set_label (d, dict_get_label (s)); dict_set_documents (d, dict_get_documents (s)); + d->vector_cnt = s->vector_cnt; + d->vector = xmalloc (d->vector_cnt * sizeof *d->vector); for (i = 0; i < s->vector_cnt; i++) - dict_create_vector (d, s->vector[i]->name, - s->vector[i]->var, s->vector[i]->cnt); + { + struct vector *sv = s->vector[i]; + struct vector *dv = d->vector[i] = xmalloc (sizeof *dv); + int j; + + dv->idx = i; + strcpy (dv->name, sv->name); + dv->cnt = sv->cnt; + dv->var = xmalloc (dv->cnt * sizeof *dv->var); + for (j = 0; j < dv->cnt; j++) + dv->var[j] = d->var[sv->var[j]->index]; + } return d; } @@ -123,6 +145,7 @@ dict_clear (struct dictionary *d) for (i = 0; i < d->var_cnt; i++) { struct variable *v = d->var[i]; + var_clear_aux (v); val_labs_destroy (v->val_labs); free (v->label); free (v); @@ -145,6 +168,19 @@ dict_clear (struct dictionary *d) dict_clear_vectors (d); } +/* Destroys the aux data for every variable in D, by calling + var_clear_aux() for each variable. */ +void +dict_clear_aux (struct dictionary *d) +{ + int i; + + assert (d != NULL); + + for (i = 0; i < d->var_cnt; i++) + var_clear_aux (d->var[i]); +} + /* Clears a dictionary and destroys it. */ void dict_destroy (struct dictionary *d) @@ -209,50 +245,58 @@ dict_get_vars (const struct dictionary *d, struct variable ***vars, assert (*cnt == count); } + /* Creates and returns a new variable in D with the given NAME and WIDTH. Returns a null pointer if the given NAME would duplicate that of an existing variable in the dictionary. */ struct variable * -dict_create_var (struct dictionary *d, const char *name, int width) +dict_create_var (struct dictionary *d, const char *name, int width) { struct variable *v; assert (d != NULL); assert (name != NULL); - assert (strlen (name) >= 1 && strlen (name) <= 8); - assert (width >= 0 && width < 256); + assert (strlen (name) >= 1); + assert (strlen (name) <= LONG_NAME_LEN); + + assert (width >= 0 && width < 256); + /* Make sure there's not already a variable by that name. */ if (dict_lookup_var (d, name) != NULL) return NULL; /* Allocate and initialize variable. */ v = xmalloc (sizeof *v); - strncpy (v->name, name, sizeof v->name); - v->name[8] = '\0'; - v->index = d->var_cnt; + str_copy_trunc (v->name, sizeof v->name, name); v->type = width == 0 ? NUMERIC : ALPHA; v->width = width; v->fv = d->next_value_idx; v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8); v->init = 1; - v->reinit = dict_class_from_id (name) != DC_SCRATCH; + v->reinit = dict_class_from_id (v->name) != DC_SCRATCH; + v->index = d->var_cnt; v->miss_type = MISSING_NONE; if (v->type == NUMERIC) { - v->print.type = FMT_F; - v->print.w = 8; - v->print.d = 2; + v->print = f8_2; + v->alignment = ALIGN_RIGHT; + v->display_width = 8; + v->measure = MEASURE_SCALE; } else { - v->print.type = FMT_A; - v->print.w = v->width; - v->print.d = 0; + v->print = make_output_format (FMT_A, v->width, 0); + v->alignment = ALIGN_LEFT; + v->display_width = 8; + v->measure = MEASURE_NOMINAL; } v->write = v->print; v->val_labs = val_labs_create (v->width); v->label = NULL; + var_clear_short_name (v); + v->aux = NULL; + v->aux_dtor = NULL; /* Update dictionary. */ if (d->var_cnt >= d->var_cap) @@ -263,6 +307,7 @@ dict_create_var (struct dictionary *d, const char *name, int width) d->var[v->index] = v; d->var_cnt++; hsh_force_insert (d->name_tab, v); + d->next_value_idx += v->nv; return v; @@ -272,15 +317,17 @@ dict_create_var (struct dictionary *d, const char *name, int width) and WIDTH. Assert-fails if the given NAME would duplicate that of an existing variable in the dictionary. */ struct variable * -dict_create_var_assert (struct dictionary *d, const char *name, int width) +dict_create_var_assert (struct dictionary *d, const char *name, int width) { struct variable *v = dict_create_var (d, name, width); assert (v != NULL); return v; } -/* Creates a new variable in D named NAME, as a copy of existing - variable OV, which need not be in D or in any dictionary. */ +/* Creates and returns a new variable in D with name NAME, as a + copy of existing variable OV, which need not be in D or in any + dictionary. Returns a null pointer if the given NAME would + duplicate that of an existing variable in the dictionary. */ struct variable * dict_clone_var (struct dictionary *d, const struct variable *ov, const char *name) @@ -290,12 +337,18 @@ dict_clone_var (struct dictionary *d, const struct variable *ov, assert (d != NULL); assert (ov != NULL); assert (name != NULL); - assert (strlen (name) >= 1 && strlen (name) <= 8); + + assert (strlen (name) >= 1); + assert (strlen (name) <= LONG_NAME_LEN); nv = dict_create_var (d, name, ov->width); if (nv == NULL) return NULL; + /* Copy most members not copied via dict_create_var(). + short_name[] is intentionally not copied, because there is + no reason to give a new variable with potentially a new name + the same short name. */ nv->init = 1; nv->reinit = ov->reinit; nv->miss_type = ov->miss_type; @@ -306,32 +359,24 @@ dict_clone_var (struct dictionary *d, const struct variable *ov, nv->val_labs = val_labs_copy (ov->val_labs); if (ov->label != NULL) nv->label = xstrdup (ov->label); + nv->measure = ov->measure; + nv->display_width = ov->display_width; + nv->alignment = ov->alignment; return nv; } -/* Changes the name of V in D to name NEW_NAME. Assert-fails if - a variable named NEW_NAME is already in D, except that - NEW_NAME may be the same as V's existing name. */ -void -dict_rename_var (struct dictionary *d, struct variable *v, - const char *new_name) +/* Creates and returns a new variable in D with name NAME, as a + copy of existing variable OV, which need not be in D or in any + dictionary. Assert-fails if the given NAME would duplicate + that of an existing variable in the dictionary. */ +struct variable * +dict_clone_var_assert (struct dictionary *d, const struct variable *ov, + const char *name) { - assert (d != NULL); + struct variable *v = dict_clone_var (d, ov, name); assert (v != NULL); - assert (new_name != NULL); - assert (strlen (new_name) >= 1 && strlen (new_name) <= 8); - assert (dict_contains_var (d, v)); - - if (!strcmp (v->name, new_name)) - return; - - assert (dict_lookup_var (d, new_name) == NULL); - - hsh_force_delete (d->name_tab, v); - strncpy (v->name, new_name, sizeof v->name); - v->name[8] = '\0'; - hsh_force_insert (d->name_tab, v); + return v; } /* Returns the variable named NAME in D, or a null pointer if no @@ -343,11 +388,8 @@ dict_lookup_var (const struct dictionary *d, const char *name) assert (d != NULL); assert (name != NULL); - assert (strlen (name) >= 1 && strlen (name) <= 8); - - strncpy (v.name, name, sizeof v.name); - v.name[8] = '\0'; + str_copy_trunc (v.name, sizeof v.name, name); return hsh_find (d->name_tab, &v); } @@ -374,17 +416,12 @@ dict_contains_var (const struct dictionary *d, const struct variable *v) /* Compares two double pointers to variables, which should point to elements of a struct dictionary's `var' member array. */ static int -compare_variable_dblptrs (const void *a_, const void *b_, void *aux UNUSED) +compare_var_ptrs (const void *a_, const void *b_, void *aux UNUSED) { struct variable *const *a = a_; struct variable *const *b = b_; - if (a > b) - return 1; - else if (a < b) - return -1; - else - return 0; + return *a < *b ? -1 : *a > *b; } /* Deletes variable V from dictionary D and frees V. @@ -407,22 +444,22 @@ dict_delete_var (struct dictionary *d, struct variable *v) assert (d != NULL); assert (v != NULL); assert (dict_contains_var (d, v)); - assert (d->var[v->index] == v); - /* Remove v from splits, weight, filter variables. */ + /* Delete aux data. */ + var_clear_aux (v); + + /* Remove V from splits, weight, filter variables. */ d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split, - &v, - compare_variable_dblptrs, NULL); + &v, compare_var_ptrs, NULL); if (d->weight == v) d->weight = NULL; if (d->filter == v) d->filter = NULL; dict_clear_vectors (d); - /* Remove v from var array. */ + /* Remove V from var array. */ + remove_element (d->var, d->var_cnt, sizeof *d->var, v->index); d->var_cnt--; - memmove (d->var + v->index, d->var + v->index + 1, - (d->var_cnt - v->index) * sizeof *d->var); /* Update index. */ for (i = v->index; i < d->var_cnt; i++) @@ -452,6 +489,29 @@ dict_delete_vars (struct dictionary *d, dict_delete_var (d, *vars++); } +/* Moves V to 0-based position IDX in D. Other variables in D, + if any, retain their relative positions. Runs in time linear + in the distance moved. */ +void +dict_reorder_var (struct dictionary *d, struct variable *v, + size_t new_index) +{ + size_t min_idx, max_idx; + size_t i; + + assert (d != NULL); + assert (v != NULL); + assert (dict_contains_var (d, v)); + assert (new_index < d->var_cnt); + + move_element (d->var, d->var_cnt, sizeof *d->var, v->index, new_index); + + min_idx = min (v->index, new_index); + max_idx = max (v->index, new_index); + for (i = min_idx; i <= max_idx; i++) + d->var[i]->index = i; +} + /* Reorders the variables in D, placing the COUNT variables listed in ORDER in that order at the beginning of D. The other variables in D, if any, retain their relative @@ -487,6 +547,29 @@ dict_reorder_vars (struct dictionary *d, d->var = new_var; } +/* Changes the name of V in D to name NEW_NAME. Assert-fails if + a variable named NEW_NAME is already in D, except that + NEW_NAME may be the same as V's existing name. */ +void +dict_rename_var (struct dictionary *d, struct variable *v, + const char *new_name) +{ + assert (d != NULL); + assert (v != NULL); + assert (new_name != NULL); + assert (var_is_valid_name (new_name, false)); + assert (dict_contains_var (d, v)); + assert (!compare_var_names (v->name, new_name, NULL) + || dict_lookup_var (d, new_name) == NULL); + + hsh_force_delete (d->name_tab, v); + str_copy_trunc (v->name, sizeof v->name, new_name); + hsh_force_insert (d->name_tab, v); + + if (get_algorithm () == ENHANCED) + var_clear_short_name (v); +} + /* Renames COUNT variables specified in VARS to the names given in NEW_NAMES within dictionary D. If the renaming would result in a duplicate variable name, returns zero and stores a @@ -506,22 +589,32 @@ dict_rename_vars (struct dictionary *d, assert (count == 0 || vars != NULL); assert (count == 0 || new_names != NULL); + /* Remove the variables to be renamed from the name hash, + save their names, and rename them. */ old_names = xmalloc (count * sizeof *old_names); for (i = 0; i < count; i++) { assert (d->var[vars[i]->index] == vars[i]); + assert (var_is_valid_name (new_names[i], false)); hsh_force_delete (d->name_tab, vars[i]); old_names[i] = xstrdup (vars[i]->name); + strcpy (vars[i]->name, new_names[i]); } - + + /* Add the renamed variables back into the name hash, + checking for conflicts. */ for (i = 0; i < count; i++) { assert (new_names[i] != NULL); assert (*new_names[i] != '\0'); - assert (strlen (new_names[i]) < 9); - strcpy (vars[i]->name, new_names[i]); - if (hsh_insert (d->name_tab, vars[i]) != NULL) + assert (strlen (new_names[i]) >= 1); + assert (strlen (new_names[i]) <= LONG_NAME_LEN); + + if (hsh_insert (d->name_tab, vars[i]) != NULL) { + /* There is a name conflict. + Back out all the name changes that have already + taken place, and indicate failure. */ size_t fail_idx = i; if (err_name != NULL) *err_name = new_names[i]; @@ -536,10 +629,17 @@ dict_rename_vars (struct dictionary *d, } success = 0; - break; + goto done; } } + /* Clear short names. */ + if (get_algorithm () == ENHANCED) + for (i = 0; i < count; i++) + var_clear_short_name (vars[i]); + + done: + /* Free the old names we kept around. */ for (i = 0; i < count; i++) free (old_names[i]); free (old_names); @@ -558,11 +658,14 @@ dict_get_weight (const struct dictionary *d) return d->weight; } -/* Returns the value of D's weighting variable in case C, except - that a negative weight is returned as 0. Returns 1 if the - dictionary is unweighted. */ +/* Returns the value of D's weighting variable in case C, except that a + negative weight is returned as 0. Returns 1 if the dictionary is + unweighted. Will warn about missing, negative, or zero values if + warn_on_invalid is nonzero. The function will set warn_on_invalid to zero + if an invalid weight is found. */ double -dict_get_case_weight (const struct dictionary *d, const struct ccase *c) +dict_get_case_weight (const struct dictionary *d, const struct ccase *c, + int *warn_on_invalid) { assert (d != NULL); assert (c != NULL); @@ -571,9 +674,15 @@ dict_get_case_weight (const struct dictionary *d, const struct ccase *c) return 1.0; else { - double w = c->data[d->weight->fv].f; - if (w < 0.0) + double w = case_num (c, d->weight->fv); + if ( w < 0.0 || w == SYSMIS || is_num_user_missing(w, d->weight) ) w = 0.0; + if ( w == 0.0 && *warn_on_invalid ) { + *warn_on_invalid = 0; + msg (SW, _("At least one case in the data file had a weight value " + "that was user-missing, system-missing, zero, or " + "negative. These case(s) were ignored.")); + } return w; } } @@ -673,7 +782,31 @@ dict_compact_values (struct dictionary *d) i++; } else - dict_delete_var (default_dict, v); + dict_delete_var (d, v); + } +} + +/* Copies values from SRC, which represents a case arranged + according to dictionary D, to DST, which represents a case + arranged according to the dictionary that will be produced by + dict_compact_values(D). */ +void +dict_compact_case (const struct dictionary *d, + struct ccase *dst, const struct ccase *src) +{ + size_t i; + size_t value_idx; + + value_idx = 0; + for (i = 0; i < d->var_cnt; i++) + { + struct variable *v = d->var[i]; + + if (dict_class_from_id (v->name) != DC_SCRATCH) + { + case_copy (dst, value_idx, src, v->fv, v->nv); + value_idx += v->nv; + } } } @@ -817,10 +950,11 @@ dict_create_vector (struct dictionary *d, struct variable **var, size_t cnt) { struct vector *vector; + size_t i; assert (d != NULL); assert (name != NULL); - assert (strlen (name) > 0 && strlen (name) < 9); + assert (var_is_valid_name (name, false)); assert (var != NULL); assert (cnt > 0); @@ -830,10 +964,13 @@ dict_create_vector (struct dictionary *d, d->vector = xrealloc (d->vector, (d->vector_cnt + 1) * sizeof *d->vector); vector = d->vector[d->vector_cnt] = xmalloc (sizeof *vector); vector->idx = d->vector_cnt++; - strncpy (vector->name, name, 8); - vector->name[8] = '\0'; + str_copy_trunc (vector->name, sizeof vector->name, name); vector->var = xmalloc (cnt * sizeof *var); - memcpy (vector->var, var, cnt * sizeof *var); + for (i = 0; i < cnt; i++) + { + assert (dict_contains_var (d, var[i])); + vector->var[i] = var[i]; + } vector->cnt = cnt; return 1; @@ -870,7 +1007,7 @@ dict_lookup_vector (const struct dictionary *d, const char *name) assert (name != NULL); for (i = 0; i < d->vector_cnt; i++) - if (!strcmp (d->vector[i]->name, name)) + if (!strcasecmp (d->vector[i]->name, name)) return d->vector[i]; return NULL; } @@ -892,3 +1029,75 @@ dict_clear_vectors (struct dictionary *d) d->vector = NULL; d->vector_cnt = 0; } + +/* Compares two strings. */ +static int +compare_strings (const void *a, const void *b, void *aux UNUSED) +{ + return strcmp (a, b); +} + +/* Hashes a string. */ +static unsigned +hash_string (const void *s, void *aux UNUSED) +{ + return hsh_hash_string (s); +} + +/* Assigns a valid, unique short_name[] to each variable in D. + Each variable whose actual name is short has highest priority + for that short name. Otherwise, variables with an existing + short_name[] have the next highest priority for a given short + name; if it is already taken, then the variable is treated as + if short_name[] had been empty. Otherwise, long names are + truncated to form short names. If that causes conflicts, + variables are renamed as PREFIX_A, PREFIX_B, and so on. */ +void +dict_assign_short_names (struct dictionary *d) +{ + struct hsh_table *short_names; + size_t i; + + /* Give variables whose names are short the corresponding short + names, and clear short_names[] that conflict with a variable + name. */ + for (i = 0; i < d->var_cnt; i++) + { + struct variable *v = d->var[i]; + if (strlen (v->name) <= SHORT_NAME_LEN) + var_set_short_name (v, v->name); + else if (dict_lookup_var (d, v->short_name) != NULL) + var_clear_short_name (v); + } + + /* Each variable with an assigned short_name[] now gets it + unless there is a conflict. */ + short_names = hsh_create (d->var_cnt, compare_strings, hash_string, + NULL, NULL); + for (i = 0; i < d->var_cnt; i++) + { + struct variable *v = d->var[i]; + if (v->short_name[0] && hsh_insert (short_names, v->short_name) != NULL) + var_clear_short_name (v); + } + + /* Now assign short names to remaining variables. */ + for (i = 0; i < d->var_cnt; i++) + { + struct variable *v = d->var[i]; + if (v->short_name[0] == '\0') + { + int sfx; + + /* Form initial short_name. */ + var_set_short_name (v, v->name); + + /* Try _A, _B, ... _AA, _AB, etc., if needed. */ + for (sfx = 0; hsh_insert (short_names, v->short_name) != NULL; sfx++) + var_set_short_name_suffix (v, v->name, sfx); + } + } + + /* Get rid of hash table. */ + hsh_destroy (short_names); +}