X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdictionary.c;h=99ea5b8bcbec501a1cd3c57fa3252351b7d0fb17;hb=6ccbd384363db2e304ffe8cc51fcd2eac0a5349a;hp=dba6bb142a065c564ef0b32577d207338aa06498;hpb=3a7fba81ceae5b049d0f7d671e9e3c3c43bbf703;p=pspp diff --git a/src/dictionary.c b/src/dictionary.c index dba6bb142a..99ea5b8bcb 100644 --- a/src/dictionary.c +++ b/src/dictionary.c @@ -18,10 +18,12 @@ 02111-1307, USA. */ #include -#include +#include "dictionary.h" #include #include "algorithm.h" #include "alloc.h" +#include "case.h" +#include "error.h" #include "hash.h" #include "misc.h" #include "str.h" @@ -34,7 +36,7 @@ struct dictionary struct variable **var; /* Variables. */ size_t var_cnt, var_cap; /* Number of variables, capacity. */ struct hsh_table *name_tab; /* Variable index by name. */ - int value_cnt; /* Number of `union value's per case. */ + int next_value_idx; /* Index of next `union value' to allocate. */ struct variable **split; /* SPLIT FILE vars. */ size_t split_cnt; /* SPLIT FILE count. */ struct variable *weight; /* WEIGHT variable. */ @@ -46,6 +48,7 @@ struct dictionary size_t vector_cnt; /* Number of vectors. */ }; +/* Creates and returns a new dictionary. */ struct dictionary * dict_create (void) { @@ -53,8 +56,8 @@ dict_create (void) d->var = NULL; d->var_cnt = d->var_cap = 0; - d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL); - d->value_cnt = 0; + d->name_tab = hsh_create (8, compare_var_names, hash_var_name, NULL, NULL); + d->next_value_idx = 0; d->split = NULL; d->split_cnt = 0; d->weight = NULL; @@ -68,6 +71,8 @@ dict_create (void) return d; } +/* Creates and returns a (deep) copy of an existing + dictionary. */ struct dictionary * dict_clone (const struct dictionary *s) { @@ -79,30 +84,21 @@ dict_clone (const struct dictionary *s) d = dict_create (); for (i = 0; i < s->var_cnt; i++) dict_clone_var (d, s->var[i], s->var[i]->name); - d->value_cnt = s->value_cnt; + d->next_value_idx = s->next_value_idx; d->split_cnt = s->split_cnt; if (d->split_cnt > 0) { d->split = xmalloc (d->split_cnt * sizeof *d->split); for (i = 0; i < d->split_cnt; i++) - { - d->split[i] = dict_lookup_var (d, s->split[i]->name); - assert (d->split[i] != NULL); - } + d->split[i] = dict_lookup_var_assert (d, s->split[i]->name); } if (s->weight != NULL) - { - d->weight = dict_lookup_var (d, s->weight->name); - assert (d->weight != NULL); - } + d->weight = dict_lookup_var_assert (d, s->weight->name); if (s->filter != NULL) - { - d->filter = dict_lookup_var (d, s->filter->name); - assert (d->filter != NULL); - } + d->filter = dict_lookup_var_assert (d, s->filter->name); d->case_limit = s->case_limit; dict_set_label (d, dict_get_label (s)); @@ -115,6 +111,8 @@ dict_clone (const struct dictionary *s) return d; } +/* Clears the contents from a dictionary without destroying the + dictionary itself. */ void dict_clear (struct dictionary *d) { @@ -127,6 +125,7 @@ dict_clear (struct dictionary *d) for (i = 0; i < d->var_cnt; i++) { struct variable *v = d->var[i]; + var_clear_aux (v); val_labs_destroy (v->val_labs); free (v->label); free (v); @@ -135,7 +134,7 @@ dict_clear (struct dictionary *d) d->var = NULL; d->var_cnt = d->var_cap = 0; hsh_clear (d->name_tab); - d->value_cnt = 0; + d->next_value_idx = 0; free (d->split); d->split = NULL; d->split_cnt = 0; @@ -149,6 +148,20 @@ dict_clear (struct dictionary *d) dict_clear_vectors (d); } +/* Destroys the aux data for every variable in D, by calling + var_clear_aux() for each variable. */ +void +dict_clear_aux (struct dictionary *d) +{ + int i; + + assert (d != NULL); + + for (i = 0; i < d->var_cnt; i++) + var_clear_aux (d->var[i]); +} + +/* Clears a dictionary and destroys it. */ void dict_destroy (struct dictionary *d) { @@ -160,6 +173,7 @@ dict_destroy (struct dictionary *d) } } +/* Returns the number of variables in D. */ size_t dict_get_var_cnt (const struct dictionary *d) { @@ -168,6 +182,9 @@ dict_get_var_cnt (const struct dictionary *d) return d->var_cnt; } +/* Returns the variable in D with index IDX, which must be + between 0 and the count returned by dict_get_var_cnt(), + exclusive. */ struct variable * dict_get_var (const struct dictionary *d, size_t idx) { @@ -177,6 +194,10 @@ dict_get_var (const struct dictionary *d, size_t idx) return d->var[idx]; } +/* Sets *VARS to an array of pointers to variables in D and *CNT + to the number of variables in *D. By default all variables + are returned, but bits may be set in EXCLUDE_CLASSES to + exclude ordinary, system, and/or scratch variables. */ void dict_get_vars (const struct dictionary *d, struct variable ***vars, size_t *cnt, unsigned exclude_classes) @@ -204,6 +225,9 @@ dict_get_vars (const struct dictionary *d, struct variable ***vars, assert (*cnt == count); } +/* Creates and returns a new variable in D with the given NAME + and WIDTH. Returns a null pointer if the given NAME would + duplicate that of an existing variable in the dictionary. */ struct variable * dict_create_var (struct dictionary *d, const char *name, int width) { @@ -225,9 +249,10 @@ dict_create_var (struct dictionary *d, const char *name, int width) v->index = d->var_cnt; v->type = width == 0 ? NUMERIC : ALPHA; v->width = width; - v->fv = d->value_cnt; + v->fv = d->next_value_idx; v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8); - v->left = name[0] == '#'; + v->init = 1; + v->reinit = dict_class_from_id (name) != DC_SCRATCH; v->miss_type = MISSING_NONE; if (v->type == NUMERIC) { @@ -241,8 +266,11 @@ dict_create_var (struct dictionary *d, const char *name, int width) v->print.w = v->width; v->print.d = 0; } + v->write = v->print; v->val_labs = val_labs_create (v->width); v->label = NULL; + v->aux = NULL; + v->aux_dtor = NULL; /* Update dictionary. */ if (d->var_cnt >= d->var_cap) @@ -253,11 +281,24 @@ dict_create_var (struct dictionary *d, const char *name, int width) d->var[v->index] = v; d->var_cnt++; hsh_force_insert (d->name_tab, v); - d->value_cnt += v->nv; + d->next_value_idx += v->nv; return v; } +/* Creates and returns a new variable in D with the given NAME + and WIDTH. Assert-fails if the given NAME would duplicate + that of an existing variable in the dictionary. */ +struct variable * +dict_create_var_assert (struct dictionary *d, const char *name, int width) +{ + struct variable *v = dict_create_var (d, name, width); + assert (v != NULL); + return v; +} + +/* Creates a new variable in D named NAME, as a copy of existing + variable OV, which need not be in D or in any dictionary. */ struct variable * dict_clone_var (struct dictionary *d, const struct variable *ov, const char *name) @@ -273,7 +314,8 @@ dict_clone_var (struct dictionary *d, const struct variable *ov, if (nv == NULL) return NULL; - nv->left = ov->left; + nv->init = 1; + nv->reinit = ov->reinit; nv->miss_type = ov->miss_type; memcpy (nv->missing, ov->missing, sizeof nv->missing); nv->print = ov->print; @@ -286,6 +328,9 @@ dict_clone_var (struct dictionary *d, const struct variable *ov, return nv; } +/* Changes the name of V in D to name NEW_NAME. Assert-fails if + a variable named NEW_NAME is already in D, except that + NEW_NAME may be the same as V's existing name. */ void dict_rename_var (struct dictionary *d, struct variable *v, const char *new_name) @@ -294,6 +339,7 @@ dict_rename_var (struct dictionary *d, struct variable *v, assert (v != NULL); assert (new_name != NULL); assert (strlen (new_name) >= 1 && strlen (new_name) <= 8); + assert (dict_contains_var (d, v)); if (!strcmp (v->name, new_name)) return; @@ -306,6 +352,8 @@ dict_rename_var (struct dictionary *d, struct variable *v, hsh_force_insert (d->name_tab, v); } +/* Returns the variable named NAME in D, or a null pointer if no + variable has that name. */ struct variable * dict_lookup_var (const struct dictionary *d, const char *name) { @@ -321,54 +369,74 @@ dict_lookup_var (const struct dictionary *d, const char *name) return hsh_find (d->name_tab, &v); } +/* Returns the variable named NAME in D. Assert-fails if no + variable has that name. */ +struct variable * +dict_lookup_var_assert (const struct dictionary *d, const char *name) +{ + struct variable *v = dict_lookup_var (d, name); + assert (v != NULL); + return v; +} + +/* Returns nonzero if variable V is in dictionary D. */ int dict_contains_var (const struct dictionary *d, const struct variable *v) { assert (d != NULL); assert (v != NULL); - return dict_lookup_var (d, v->name) == v; + return v->index >= 0 && v->index < d->var_cnt && d->var[v->index] == v; } +/* Compares two double pointers to variables, which should point + to elements of a struct dictionary's `var' member array. */ static int -compare_variable_dblptrs (const void *a_, const void *b_, void *aux unused) +compare_var_ptrs (const void *a_, const void *b_, void *aux UNUSED) { struct variable *const *a = a_; struct variable *const *b = b_; - if (a > b) - return 1; - else if (a < b) - return -1; - else - return 0; + return *a < *b ? -1 : *a > *b; } +/* Deletes variable V from dictionary D and frees V. + + This is a very bad idea if there might be any pointers to V + from outside D. In general, no variable in default_dict + should be deleted when any transformations are active, because + those transformations might reference the deleted variable. + The safest time to delete a variable is just after a procedure + has been executed, as done by MODIFY VARS. + + Pointers to V within D are not a problem, because + dict_delete_var() knows to remove V from split variables, + weights, filters, etc. */ void dict_delete_var (struct dictionary *d, struct variable *v) { size_t i; - /* FIXME? Does not sync d->value_cnt. */ assert (d != NULL); assert (v != NULL); assert (dict_contains_var (d, v)); assert (d->var[v->index] == v); - /* Remove v from splits, weight, filter variables. */ + /* Delete aux data. */ + var_clear_aux (v); + + /* Remove V from splits, weight, filter variables. */ d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split, - &v, - compare_variable_dblptrs, NULL); + &v, compare_var_ptrs, NULL); if (d->weight == v) d->weight = NULL; if (d->filter == v) d->filter = NULL; dict_clear_vectors (d); - /* Remove v from var array. */ + /* Remove V from var array. */ + remove_element (d->var, d->var_cnt, sizeof *d->var, v->index); d->var_cnt--; - memmove (d->var + v->index, d->var + v->index + 1, - (d->var_cnt - v->index) * sizeof *d->var); /* Update index. */ for (i = v->index; i < d->var_cnt; i++) @@ -381,9 +449,10 @@ dict_delete_var (struct dictionary *d, struct variable *v) val_labs_destroy (v->val_labs); free (v->label); free (v); - } +/* Deletes the COUNT variables listed in VARS from D. This is + unsafe; see the comment on dict_delete_var() for details. */ void dict_delete_vars (struct dictionary *d, struct variable *const *vars, size_t count) @@ -397,6 +466,10 @@ dict_delete_vars (struct dictionary *d, dict_delete_var (d, *vars++); } +/* Reorders the variables in D, placing the COUNT variables + listed in ORDER in that order at the beginning of D. The + other variables in D, if any, retain their relative + positions. */ void dict_reorder_vars (struct dictionary *d, struct variable *const *order, size_t count) @@ -428,6 +501,12 @@ dict_reorder_vars (struct dictionary *d, d->var = new_var; } +/* Renames COUNT variables specified in VARS to the names given + in NEW_NAMES within dictionary D. If the renaming would + result in a duplicate variable name, returns zero and stores a + name that would be duplicated into *ERR_NAME (if ERR_NAME is + non-null). Otherwise, the renaming is successful, and nonzero + is returned. */ int dict_rename_vars (struct dictionary *d, struct variable **vars, char **new_names, @@ -452,6 +531,7 @@ dict_rename_vars (struct dictionary *d, for (i = 0; i < count; i++) { assert (new_names[i] != NULL); + assert (*new_names[i] != '\0'); assert (strlen (new_names[i]) < 9); strcpy (vars[i]->name, new_names[i]); if (hsh_insert (d->name_tab, vars[i]) != NULL) @@ -481,6 +561,8 @@ dict_rename_vars (struct dictionary *d, return success; } +/* Returns the weighting variable in dictionary D, or a null + pointer if the dictionary is unweighted. */ struct variable * dict_get_weight (const struct dictionary *d) { @@ -490,8 +572,14 @@ dict_get_weight (const struct dictionary *d) return d->weight; } +/* Returns the value of D's weighting variable in case C, except that a + negative weight is returned as 0. Returns 1 if the dictionary is + unweighted. Will warn about missing, negative, or zero values if + warn_on_invalid is nonzero. The function will set warn_on_invalid to zero + if an invalid weight is found. */ double -dict_get_case_weight (const struct dictionary *d, const struct ccase *c) +dict_get_case_weight (const struct dictionary *d, const struct ccase *c, + int *warn_on_invalid) { assert (d != NULL); assert (c != NULL); @@ -500,13 +588,21 @@ dict_get_case_weight (const struct dictionary *d, const struct ccase *c) return 1.0; else { - double w = c->data[d->weight->fv].f; - if (w < 0.0) + double w = case_num (c, d->weight->fv); + if ( w < 0.0 || w == SYSMIS || is_num_user_missing(w, d->weight) ) w = 0.0; + if ( w == 0.0 && *warn_on_invalid ) { + *warn_on_invalid = 0; + msg (SW, _("At least one case in the data file had a weight value " + "that was user-missing, system-missing, zero, or " + "negative. These case(s) were ignored.")); + } return w; } } +/* Sets the weighting variable of D to V, or turning off + weighting if V is a null pointer. */ void dict_set_weight (struct dictionary *d, struct variable *v) { @@ -517,6 +613,8 @@ dict_set_weight (struct dictionary *d, struct variable *v) d->weight = v; } +/* Returns the filter variable in dictionary D (see cmd_filter()) + or a null pointer if the dictionary is unfiltered. */ struct variable * dict_get_filter (const struct dictionary *d) { @@ -526,6 +624,8 @@ dict_get_filter (const struct dictionary *d) return d->filter; } +/* Sets V as the filter variable for dictionary D. Passing a + null pointer for V turn off filtering. */ void dict_set_filter (struct dictionary *d, struct variable *v) { @@ -535,6 +635,8 @@ dict_set_filter (struct dictionary *d, struct variable *v) d->filter = v; } +/* Returns the case limit for dictionary D, or zero if the number + of cases is unlimited (see cmd_n()). */ int dict_get_case_limit (const struct dictionary *d) { @@ -543,6 +645,8 @@ dict_get_case_limit (const struct dictionary *d) return d->case_limit; } +/* Sets CASE_LIMIT as the case limit for dictionary D. Zero for + CASE_LIMIT indicates no limit. */ void dict_set_case_limit (struct dictionary *d, int case_limit) { @@ -552,29 +656,121 @@ dict_set_case_limit (struct dictionary *d, int case_limit) d->case_limit = case_limit; } +/* Returns the index of the next value to be added to D. This + value is the number of `union value's that need to be + allocated to store a case for dictionary D. */ int -dict_get_value_cnt (const struct dictionary *d) +dict_get_next_value_idx (const struct dictionary *d) +{ + assert (d != NULL); + + return d->next_value_idx; +} + +/* Returns the number of bytes needed to store a case for + dictionary D. */ +size_t +dict_get_case_size (const struct dictionary *d) { assert (d != NULL); - return d->value_cnt; + return sizeof (union value) * dict_get_next_value_idx (d); } +/* Deletes scratch variables in dictionary D and reassigns values + so that fragmentation is eliminated. */ void dict_compact_values (struct dictionary *d) { size_t i; - d->value_cnt = 0; + d->next_value_idx = 0; + for (i = 0; i < d->var_cnt; ) + { + struct variable *v = d->var[i]; + + if (dict_class_from_id (v->name) != DC_SCRATCH) + { + v->fv = d->next_value_idx; + d->next_value_idx += v->nv; + i++; + } + else + dict_delete_var (default_dict, v); + } +} + +/* Copies values from SRC, which represents a case arranged + according to dictionary D, to DST, which represents a case + arranged according to the dictionary that will be produced by + dict_compact_values(D). */ +void +dict_compact_case (const struct dictionary *d, + struct ccase *dst, const struct ccase *src) +{ + size_t i; + size_t value_idx; + + value_idx = 0; + for (i = 0; i < d->var_cnt; i++) + { + struct variable *v = d->var[i]; + + if (dict_class_from_id (v->name) != DC_SCRATCH) + { + case_copy (dst, value_idx, src, v->fv, v->nv); + value_idx += v->nv; + } + } +} + +/* Returns the number of values that would be used by a case if + dict_compact_values() were called. */ +size_t +dict_get_compacted_value_cnt (const struct dictionary *d) +{ + size_t i; + size_t cnt; + + cnt = 0; + for (i = 0; i < d->var_cnt; i++) + if (dict_class_from_id (d->var[i]->name) != DC_SCRATCH) + cnt += d->var[i]->nv; + return cnt; +} + +/* Creates and returns an array mapping from a dictionary index + to the `fv' that the corresponding variable will have after + calling dict_compact_values(). Scratch variables receive -1 + for `fv' because dict_compact_values() will delete them. */ +int * +dict_get_compacted_idx_to_fv (const struct dictionary *d) +{ + size_t i; + size_t next_value_idx; + int *idx_to_fv; + + idx_to_fv = xmalloc (d->var_cnt * sizeof *idx_to_fv); + next_value_idx = 0; for (i = 0; i < d->var_cnt; i++) { struct variable *v = d->var[i]; - v->fv = d->value_cnt; - d->value_cnt += v->nv; + if (dict_class_from_id (v->name) != DC_SCRATCH) + { + idx_to_fv[i] = next_value_idx; + next_value_idx += v->nv; + } + else + idx_to_fv[i] = -1; } + return idx_to_fv; } +/* Returns the SPLIT FILE vars (see cmd_split_file()). Call + dict_get_split_cnt() to determine how many SPLIT FILE vars + there are. Returns a null pointer if and only if there are no + SPLIT FILE vars. */ struct variable *const * dict_get_split_vars (const struct dictionary *d) { @@ -583,6 +779,7 @@ dict_get_split_vars (const struct dictionary *d) return d->split; } +/* Returns the number of SPLIT FILE vars. */ size_t dict_get_split_cnt (const struct dictionary *d) { @@ -591,6 +788,7 @@ dict_get_split_cnt (const struct dictionary *d) return d->split_cnt; } +/* Sets CNT split vars SPLIT in dictionary D. */ void dict_set_split_vars (struct dictionary *d, struct variable *const *split, size_t cnt) @@ -603,6 +801,8 @@ dict_set_split_vars (struct dictionary *d, memcpy (d->split, split, cnt * sizeof *d->split); } +/* Returns the file label for D, or a null pointer if D is + unlabeled (see cmd_file_label()). */ const char * dict_get_label (const struct dictionary *d) { @@ -611,6 +811,8 @@ dict_get_label (const struct dictionary *d) return d->label; } +/* Sets D's file label to LABEL, truncating it to a maximum of 60 + characters. */ void dict_set_label (struct dictionary *d, const char *label) { @@ -629,6 +831,8 @@ dict_set_label (struct dictionary *d, const char *label) } } +/* Returns the documents for D, or a null pointer if D has no + documents (see cmd_document()).. */ const char * dict_get_documents (const struct dictionary *d) { @@ -637,6 +841,8 @@ dict_get_documents (const struct dictionary *d) return d->documents; } +/* Sets the documents for D to DOCUMENTS, or removes D's + documents if DOCUMENT is a null pointer. */ void dict_set_documents (struct dictionary *d, const char *documents) { @@ -649,6 +855,9 @@ dict_set_documents (struct dictionary *d, const char *documents) d->documents = xstrdup (documents); } +/* Creates in D a vector named NAME that contains CNT variables + VAR (see cmd_vector()). Returns nonzero if successful, or + zero if a vector named NAME already exists in D. */ int dict_create_vector (struct dictionary *d, const char *name, @@ -677,6 +886,8 @@ dict_create_vector (struct dictionary *d, return 1; } +/* Returns the vector in D with index IDX, which must be less + than dict_get_vector_cnt (D). */ const struct vector * dict_get_vector (const struct dictionary *d, size_t idx) { @@ -686,6 +897,7 @@ dict_get_vector (const struct dictionary *d, size_t idx) return d->vector[idx]; } +/* Returns the number of vectors in D. */ size_t dict_get_vector_cnt (const struct dictionary *d) { @@ -694,6 +906,8 @@ dict_get_vector_cnt (const struct dictionary *d) return d->vector_cnt; } +/* Looks up and returns the vector within D with the given + NAME. */ const struct vector * dict_lookup_vector (const struct dictionary *d, const char *name) { @@ -708,6 +922,7 @@ dict_lookup_vector (const struct dictionary *d, const char *name) return NULL; } +/* Deletes all vectors from D. */ void dict_clear_vectors (struct dictionary *d) {