X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdictionary.c;h=cee678e407878651360979f77b290e614968204d;hb=37273a2e5b058a1907bc7b4b5bf666c64e0afdbb;hp=ca1a286c4d135ad20345fabe11484242bac3d9af;hpb=077a1c38bd58911cb74a08f95be3691e49b87779;p=pspp-builds.git diff --git a/src/data/dictionary.c b/src/data/dictionary.c index ca1a286c..cee678e4 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -1,20 +1,18 @@ -/* PSPP - computes sample statistics. +/* PSPP - a program for statistical analysis. Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include <config.h> @@ -24,7 +22,6 @@ #include #include "case.h" -#include "cat-routines.h" #include "category.h" #include "settings.h" #include "value-labels.h" @@ -52,13 +49,13 @@ struct dictionary size_t var_cnt, var_cap; /* Number of variables, capacity. */ struct hsh_table *name_tab; /* Variable index by name. */ int next_value_idx; /* Index of next `union value' to allocate. */ - struct variable **split; /* SPLIT FILE vars. */ + const struct variable **split; /* SPLIT FILE vars. */ size_t split_cnt; /* SPLIT FILE count. */ struct variable *weight; /* WEIGHT variable. */ struct variable *filter; /* FILTER variable. */ size_t case_limit; /* Current case limit (N command). */ char *label; /* File label. */ - char *documents; /* Documents, as a string. */ + struct string documents; /* Documents, as a string. */ struct vector **vector; /* Vectors of variables. */ size_t vector_cnt; /* Number of vectors. */ const struct dict_callbacks *callbacks; /* Callbacks on dictionary @@ -117,7 +114,10 @@ dict_clone (const struct dictionary *s) { struct variable *sv = s->var[i]; struct variable *dv = dict_clone_var_assert (d, sv, var_get_name (sv)); - var_set_short_name (dv, var_get_short_name (sv)); + size_t i; + + for (i = 0; i < var_get_short_name_cnt (sv); i++) + var_set_short_name (dv, i, var_get_short_name (sv, i)); } d->next_value_idx = s->next_value_idx; @@ -159,14 +159,22 @@ dict_clear (struct dictionary *d) while (d->var_cnt > 0 ) { - var_clear_vardict (d->var[d->var_cnt - 1]); - var_destroy (d->var[d->var_cnt -1]); + struct variable *v = d->var[d->var_cnt - 1]; + int dict_index = var_get_dict_index (v); + int case_index = var_get_case_index (v); + int val_cnt = var_get_value_cnt (v); + + var_clear_vardict (v); + var_destroy (v); d->var_cnt--; if (d->callbacks && d->callbacks->var_deleted ) - d->callbacks->var_deleted (d, d->var_cnt, d->cb_data); + d->callbacks->var_deleted (d, + dict_index, case_index, val_cnt, + d->cb_data); } + free (d->var); d->var = NULL; d->var_cnt = 
d->var_cap = 0; @@ -178,8 +186,7 @@ dict_clear (struct dictionary *d) d->case_limit = 0; free (d->label); d->label = NULL; - free (d->documents); - d->documents = NULL; + ds_destroy (&d->documents); dict_clear_vectors (d); } @@ -233,13 +240,20 @@ dict_get_var (const struct dictionary *d, size_t idx) return d->var[idx]; } +inline void +dict_get_vars (const struct dictionary *d, const struct variable ***vars, + size_t *cnt, unsigned exclude_classes) +{ + dict_get_vars_mutable (d, (struct variable ***) vars, cnt, exclude_classes); +} + /* Sets *VARS to an array of pointers to variables in D and *CNT to the number of variables in *D. All variables are returned if EXCLUDE_CLASSES is 0, or it may contain one or more of (1u << DC_ORDINARY), (1u << DC_SYSTEM), or (1u << DC_SCRATCH) to exclude the corresponding type of variable. */ void -dict_get_vars (const struct dictionary *d, struct variable ***vars, +dict_get_vars_mutable (const struct dictionary *d, struct variable ***vars, size_t *cnt, unsigned exclude_classes) { size_t count; @@ -415,7 +429,7 @@ set_var_dict_index (struct variable *v, int dict_index) d->callbacks->var_changed (d, dict_index, d->cb_data); } -/* Sets the case_index in V's vardict to DICT_INDEX. */ +/* Sets the case_index in V's vardict to CASE_INDEX. */ static void set_var_case_index (struct variable *v, int case_index) { @@ -438,11 +452,12 @@ reindex_vars (struct dictionary *d, size_t from, size_t to) /* Deletes variable V from dictionary D and frees V. This is a very bad idea if there might be any pointers to V - from outside D. In general, no variable in should be deleted when - any transformations are active on the dictionary's dataset, because - those transformations might reference the deleted variable. - The safest time to delete a variable is just after a procedure - has been executed, as done by MODIFY VARS. + from outside D. 
In general, no variable in the active file's + dictionary should be deleted when any transformations are + active on the dictionary's dataset, because those + transformations might reference the deleted variable. The + safest time to delete a variable is just after a procedure has + been executed, as done by MODIFY VARS. Pointers to V within D are not a problem, because dict_delete_var() knows to remove V from split variables, @@ -451,6 +466,8 @@ void dict_delete_var (struct dictionary *d, struct variable *v) { int dict_index = var_get_dict_index (v); + const int case_index = var_get_case_index (v); + const int val_cnt = var_get_value_cnt (v); assert (dict_contains_var (d, v)); @@ -483,7 +500,7 @@ dict_delete_var (struct dictionary *d, struct variable *v) var_destroy (v); if (d->callbacks && d->callbacks->var_deleted ) - d->callbacks->var_deleted (d, dict_index, d->cb_data); + d->callbacks->var_deleted (d, dict_index, case_index, val_cnt, d->cb_data); } /* Deletes the COUNT variables listed in VARS from D. This is @@ -610,7 +627,7 @@ dict_rename_var (struct dictionary *d, struct variable *v, hsh_force_insert (d->name_tab, v); if (get_algorithm () == ENHANCED) - var_clear_short_name (v); + var_clear_short_names (v); if ( d->callbacks && d->callbacks->var_changed ) d->callbacks->var_changed (d, var_get_dict_index (v), d->cb_data); @@ -676,7 +693,7 @@ dict_rename_vars (struct dictionary *d, /* Clear short names. 
*/ if (get_algorithm () == ENHANCED) for (i = 0; i < count; i++) - var_clear_short_name (vars[i]); + var_clear_short_names (vars[i]); pool_destroy (pool); return true; @@ -712,7 +729,7 @@ dict_get_case_weight (const struct dictionary *d, const struct ccase *c, double w = case_num (c, d->weight); if (w < 0.0 || var_is_num_missing (d->weight, w, MV_ANY)) w = 0.0; - if ( w == 0.0 && *warn_on_invalid ) { + if ( w == 0.0 && warn_on_invalid != NULL && *warn_on_invalid ) { *warn_on_invalid = false; msg (SW, _("At least one case in the data file had a weight value " "that was user-missing, system-missing, zero, or " @@ -830,6 +847,33 @@ dict_compact_values (struct dictionary *d) } } + +/* + Reassigns case indices for D, increasing each index above START by + the value PADDING. +*/ +static void +dict_pad_values (struct dictionary *d, int start, int padding) +{ + size_t i; + + if ( padding <= 0 ) + return; + + for (i = 0; i < d->var_cnt; ++i) + { + struct variable *v = d->var[i]; + + int index = var_get_case_index (v); + + if ( index >= start) + set_var_case_index (v, index + padding); + } + + d->next_value_idx += padding; +} + + /* Returns the number of values that would be used by a case if dict_compact_values() were called. */ size_t @@ -1009,7 +1053,7 @@ dict_compactor_destroy (struct dict_compactor *compactor) dict_get_split_cnt() to determine how many SPLIT FILE vars there are. Returns a null pointer if and only if there are no SPLIT FILE vars. */ -struct variable *const * +const struct variable *const * dict_get_split_vars (const struct dictionary *d) { assert (d != NULL); @@ -1051,7 +1095,7 @@ dict_set_split_vars (struct dictionary *d, assert (cnt == 0 || split != NULL); d->split_cnt = cnt; - d->split = xnrealloc (d->split, cnt, sizeof *d->split); + d->split = cnt > 0 ? 
xnrealloc (d->split, cnt, sizeof *d->split) : NULL; memcpy (d->split, split, cnt * sizeof *d->split); if ( d->callbacks && d->callbacks->split_changed ) @@ -1089,27 +1133,73 @@ dict_set_label (struct dictionary *d, const char *label) } /* Returns the documents for D, or a null pointer if D has no - documents (see cmd_document()).. */ + documents. If the return value is nonnull, then the string + will be an exact multiple of DOC_LINE_LENGTH bytes in length, + with each segment corresponding to one line. */ const char * dict_get_documents (const struct dictionary *d) { - assert (d != NULL); - - return d->documents; + return ds_is_empty (&d->documents) ? NULL : ds_cstr (&d->documents); } /* Sets the documents for D to DOCUMENTS, or removes D's - documents if DOCUMENT is a null pointer. */ + documents if DOCUMENT is a null pointer. If DOCUMENTS is + nonnull, then it should be an exact multiple of + DOC_LINE_LENGTH bytes in length, with each segment + corresponding to one line. */ void dict_set_documents (struct dictionary *d, const char *documents) { - assert (d != NULL); + size_t remainder; - free (d->documents); - if (documents == NULL) - d->documents = NULL; - else - d->documents = xstrdup (documents); + ds_assign_cstr (&d->documents, documents != NULL ? documents : ""); + + /* In case the caller didn't get it quite right, pad out the + final line with spaces. */ + remainder = ds_length (&d->documents) % DOC_LINE_LENGTH; + if (remainder != 0) + ds_put_char_multiple (&d->documents, ' ', DOC_LINE_LENGTH - remainder); +} + +/* Drops the documents from dictionary D. */ +void +dict_clear_documents (struct dictionary *d) +{ + ds_clear (&d->documents); +} + +/* Appends LINE to the documents in D. LINE will be truncated or + padded on the right with spaces to make it exactly + DOC_LINE_LENGTH bytes long. 
*/ +void +dict_add_document_line (struct dictionary *d, const char *line) +{ + if (strlen (line) > DOC_LINE_LENGTH) + { + /* Note to translators: "bytes" is correct, not characters */ + msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH); + } + buf_copy_str_rpad (ds_put_uninit (&d->documents, DOC_LINE_LENGTH), + DOC_LINE_LENGTH, line); +} + +/* Returns the number of document lines in dictionary D. */ +size_t +dict_get_document_line_cnt (const struct dictionary *d) +{ + return ds_length (&d->documents) / DOC_LINE_LENGTH; +} + +/* Copies document line number IDX from dictionary D into + LINE, trimming off any trailing white space. */ +void +dict_get_document_line (const struct dictionary *d, + size_t idx, struct string *line) +{ + assert (idx < dict_get_document_line_cnt (d)); + ds_assign_substring (line, ds_substr (&d->documents, idx * DOC_LINE_LENGTH, + DOC_LINE_LENGTH)); + ds_rtrim (line, ss_cstr (CC_SPACES)); } /* Creates in D a vector named NAME that contains the CNT @@ -1195,137 +1285,28 @@ dict_clear_vectors (struct dictionary *d) d->vector_cnt = 0; } -/* Compares two strings. */ -static int -compare_strings (const void *a, const void *b, const void *aux UNUSED) -{ - return strcmp (a, b); -} - -/* Hashes a string. */ -static unsigned -hash_string (const void *s, const void *aux UNUSED) -{ - return hsh_hash_string (s); -} - - -/* Sets V's short name to BASE, followed by a suffix of the form - _A, _B, _C, ..., _AA, _AB, etc. according to the value of - SUFFIX_NUMBER. Truncates BASE as necessary to fit. */ -static void -set_var_short_name_suffix (struct variable *v, const char *base, - int suffix_number) -{ - char suffix[SHORT_NAME_LEN + 1]; - char short_name[SHORT_NAME_LEN + 1]; - char *start, *end; - int len, ofs; - - assert (v != NULL); - assert (suffix_number >= 0); - - /* Set base name. */ - var_set_short_name (v, base); - - /* Compose suffix. 
*/ - start = end = suffix + sizeof suffix - 1; - *end = '\0'; - do - { - *--start = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[suffix_number % 26]; - if (start <= suffix + 1) - msg (SE, _("Variable suffix too large.")); - suffix_number /= 26; - } - while (suffix_number > 0); - *--start = '_'; - - /* Append suffix to V's short name. */ - str_copy_trunc (short_name, sizeof short_name, base); - len = end - start; - if (len + strlen (short_name) > SHORT_NAME_LEN) - ofs = SHORT_NAME_LEN - len; - else - ofs = strlen (short_name); - strcpy (short_name + ofs, start); - - /* Set name. */ - var_set_short_name (v, short_name); -} - -/* Assigns a valid, unique short_name[] to each variable in D. - Each variable whose actual name is short has highest priority - for that short name. Otherwise, variables with an existing - short_name[] have the next highest priority for a given short - name; if it is already taken, then the variable is treated as - if short_name[] had been empty. Otherwise, long names are - truncated to form short names. If that causes conflicts, - variables are renamed as PREFIX_A, PREFIX_B, and so on. */ +/* Called from variable.c to notify the dictionary that some property of + the variable has changed */ void -dict_assign_short_names (struct dictionary *d) +dict_var_changed (const struct variable *v) { - struct hsh_table *short_names; - size_t i; - - /* Give variables whose names are short the corresponding short - names, and clear short_names[] that conflict with a variable - name. 
*/ - for (i = 0; i < d->var_cnt; i++) + if ( var_has_vardict (v)) { - struct variable *v = d->var[i]; - const char *short_name = var_get_short_name (v); - if (strlen (var_get_name (v)) <= SHORT_NAME_LEN) - var_set_short_name (v, var_get_name (v)); - else if (short_name != NULL && dict_lookup_var (d, short_name) != NULL) - var_clear_short_name (v); - } + const struct vardict_info *vdi = var_get_vardict (v); + struct dictionary *d; - /* Each variable with an assigned short_name[] now gets it - unless there is a conflict. */ - short_names = hsh_create (d->var_cnt, compare_strings, hash_string, - NULL, NULL); - for (i = 0; i < d->var_cnt; i++) - { - struct variable *v = d->var[i]; - const char *name = var_get_short_name (v); - if (name != NULL && hsh_insert (short_names, (char *) name) != NULL) - var_clear_short_name (v); - } + d = vdi->dict; - /* Now assign short names to remaining variables. */ - for (i = 0; i < d->var_cnt; i++) - { - struct variable *v = d->var[i]; - const char *name = var_get_short_name (v); - if (name == NULL) - { - /* Form initial short_name from the variable name, then - try _A, _B, ... _AA, _AB, etc., if needed.*/ - int trial = 0; - do - { - if (trial == 0) - var_set_short_name (v, var_get_name (v)); - else - set_var_short_name_suffix (v, var_get_name (v), trial - 1); - - trial++; - } - while (hsh_insert (short_names, (char *) var_get_short_name (v)) - != NULL); - } + if ( d->callbacks && d->callbacks->var_changed ) + d->callbacks->var_changed (d, var_get_dict_index (v), d->cb_data); } - - /* Get rid of hash table. 
*/ - hsh_destroy (short_names); } -/* Called from variable.c to notify the dictionary that some property of - the variable has changed */ +/* Called from variable.c to notify the dictionary that the variable's width + has changed */ void -dict_var_changed (const struct variable *v) +dict_var_resized (const struct variable *v, int delta) { if ( var_has_vardict (v)) { @@ -1334,7 +1315,9 @@ dict_var_changed (const struct variable *v) d = vdi->dict; - if ( d->callbacks && d->callbacks->var_changed ) - d->callbacks->var_changed (d, var_get_dict_index (v), d->cb_data); + dict_pad_values (d, var_get_case_index(v) + 1, delta); + + if ( d->callbacks && d->callbacks->var_resized ) + d->callbacks->var_resized (d, var_get_dict_index (v), delta, d->cb_data); } }