1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 #include "algorithm.h"
28 #include "value-labels.h"
34 struct variable **var; /* Variables. */
35 size_t var_cnt, var_cap; /* Number of variables, capacity. */
36 struct hsh_table *name_tab; /* Variable index by name. */
37 int next_value_idx; /* Index of next `union value' to allocate. */
38 struct variable **split; /* SPLIT FILE vars. */
39 size_t split_cnt; /* SPLIT FILE count. */
40 struct variable *weight; /* WEIGHT variable. */
41 struct variable *filter; /* FILTER variable. */
42 int case_limit; /* Current case limit (N command). */
43 char *label; /* File label. */
44 char *documents; /* Documents, as a string. */
45 struct vector **vector; /* Vectors of variables. */
46 size_t vector_cnt; /* Number of vectors. */
49 /* Creates and returns a new dictionary. */
53 struct dictionary *d = xmalloc (sizeof *d);
56 d->var_cnt = d->var_cap = 0;
57 d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL);
58 d->next_value_idx = 0;
72 /* Creates and returns a (deep) copy of an existing
75 dict_clone (const struct dictionary *s)
83 for (i = 0; i < s->var_cnt; i++)
84 dict_clone_var (d, s->var[i], s->var[i]->name);
85 d->next_value_idx = s->next_value_idx;
87 d->split_cnt = s->split_cnt;
90 d->split = xmalloc (d->split_cnt * sizeof *d->split);
91 for (i = 0; i < d->split_cnt; i++)
92 d->split[i] = dict_lookup_var_assert (d, s->split[i]->name);
95 if (s->weight != NULL)
96 d->weight = dict_lookup_var_assert (d, s->weight->name);
98 if (s->filter != NULL)
99 d->filter = dict_lookup_var_assert (d, s->filter->name);
101 d->case_limit = s->case_limit;
102 dict_set_label (d, dict_get_label (s));
103 dict_set_documents (d, dict_get_documents (s));
105 for (i = 0; i < s->vector_cnt; i++)
106 dict_create_vector (d, s->vector[i]->name,
107 s->vector[i]->var, s->vector[i]->cnt);
112 /* Clears the contents from a dictionary without destroying the
113 dictionary itself. */
115 dict_clear (struct dictionary *d)
117 /* FIXME? Should we really clear case_limit, label, documents?
118 Others are necessarily cleared by deleting all the variables.*/
123 for (i = 0; i < d->var_cnt; i++)
125 struct variable *v = d->var[i];
126 val_labs_destroy (v->val_labs);
132 d->var_cnt = d->var_cap = 0;
133 hsh_clear (d->name_tab);
134 d->next_value_idx = 0;
145 dict_clear_vectors (d);
148 /* Clears a dictionary and destroys it. */
150 dict_destroy (struct dictionary *d)
155 hsh_destroy (d->name_tab);
160 /* Returns the number of variables in D. */
162 dict_get_var_cnt (const struct dictionary *d)
169 /* Returns the variable in D with index IDX, which must be
170 between 0 and the count returned by dict_get_var_cnt(),
173 dict_get_var (const struct dictionary *d, size_t idx)
176 assert (idx < d->var_cnt);
181 /* Sets *VARS to an array of pointers to variables in D and *CNT
182 to the number of variables in *D. By default all variables
183 are returned, but bits may be set in EXCLUDE_CLASSES to
184 exclude ordinary, system, and/or scratch variables. */
186 dict_get_vars (const struct dictionary *d, struct variable ***vars,
187 size_t *cnt, unsigned exclude_classes)
193 assert (vars != NULL);
194 assert (cnt != NULL);
195 assert ((exclude_classes & ~((1u << DC_ORDINARY)
197 | (1u << DC_SCRATCH))) == 0);
200 for (i = 0; i < d->var_cnt; i++)
201 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
204 *vars = xmalloc (count * sizeof **vars);
206 for (i = 0; i < d->var_cnt; i++)
207 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
208 (*vars)[(*cnt)++] = d->var[i];
209 assert (*cnt == count);
212 /* Creates and returns a new variable in D with the given NAME
213 and WIDTH. Returns a null pointer if the given NAME would
214 duplicate that of an existing variable in the dictionary. */
216 dict_create_var (struct dictionary *d, const char *name, int width)
221 assert (name != NULL);
222 assert (strlen (name) >= 1 && strlen (name) <= 8);
223 assert (width >= 0 && width < 256);
225 /* Make sure there's not already a variable by that name. */
226 if (dict_lookup_var (d, name) != NULL)
229 /* Allocate and initialize variable. */
230 v = xmalloc (sizeof *v);
231 strncpy (v->name, name, sizeof v->name);
233 v->index = d->var_cnt;
234 v->type = width == 0 ? NUMERIC : ALPHA;
236 v->fv = d->next_value_idx;
237 v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
239 v->reinit = dict_class_from_id (name) != DC_SCRATCH;
240 v->miss_type = MISSING_NONE;
241 if (v->type == NUMERIC)
243 v->print.type = FMT_F;
249 v->print.type = FMT_A;
250 v->print.w = v->width;
254 v->val_labs = val_labs_create (v->width);
257 /* Update dictionary. */
258 if (d->var_cnt >= d->var_cap)
260 d->var_cap = 8 + 2 * d->var_cap;
261 d->var = xrealloc (d->var, d->var_cap * sizeof *d->var);
263 d->var[v->index] = v;
265 hsh_force_insert (d->name_tab, v);
266 d->next_value_idx += v->nv;
271 /* Creates and returns a new variable in D with the given NAME
272 and WIDTH. Assert-fails if the given NAME would duplicate
273 that of an existing variable in the dictionary. */
275 dict_create_var_assert (struct dictionary *d, const char *name, int width)
277 struct variable *v = dict_create_var (d, name, width);
282 /* Creates a new variable in D named NAME, as a copy of existing
283 variable OV, which need not be in D or in any dictionary. */
285 dict_clone_var (struct dictionary *d, const struct variable *ov,
292 assert (name != NULL);
293 assert (strlen (name) >= 1 && strlen (name) <= 8);
295 nv = dict_create_var (d, name, ov->width);
300 nv->reinit = ov->reinit;
301 nv->miss_type = ov->miss_type;
302 memcpy (nv->missing, ov->missing, sizeof nv->missing);
303 nv->print = ov->print;
304 nv->write = ov->write;
305 val_labs_destroy (nv->val_labs);
306 nv->val_labs = val_labs_copy (ov->val_labs);
307 if (ov->label != NULL)
308 nv->label = xstrdup (ov->label);
313 /* Changes the name of V in D to name NEW_NAME. Assert-fails if
314 a variable named NEW_NAME is already in D, except that
315 NEW_NAME may be the same as V's existing name. */
317 dict_rename_var (struct dictionary *d, struct variable *v,
318 const char *new_name)
322 assert (new_name != NULL);
323 assert (strlen (new_name) >= 1 && strlen (new_name) <= 8);
324 assert (dict_contains_var (d, v));
326 if (!strcmp (v->name, new_name))
329 assert (dict_lookup_var (d, new_name) == NULL);
331 hsh_force_delete (d->name_tab, v);
332 strncpy (v->name, new_name, sizeof v->name);
334 hsh_force_insert (d->name_tab, v);
337 /* Returns the variable named NAME in D, or a null pointer if no
338 variable has that name. */
340 dict_lookup_var (const struct dictionary *d, const char *name)
345 assert (name != NULL);
346 assert (strlen (name) >= 1 && strlen (name) <= 8);
348 strncpy (v.name, name, sizeof v.name);
351 return hsh_find (d->name_tab, &v);
354 /* Returns the variable named NAME in D. Assert-fails if no
355 variable has that name. */
357 dict_lookup_var_assert (const struct dictionary *d, const char *name)
359 struct variable *v = dict_lookup_var (d, name);
364 /* Returns nonzero if variable V is in dictionary D. */
366 dict_contains_var (const struct dictionary *d, const struct variable *v)
371 return v->index >= 0 && v->index < d->var_cnt && d->var[v->index] == v;
374 /* Compares two double pointers to variables, which should point
375 to elements of a struct dictionary's `var' member array. */
377 compare_variable_dblptrs (const void *a_, const void *b_, void *aux UNUSED)
379 struct variable *const *a = a_;
380 struct variable *const *b = b_;
390 /* Deletes variable V from dictionary D and frees V.
392 This is a very bad idea if there might be any pointers to V
393 from outside D. In general, no variable in default_dict
394 should be deleted when any transformations are active, because
395 those transformations might reference the deleted variable.
396 The safest time to delete a variable is just after a procedure
397 has been executed, as done by MODIFY VARS.
399 Pointers to V within D are not a problem, because
400 dict_delete_var() knows to remove V from split variables,
401 weights, filters, etc. */
403 dict_delete_var (struct dictionary *d, struct variable *v)
409 assert (dict_contains_var (d, v));
410 assert (d->var[v->index] == v);
412 /* Remove v from splits, weight, filter variables. */
413 d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
415 compare_variable_dblptrs, NULL);
420 dict_clear_vectors (d);
422 /* Remove v from var array. */
424 memmove (d->var + v->index, d->var + v->index + 1,
425 (d->var_cnt - v->index) * sizeof *d->var);
428 for (i = v->index; i < d->var_cnt; i++)
429 d->var[i]->index = i;
431 /* Update name hash. */
432 hsh_force_delete (d->name_tab, v);
435 val_labs_destroy (v->val_labs);
440 /* Deletes the COUNT variables listed in VARS from D. This is
441 unsafe; see the comment on dict_delete_var() for details. */
443 dict_delete_vars (struct dictionary *d,
444 struct variable *const *vars, size_t count)
446 /* FIXME: this can be done in O(count) time, but this algorithm
449 assert (count == 0 || vars != NULL);
452 dict_delete_var (d, *vars++);
455 /* Reorders the variables in D, placing the COUNT variables
456 listed in ORDER in that order at the beginning of D. The
457 other variables in D, if any, retain their relative
460 dict_reorder_vars (struct dictionary *d,
461 struct variable *const *order, size_t count)
463 struct variable **new_var;
467 assert (count == 0 || order != NULL);
468 assert (count <= d->var_cnt);
470 new_var = xmalloc (d->var_cnt * sizeof *new_var);
471 memcpy (new_var, order, count * sizeof *new_var);
472 for (i = 0; i < count; i++)
474 assert (d->var[order[i]->index] != NULL);
475 d->var[order[i]->index] = NULL;
478 for (i = 0; i < d->var_cnt; i++)
479 if (d->var[i] != NULL)
481 assert (count < d->var_cnt);
482 new_var[count] = d->var[i];
483 new_var[count]->index = count;
490 /* Renames COUNT variables specified in VARS to the names given
491 in NEW_NAMES within dictionary D. If the renaming would
492 result in a duplicate variable name, returns zero and stores a
493 name that would be duplicated into *ERR_NAME (if ERR_NAME is
494 non-null). Otherwise, the renaming is successful, and nonzero
497 dict_rename_vars (struct dictionary *d,
498 struct variable **vars, char **new_names,
499 size_t count, char **err_name)
506 assert (count == 0 || vars != NULL);
507 assert (count == 0 || new_names != NULL);
509 old_names = xmalloc (count * sizeof *old_names);
510 for (i = 0; i < count; i++)
512 assert (d->var[vars[i]->index] == vars[i]);
513 hsh_force_delete (d->name_tab, vars[i]);
514 old_names[i] = xstrdup (vars[i]->name);
517 for (i = 0; i < count; i++)
519 assert (new_names[i] != NULL);
520 assert (*new_names[i] != '\0');
521 assert (strlen (new_names[i]) < 9);
522 strcpy (vars[i]->name, new_names[i]);
523 if (hsh_insert (d->name_tab, vars[i]) != NULL)
526 if (err_name != NULL)
527 *err_name = new_names[i];
529 for (i = 0; i < fail_idx; i++)
530 hsh_force_delete (d->name_tab, vars[i]);
532 for (i = 0; i < count; i++)
534 strcpy (vars[i]->name, old_names[i]);
535 hsh_force_insert (d->name_tab, vars[i]);
543 for (i = 0; i < count; i++)
550 /* Returns the weighting variable in dictionary D, or a null
551 pointer if the dictionary is unweighted. */
553 dict_get_weight (const struct dictionary *d)
556 assert (d->weight == NULL || dict_contains_var (d, d->weight));
561 /* Returns the value of D's weighting variable in case C, except that a
562 negative weight is returned as 0. Returns 1 if the dictionary is
563 unweighted. Will warn about missing, negative, or zero values if
564 warn_on_invalid is nonzero. The function will set warn_on_invalid to zero
565 if an invalid weight is found. */
567 dict_get_case_weight (const struct dictionary *d, const struct ccase *c,
568 int *warn_on_invalid)
573 if (d->weight == NULL)
577 double w = c->data[d->weight->fv].f;
578 if ( w < 0.0 || w == SYSMIS || is_num_user_missing(w, d->weight) )
580 if ( w == 0.0 && *warn_on_invalid ) {
581 *warn_on_invalid = 0;
582 msg (SW, _("At least one case in the data file had a weight value "
583 "that was user-missing, system-missing, zero, or "
584 "negative. These case(s) were ignored."));
590 /* Sets the weighting variable of D to V, or turns off
591 weighting if V is a null pointer. */
593 dict_set_weight (struct dictionary *d, struct variable *v)
596 assert (v == NULL || dict_contains_var (d, v));
597 assert (v == NULL || v->type == NUMERIC);
602 /* Returns the filter variable in dictionary D (see cmd_filter())
603 or a null pointer if the dictionary is unfiltered. */
605 dict_get_filter (const struct dictionary *d)
608 assert (d->filter == NULL || dict_contains_var (d, d->filter));
613 /* Sets V as the filter variable for dictionary D. Passing a
614 null pointer for V turns off filtering. */
616 dict_set_filter (struct dictionary *d, struct variable *v)
619 assert (v == NULL || dict_contains_var (d, v));
624 /* Returns the case limit for dictionary D, or zero if the number
625 of cases is unlimited (see cmd_n()). */
627 dict_get_case_limit (const struct dictionary *d)
631 return d->case_limit;
634 /* Sets CASE_LIMIT as the case limit for dictionary D. Zero for
635 CASE_LIMIT indicates no limit. */
637 dict_set_case_limit (struct dictionary *d, int case_limit)
640 assert (case_limit >= 0);
642 d->case_limit = case_limit;
645 /* Returns the index of the next value to be added to D. This
646 value is the number of `union value's that need to be
647 allocated to store a case for dictionary D. */
649 dict_get_next_value_idx (const struct dictionary *d)
653 return d->next_value_idx;
656 /* Returns the number of bytes needed to store a case for
659 dict_get_case_size (const struct dictionary *d)
663 return sizeof (union value) * dict_get_next_value_idx (d);
666 /* Deletes scratch variables in dictionary D and reassigns values
667 so that fragmentation is eliminated. */
669 dict_compact_values (struct dictionary *d)
673 d->next_value_idx = 0;
674 for (i = 0; i < d->var_cnt; )
676 struct variable *v = d->var[i];
678 if (dict_class_from_id (v->name) != DC_SCRATCH)
680 v->fv = d->next_value_idx;
681 d->next_value_idx += v->nv;
685 dict_delete_var (d, v); /* Delete from D itself: V belongs to D, not necessarily to the global default_dict. */
689 /* Returns the number of values that would be used by a case if
690 dict_compact_values() were called. */
692 dict_get_compacted_value_cnt (const struct dictionary *d)
698 for (i = 0; i < d->var_cnt; i++)
699 if (dict_class_from_id (d->var[i]->name) != DC_SCRATCH)
700 cnt += d->var[i]->nv;
704 /* Creates and returns an array mapping from a dictionary index
705 to the `fv' that the corresponding variable will have after
706 calling dict_compact_values(). Scratch variables receive -1
707 for `fv' because dict_compact_values() will delete them. */
709 dict_get_compacted_idx_to_fv (const struct dictionary *d)
712 size_t next_value_idx;
715 idx_to_fv = xmalloc (d->var_cnt * sizeof *idx_to_fv);
717 for (i = 0; i < d->var_cnt; i++)
719 struct variable *v = d->var[i];
721 if (dict_class_from_id (v->name) != DC_SCRATCH)
723 idx_to_fv[i] = next_value_idx;
724 next_value_idx += v->nv;
732 /* Returns the SPLIT FILE vars (see cmd_split_file()). Call
733 dict_get_split_cnt() to determine how many SPLIT FILE vars
734 there are. Returns a null pointer if and only if there are no
736 struct variable *const *
737 dict_get_split_vars (const struct dictionary *d)
744 /* Returns the number of SPLIT FILE vars. */
746 dict_get_split_cnt (const struct dictionary *d)
753 /* Sets CNT split vars SPLIT in dictionary D. */
755 dict_set_split_vars (struct dictionary *d,
756 struct variable *const *split, size_t cnt)
759 assert (cnt == 0 || split != NULL);
762 d->split = xrealloc (d->split, cnt * sizeof *d->split);
763 memcpy (d->split, split, cnt * sizeof *d->split);
766 /* Returns the file label for D, or a null pointer if D is
767 unlabeled (see cmd_file_label()). */
769 dict_get_label (const struct dictionary *d)
776 /* Sets D's file label to LABEL, truncating it to a maximum of 60
779 dict_set_label (struct dictionary *d, const char *label)
786 else if (strlen (label) < 60)
787 d->label = xstrdup (label);
790 d->label = xmalloc (61);
791 memcpy (d->label, label, 60);
796 /* Returns the documents for D, or a null pointer if D has no
797 documents (see cmd_document()). */
799 dict_get_documents (const struct dictionary *d)
806 /* Sets the documents for D to DOCUMENTS, or removes D's
807 documents if DOCUMENTS is a null pointer. */
809 dict_set_documents (struct dictionary *d, const char *documents)
814 if (documents == NULL)
817 d->documents = xstrdup (documents);
820 /* Creates in D a vector named NAME that contains CNT variables
821 VAR (see cmd_vector()). Returns nonzero if successful, or
822 zero if a vector named NAME already exists in D. */
824 dict_create_vector (struct dictionary *d,
826 struct variable **var, size_t cnt)
828 struct vector *vector;
831 assert (name != NULL);
832 assert (strlen (name) > 0 && strlen (name) < 9);
833 assert (var != NULL);
836 if (dict_lookup_vector (d, name) != NULL)
839 d->vector = xrealloc (d->vector, (d->vector_cnt + 1) * sizeof *d->vector);
840 vector = d->vector[d->vector_cnt] = xmalloc (sizeof *vector);
841 vector->idx = d->vector_cnt++;
842 strncpy (vector->name, name, 8);
843 vector->name[8] = '\0';
844 vector->var = xmalloc (cnt * sizeof *var);
845 memcpy (vector->var, var, cnt * sizeof *var);
851 /* Returns the vector in D with index IDX, which must be less
852 than dict_get_vector_cnt (D). */
853 const struct vector *
854 dict_get_vector (const struct dictionary *d, size_t idx)
857 assert (idx < d->vector_cnt);
859 return d->vector[idx];
862 /* Returns the number of vectors in D. */
864 dict_get_vector_cnt (const struct dictionary *d)
868 return d->vector_cnt;
871 /* Looks up and returns the vector within D with the given
873 const struct vector *
874 dict_lookup_vector (const struct dictionary *d, const char *name)
879 assert (name != NULL);
881 for (i = 0; i < d->vector_cnt; i++)
882 if (!strcmp (d->vector[i]->name, name))
887 /* Deletes all vectors from D. */
889 dict_clear_vectors (struct dictionary *d)
895 for (i = 0; i < d->vector_cnt; i++)
897 free (d->vector[i]->var);