1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 #include "algorithm.h"
28 #include "value-labels.h"
34 struct variable **var; /* Variables. */
35 size_t var_cnt, var_cap; /* Number of variables, capacity. */
36 struct hsh_table *name_tab; /* Variable index by name. */
37 int next_value_idx; /* Index of next `union value' to allocate. */
38 struct variable **split; /* SPLIT FILE vars. */
39 size_t split_cnt; /* SPLIT FILE count. */
40 struct variable *weight; /* WEIGHT variable. */
41 struct variable *filter; /* FILTER variable. */
42 int case_limit; /* Current case limit (N command). */
43 char *label; /* File label. */
44 char *documents; /* Documents, as a string. */
45 struct vector **vector; /* Vectors of variables. */
46 size_t vector_cnt; /* Number of vectors. */
49 /* Creates and returns a new dictionary. */
53 struct dictionary *d = xmalloc (sizeof *d);
56 d->var_cnt = d->var_cap = 0;
57 d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL);
58 d->next_value_idx = 0;
72 /* Creates and returns a (deep) copy of an existing
75 dict_clone (const struct dictionary *s)
83 for (i = 0; i < s->var_cnt; i++)
84 dict_clone_var (d, s->var[i], s->var[i]->name);
85 d->next_value_idx = s->next_value_idx;
87 d->split_cnt = s->split_cnt;
90 d->split = xmalloc (d->split_cnt * sizeof *d->split);
91 for (i = 0; i < d->split_cnt; i++)
92 d->split[i] = dict_lookup_var_assert (d, s->split[i]->name);
95 if (s->weight != NULL)
96 d->weight = dict_lookup_var_assert (d, s->weight->name);
98 if (s->filter != NULL)
99 d->filter = dict_lookup_var_assert (d, s->filter->name);
101 d->case_limit = s->case_limit;
102 dict_set_label (d, dict_get_label (s));
103 dict_set_documents (d, dict_get_documents (s));
105 for (i = 0; i < s->vector_cnt; i++)
106 dict_create_vector (d, s->vector[i]->name,
107 s->vector[i]->var, s->vector[i]->cnt);
112 /* Clears the contents from a dictionary without destroying the
113 dictionary itself. */
115 dict_clear (struct dictionary *d)
117 /* FIXME? Should we really clear case_limit, label, documents?
118 Others are necessarily cleared by deleting all the variables.*/
123 for (i = 0; i < d->var_cnt; i++)
125 struct variable *v = d->var[i];
126 val_labs_destroy (v->val_labs);
132 d->var_cnt = d->var_cap = 0;
133 hsh_clear (d->name_tab);
134 d->next_value_idx = 0;
145 dict_clear_vectors (d);
148 /* Clears a dictionary and destroys it. */
150 dict_destroy (struct dictionary *d)
155 hsh_destroy (d->name_tab);
160 /* Returns the number of variables in D. */
162 dict_get_var_cnt (const struct dictionary *d)
169 /* Returns the variable in D with index IDX, which must be
170 between 0 and the count returned by dict_get_var_cnt(),
173 dict_get_var (const struct dictionary *d, size_t idx)
176 assert (idx < d->var_cnt);
181 /* Sets *VARS to an array of pointers to variables in D and *CNT
182 to the number of variables in *VARS. By default all variables
183 are returned, but bits may be set in EXCLUDE_CLASSES to
184 exclude ordinary, system, and/or scratch variables. */
186 dict_get_vars (const struct dictionary *d, struct variable ***vars,
187 size_t *cnt, unsigned exclude_classes)
193 assert (vars != NULL);
194 assert (cnt != NULL);
195 assert ((exclude_classes & ~((1u << DC_ORDINARY)
197 | (1u << DC_SCRATCH))) == 0);
200 for (i = 0; i < d->var_cnt; i++)
201 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
204 *vars = xmalloc (count * sizeof **vars);
206 for (i = 0; i < d->var_cnt; i++)
207 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
208 (*vars)[(*cnt)++] = d->var[i];
209 assert (*cnt == count);
212 /* Creates and returns a new variable in D with the given NAME
213 and WIDTH. Returns a null pointer if the given NAME would
214 duplicate that of an existing variable in the dictionary. */
216 dict_create_var (struct dictionary *d, const char *name, int width)
221 assert (name != NULL);
222 assert (strlen (name) >= 1 && strlen (name) <= 8);
223 assert (width >= 0 && width < 256);
225 /* Make sure there's not already a variable by that name. */
226 if (dict_lookup_var (d, name) != NULL)
229 /* Allocate and initialize variable. */
230 v = xmalloc (sizeof *v);
231 strncpy (v->name, name, sizeof v->name);
233 v->index = d->var_cnt;
234 v->type = width == 0 ? NUMERIC : ALPHA;
236 v->fv = d->next_value_idx;
237 v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
239 v->reinit = name[0] != '#';
240 v->miss_type = MISSING_NONE;
241 if (v->type == NUMERIC)
243 v->print.type = FMT_F;
249 v->print.type = FMT_A;
250 v->print.w = v->width;
254 v->val_labs = val_labs_create (v->width);
257 /* Update dictionary. */
258 if (d->var_cnt >= d->var_cap)
260 d->var_cap = 8 + 2 * d->var_cap;
261 d->var = xrealloc (d->var, d->var_cap * sizeof *d->var);
263 d->var[v->index] = v;
265 hsh_force_insert (d->name_tab, v);
266 d->next_value_idx += v->nv;
271 /* Creates and returns a new variable in D with the given NAME
272 and WIDTH. Assert-fails if the given NAME would duplicate
273 that of an existing variable in the dictionary. */
275 dict_create_var_assert (struct dictionary *d, const char *name, int width)
277 struct variable *v = dict_create_var (d, name, width);
282 /* Creates a new variable in D named NAME, as a copy of existing
283 variable OV, which need not be in D or in any dictionary. */
285 dict_clone_var (struct dictionary *d, const struct variable *ov,
292 assert (name != NULL);
293 assert (strlen (name) >= 1 && strlen (name) <= 8);
295 nv = dict_create_var (d, name, ov->width);
300 nv->reinit = ov->reinit;
301 nv->miss_type = ov->miss_type;
302 memcpy (nv->missing, ov->missing, sizeof nv->missing);
303 nv->print = ov->print;
304 nv->write = ov->write;
305 val_labs_destroy (nv->val_labs);
306 nv->val_labs = val_labs_copy (ov->val_labs);
307 if (ov->label != NULL)
308 nv->label = xstrdup (ov->label);
313 /* Changes the name of V in D to name NEW_NAME. Assert-fails if
314 a variable named NEW_NAME is already in D, except that
315 NEW_NAME may be the same as V's existing name. */
317 dict_rename_var (struct dictionary *d, struct variable *v,
318 const char *new_name)
322 assert (new_name != NULL);
323 assert (strlen (new_name) >= 1 && strlen (new_name) <= 8);
325 if (!strcmp (v->name, new_name))
328 assert (dict_lookup_var (d, new_name) == NULL);
330 hsh_force_delete (d->name_tab, v);
331 strncpy (v->name, new_name, sizeof v->name);
333 hsh_force_insert (d->name_tab, v);
336 /* Returns the variable named NAME in D, or a null pointer if no
337 variable has that name. */
339 dict_lookup_var (const struct dictionary *d, const char *name)
344 assert (name != NULL);
345 assert (strlen (name) >= 1 && strlen (name) <= 8);
347 strncpy (v.name, name, sizeof v.name);
350 return hsh_find (d->name_tab, &v);
353 /* Returns the variable named NAME in D. Assert-fails if no
354 variable has that name. */
356 dict_lookup_var_assert (const struct dictionary *d, const char *name)
358 struct variable *v = dict_lookup_var (d, name);
363 /* Returns nonzero if variable V is in dictionary D. */
365 dict_contains_var (const struct dictionary *d, const struct variable *v)
370 return dict_lookup_var (d, v->name) == v;
373 /* Compares two double pointers to variables, which should point
374 to elements of a struct dictionary's `var' member array. */
376 compare_variable_dblptrs (const void *a_, const void *b_, void *aux UNUSED)
378 struct variable *const *a = a_;
379 struct variable *const *b = b_;
389 /* Deletes variable V from dictionary D and frees V.
391 This is a very bad idea if there might be any pointers to V
392 from outside D. In general, no variable in default_dict
393 should be deleted when any transformations are active, because
394 those transformations might reference the deleted variable.
395 The safest time to delete a variable is just after a procedure
396 has been executed, as done by MODIFY VARS.
398 Pointers to V within D are not a problem, because
399 dict_delete_var() knows to remove V from split variables,
400 weights, filters, etc. */
402 dict_delete_var (struct dictionary *d, struct variable *v)
408 assert (dict_contains_var (d, v));
409 assert (d->var[v->index] == v);
411 /* Remove v from splits, weight, filter variables. */
412 d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
414 compare_variable_dblptrs, NULL);
419 dict_clear_vectors (d);
421 /* Remove v from var array. */
423 memmove (d->var + v->index, d->var + v->index + 1,
424 (d->var_cnt - v->index) * sizeof *d->var);
427 for (i = v->index; i < d->var_cnt; i++)
428 d->var[i]->index = i;
430 /* Update name hash. */
431 hsh_force_delete (d->name_tab, v);
434 val_labs_destroy (v->val_labs);
439 /* Deletes the COUNT variables listed in VARS from D. This is
440 unsafe; see the comment on dict_delete_var() for details. */
442 dict_delete_vars (struct dictionary *d,
443 struct variable *const *vars, size_t count)
445 /* FIXME: this can be done in O(count) time, but this algorithm
448 assert (count == 0 || vars != NULL);
451 dict_delete_var (d, *vars++);
454 /* Reorders the variables in D, placing the COUNT variables
455 listed in ORDER in that order at the beginning of D. The
456 other variables in D, if any, retain their relative
459 dict_reorder_vars (struct dictionary *d,
460 struct variable *const *order, size_t count)
462 struct variable **new_var;
466 assert (count == 0 || order != NULL);
467 assert (count <= d->var_cnt);
469 new_var = xmalloc (d->var_cnt * sizeof *new_var);
470 memcpy (new_var, order, count * sizeof *new_var);
471 for (i = 0; i < count; i++)
473 assert (d->var[order[i]->index] != NULL);
474 d->var[order[i]->index] = NULL;
477 for (i = 0; i < d->var_cnt; i++)
478 if (d->var[i] != NULL)
480 assert (count < d->var_cnt);
481 new_var[count] = d->var[i];
482 new_var[count]->index = count;
489 /* Renames COUNT variables specified in VARS to the names given
490 in NEW_NAMES within dictionary D. If the renaming would
491 result in a duplicate variable name, returns zero and stores a
492 name that would be duplicated into *ERR_NAME (if ERR_NAME is
493 non-null). Otherwise, the renaming is successful, and nonzero
496 dict_rename_vars (struct dictionary *d,
497 struct variable **vars, char **new_names,
498 size_t count, char **err_name)
505 assert (count == 0 || vars != NULL);
506 assert (count == 0 || new_names != NULL);
508 old_names = xmalloc (count * sizeof *old_names);
509 for (i = 0; i < count; i++)
511 assert (d->var[vars[i]->index] == vars[i]);
512 hsh_force_delete (d->name_tab, vars[i]);
513 old_names[i] = xstrdup (vars[i]->name);
516 for (i = 0; i < count; i++)
518 assert (new_names[i] != NULL);
519 assert (*new_names[i] != '\0');
520 assert (strlen (new_names[i]) < 9);
521 strcpy (vars[i]->name, new_names[i]);
522 if (hsh_insert (d->name_tab, vars[i]) != NULL)
525 if (err_name != NULL)
526 *err_name = new_names[i];
528 for (i = 0; i < fail_idx; i++)
529 hsh_force_delete (d->name_tab, vars[i]);
531 for (i = 0; i < count; i++)
533 strcpy (vars[i]->name, old_names[i]);
534 hsh_force_insert (d->name_tab, vars[i]);
542 for (i = 0; i < count; i++)
549 /* Returns the weighting variable in dictionary D, or a null
550 pointer if the dictionary is unweighted. */
552 dict_get_weight (const struct dictionary *d)
555 assert (d->weight == NULL || dict_contains_var (d, d->weight));
560 /* Returns the value of D's weighting variable in case C, except
561 that a negative weight is returned as 0. Returns 1 if the
562 dictionary is unweighted. */
564 dict_get_case_weight (const struct dictionary *d, const struct ccase *c)
569 if (d->weight == NULL)
573 double w = c->data[d->weight->fv].f;
580 /* Sets the weighting variable of D to V, or turns off
581 weighting if V is a null pointer. */
583 dict_set_weight (struct dictionary *d, struct variable *v)
586 assert (v == NULL || dict_contains_var (d, v));
587 assert (v == NULL || v->type == NUMERIC);
592 /* Returns the filter variable in dictionary D (see cmd_filter())
593 or a null pointer if the dictionary is unfiltered. */
595 dict_get_filter (const struct dictionary *d)
598 assert (d->filter == NULL || dict_contains_var (d, d->filter));
603 /* Sets V as the filter variable for dictionary D. Passing a
604 null pointer for V turns off filtering. */
606 dict_set_filter (struct dictionary *d, struct variable *v)
609 assert (v == NULL || dict_contains_var (d, v));
614 /* Returns the case limit for dictionary D, or zero if the number
615 of cases is unlimited (see cmd_n()). */
617 dict_get_case_limit (const struct dictionary *d)
621 return d->case_limit;
624 /* Sets CASE_LIMIT as the case limit for dictionary D. Zero for
625 CASE_LIMIT indicates no limit. */
627 dict_set_case_limit (struct dictionary *d, int case_limit)
630 assert (case_limit >= 0);
632 d->case_limit = case_limit;
635 /* Returns the index of the next value to be added to D. This
636 value is the number of `union value's that need to be
637 allocated to store a case for dictionary D. */
639 dict_get_next_value_idx (const struct dictionary *d)
643 return d->next_value_idx;
646 /* Returns the number of bytes needed to store a case for
649 dict_get_case_size (const struct dictionary *d)
653 return sizeof (union value) * dict_get_next_value_idx (d);
656 /* Reassigns values in dictionary D so that fragmentation is
659 dict_compact_values (struct dictionary *d)
663 d->next_value_idx = 0;
664 for (i = 0; i < d->var_cnt; i++)
666 struct variable *v = d->var[i];
668 v->fv = d->next_value_idx;
669 d->next_value_idx += v->nv;
673 /* Returns the SPLIT FILE vars (see cmd_split_file()). Call
674 dict_get_split_cnt() to determine how many SPLIT FILE vars
675 there are. Returns a null pointer if and only if there are no
677 struct variable *const *
678 dict_get_split_vars (const struct dictionary *d)
685 /* Returns the number of SPLIT FILE vars. */
687 dict_get_split_cnt (const struct dictionary *d)
694 /* Sets CNT split vars SPLIT in dictionary D. */
696 dict_set_split_vars (struct dictionary *d,
697 struct variable *const *split, size_t cnt)
700 assert (cnt == 0 || split != NULL);
703 d->split = xrealloc (d->split, cnt * sizeof *d->split);
704 memcpy (d->split, split, cnt * sizeof *d->split);
707 /* Returns the file label for D, or a null pointer if D is
708 unlabeled (see cmd_file_label()). */
710 dict_get_label (const struct dictionary *d)
717 /* Sets D's file label to LABEL, truncating it to a maximum of 60
720 dict_set_label (struct dictionary *d, const char *label)
727 else if (strlen (label) < 60)
728 d->label = xstrdup (label);
731 d->label = xmalloc (61);
732 memcpy (d->label, label, 60);
737 /* Returns the documents for D, or a null pointer if D has no
738 documents (see cmd_document()). */
740 dict_get_documents (const struct dictionary *d)
747 /* Sets the documents for D to DOCUMENTS, or removes D's
748 documents if DOCUMENTS is a null pointer. */
750 dict_set_documents (struct dictionary *d, const char *documents)
755 if (documents == NULL)
758 d->documents = xstrdup (documents);
761 /* Creates in D a vector named NAME that contains CNT variables
762 VAR (see cmd_vector()). Returns nonzero if successful, or
763 zero if a vector named NAME already exists in D. */
765 dict_create_vector (struct dictionary *d,
767 struct variable **var, size_t cnt)
769 struct vector *vector;
772 assert (name != NULL);
773 assert (strlen (name) > 0 && strlen (name) < 9);
774 assert (var != NULL);
777 if (dict_lookup_vector (d, name) != NULL)
780 d->vector = xrealloc (d->vector, (d->vector_cnt + 1) * sizeof *d->vector);
781 vector = d->vector[d->vector_cnt] = xmalloc (sizeof *vector);
782 vector->idx = d->vector_cnt++;
783 strncpy (vector->name, name, 8);
784 vector->name[8] = '\0';
785 vector->var = xmalloc (cnt * sizeof *var);
786 memcpy (vector->var, var, cnt * sizeof *var);
792 /* Returns the vector in D with index IDX, which must be less
793 than dict_get_vector_cnt (D). */
794 const struct vector *
795 dict_get_vector (const struct dictionary *d, size_t idx)
798 assert (idx < d->vector_cnt);
800 return d->vector[idx];
803 /* Returns the number of vectors in D. */
805 dict_get_vector_cnt (const struct dictionary *d)
809 return d->vector_cnt;
812 /* Looks up and returns the vector within D with the given
814 const struct vector *
815 dict_lookup_vector (const struct dictionary *d, const char *name)
820 assert (name != NULL);
822 for (i = 0; i < d->vector_cnt; i++)
823 if (!strcmp (d->vector[i]->name, name))
828 /* Deletes all vectors from D. */
830 dict_clear_vectors (struct dictionary *d)
836 for (i = 0; i < d->vector_cnt; i++)
838 free (d->vector[i]->var);