1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 #include "algorithm.h"
28 #include "value-labels.h"
34 struct variable **var; /* Variables. */
35 size_t var_cnt, var_cap; /* Number of variables, capacity. */
36 struct hsh_table *name_tab; /* Variable index by name. */
37 int next_value_idx; /* Index of next `union value' to allocate. */
38 struct variable **split; /* SPLIT FILE vars. */
39 size_t split_cnt; /* SPLIT FILE count. */
40 struct variable *weight; /* WEIGHT variable. */
41 struct variable *filter; /* FILTER variable. */
42 int case_limit; /* Current case limit (N command). */
43 char *label; /* File label. */
44 char *documents; /* Documents, as a string. */
45 struct vector **vector; /* Vectors of variables. */
46 size_t vector_cnt; /* Number of vectors. */
49 /* Creates and returns a new dictionary. */
53 struct dictionary *d = xmalloc (sizeof *d);
56 d->var_cnt = d->var_cap = 0;
57 d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL);
58 d->next_value_idx = 0;
72 /* Creates and returns a (deep) copy of an existing
75 dict_clone (const struct dictionary *s)
83 for (i = 0; i < s->var_cnt; i++)
84 dict_clone_var (d, s->var[i], s->var[i]->name);
85 d->next_value_idx = s->next_value_idx;
87 d->split_cnt = s->split_cnt;
90 d->split = xmalloc (d->split_cnt * sizeof *d->split);
91 for (i = 0; i < d->split_cnt; i++)
92 d->split[i] = dict_lookup_var_assert (d, s->split[i]->name);
95 if (s->weight != NULL)
96 d->weight = dict_lookup_var_assert (d, s->weight->name);
98 if (s->filter != NULL)
99 d->filter = dict_lookup_var_assert (d, s->filter->name);
101 d->case_limit = s->case_limit;
102 dict_set_label (d, dict_get_label (s));
103 dict_set_documents (d, dict_get_documents (s));
105 for (i = 0; i < s->vector_cnt; i++)
106 dict_create_vector (d, s->vector[i]->name,
107 s->vector[i]->var, s->vector[i]->cnt);
112 /* Clears the contents from a dictionary without destroying the
113 dictionary itself. */
115 dict_clear (struct dictionary *d)
117 /* FIXME? Should we really clear case_limit, label, documents?
118 Others are necessarily cleared by deleting all the variables.*/
123 for (i = 0; i < d->var_cnt; i++)
125 struct variable *v = d->var[i];
126 val_labs_destroy (v->val_labs);
132 d->var_cnt = d->var_cap = 0;
133 hsh_clear (d->name_tab);
134 d->next_value_idx = 0;
145 dict_clear_vectors (d);
148 /* Clears a dictionary and destroys it. */
150 dict_destroy (struct dictionary *d)
155 hsh_destroy (d->name_tab);
160 /* Returns the number of variables in D. */
162 dict_get_var_cnt (const struct dictionary *d)
169 /* Returns the variable in D with index IDX, which must be
170 between 0 and the count returned by dict_get_var_cnt(),
173 dict_get_var (const struct dictionary *d, size_t idx)
176 assert (idx < d->var_cnt);
181 /* Sets *VARS to an array of pointers to variables in D and *CNT
182 to the number of variables in *D. By default all variables
183 are returned, but bits may be set in EXCLUDE_CLASSES to
184 exclude ordinary, system, and/or scratch variables. */
186 dict_get_vars (const struct dictionary *d, struct variable ***vars,
187 size_t *cnt, unsigned exclude_classes)
193 assert (vars != NULL);
194 assert (cnt != NULL);
195 assert ((exclude_classes & ~((1u << DC_ORDINARY)
197 | (1u << DC_SCRATCH))) == 0);
200 for (i = 0; i < d->var_cnt; i++)
201 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
204 *vars = xmalloc (count * sizeof **vars);
206 for (i = 0; i < d->var_cnt; i++)
207 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
208 (*vars)[(*cnt)++] = d->var[i];
209 assert (*cnt == count);
212 /* Creates and returns a new variable in D with the given NAME
213 and WIDTH. Returns a null pointer if the given NAME would
214 duplicate that of an existing variable in the dictionary. */
216 dict_create_var (struct dictionary *d, const char *name, int width)
221 assert (name != NULL);
222 assert (strlen (name) >= 1 && strlen (name) <= 8);
223 assert (width >= 0 && width < 256);
225 /* Make sure there's not already a variable by that name. */
226 if (dict_lookup_var (d, name) != NULL)
229 /* Allocate and initialize variable. */
230 v = xmalloc (sizeof *v);
231 strncpy (v->name, name, sizeof v->name);
233 v->index = d->var_cnt;
234 v->type = width == 0 ? NUMERIC : ALPHA;
236 v->fv = d->next_value_idx;
237 v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
239 v->reinit = dict_class_from_id (name) != DC_SCRATCH;
240 v->miss_type = MISSING_NONE;
241 if (v->type == NUMERIC)
243 v->print.type = FMT_F;
249 v->print.type = FMT_A;
250 v->print.w = v->width;
254 v->val_labs = val_labs_create (v->width);
257 /* Update dictionary. */
258 if (d->var_cnt >= d->var_cap)
260 d->var_cap = 8 + 2 * d->var_cap;
261 d->var = xrealloc (d->var, d->var_cap * sizeof *d->var);
263 d->var[v->index] = v;
265 hsh_force_insert (d->name_tab, v);
266 d->next_value_idx += v->nv;
271 /* Creates and returns a new variable in D with the given NAME
272 and WIDTH. Assert-fails if the given NAME would duplicate
273 that of an existing variable in the dictionary. */
275 dict_create_var_assert (struct dictionary *d, const char *name, int width)
277 struct variable *v = dict_create_var (d, name, width);
282 /* Creates a new variable in D named NAME, as a copy of existing
283 variable OV, which need not be in D or in any dictionary. */
285 dict_clone_var (struct dictionary *d, const struct variable *ov,
292 assert (name != NULL);
293 assert (strlen (name) >= 1 && strlen (name) <= 8);
295 nv = dict_create_var (d, name, ov->width);
300 nv->reinit = ov->reinit;
301 nv->miss_type = ov->miss_type;
302 memcpy (nv->missing, ov->missing, sizeof nv->missing);
303 nv->print = ov->print;
304 nv->write = ov->write;
305 val_labs_destroy (nv->val_labs);
306 nv->val_labs = val_labs_copy (ov->val_labs);
307 if (ov->label != NULL)
308 nv->label = xstrdup (ov->label);
313 /* Changes the name of V in D to name NEW_NAME. Assert-fails if
314 a variable named NEW_NAME is already in D, except that
315 NEW_NAME may be the same as V's existing name. */
317 dict_rename_var (struct dictionary *d, struct variable *v,
318 const char *new_name)
322 assert (new_name != NULL);
323 assert (strlen (new_name) >= 1 && strlen (new_name) <= 8);
324 assert (dict_contains_var (d, v));
326 if (!strcmp (v->name, new_name))
329 assert (dict_lookup_var (d, new_name) == NULL);
331 hsh_force_delete (d->name_tab, v);
332 strncpy (v->name, new_name, sizeof v->name);
334 hsh_force_insert (d->name_tab, v);
337 /* Returns the variable named NAME in D, or a null pointer if no
338 variable has that name. */
340 dict_lookup_var (const struct dictionary *d, const char *name)
345 assert (name != NULL);
346 assert (strlen (name) >= 1 && strlen (name) <= 8);
348 strncpy (v.name, name, sizeof v.name);
351 return hsh_find (d->name_tab, &v);
354 /* Returns the variable named NAME in D. Assert-fails if no
355 variable has that name. */
357 dict_lookup_var_assert (const struct dictionary *d, const char *name)
359 struct variable *v = dict_lookup_var (d, name);
364 /* Returns nonzero if variable V is in dictionary D. */
366 dict_contains_var (const struct dictionary *d, const struct variable *v)
371 return v->index >= 0 && v->index < d->var_cnt && d->var[v->index] == v;
374 /* Compares two double pointers to variables, which should point
375 to elements of a struct dictionary's `var' member array. */
377 compare_variable_dblptrs (const void *a_, const void *b_, void *aux UNUSED)
379 struct variable *const *a = a_;
380 struct variable *const *b = b_;
390 /* Deletes variable V from dictionary D and frees V.
392 This is a very bad idea if there might be any pointers to V
393 from outside D. In general, no variable in default_dict
394 should be deleted when any transformations are active, because
395 those transformations might reference the deleted variable.
396 The safest time to delete a variable is just after a procedure
397 has been executed, as done by MODIFY VARS.
399 Pointers to V within D are not a problem, because
400 dict_delete_var() knows to remove V from split variables,
401 weights, filters, etc. */
403 dict_delete_var (struct dictionary *d, struct variable *v)
409 assert (dict_contains_var (d, v));
410 assert (d->var[v->index] == v);
412 /* Remove v from splits, weight, filter variables. */
413 d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
415 compare_variable_dblptrs, NULL);
420 dict_clear_vectors (d);
422 /* Remove v from var array. */
424 memmove (d->var + v->index, d->var + v->index + 1,
425 (d->var_cnt - v->index) * sizeof *d->var);
428 for (i = v->index; i < d->var_cnt; i++)
429 d->var[i]->index = i;
431 /* Update name hash. */
432 hsh_force_delete (d->name_tab, v);
435 val_labs_destroy (v->val_labs);
440 /* Deletes the COUNT variables listed in VARS from D. This is
441 unsafe; see the comment on dict_delete_var() for details. */
443 dict_delete_vars (struct dictionary *d,
444 struct variable *const *vars, size_t count)
446 /* FIXME: this can be done in O(count) time, but this algorithm
449 assert (count == 0 || vars != NULL);
452 dict_delete_var (d, *vars++);
455 /* Reorders the variables in D, placing the COUNT variables
456 listed in ORDER in that order at the beginning of D. The
457 other variables in D, if any, retain their relative
460 dict_reorder_vars (struct dictionary *d,
461 struct variable *const *order, size_t count)
463 struct variable **new_var;
467 assert (count == 0 || order != NULL);
468 assert (count <= d->var_cnt);
470 new_var = xmalloc (d->var_cnt * sizeof *new_var);
471 memcpy (new_var, order, count * sizeof *new_var);
472 for (i = 0; i < count; i++)
474 assert (d->var[order[i]->index] != NULL);
475 d->var[order[i]->index] = NULL;
478 for (i = 0; i < d->var_cnt; i++)
479 if (d->var[i] != NULL)
481 assert (count < d->var_cnt);
482 new_var[count] = d->var[i];
483 new_var[count]->index = count;
490 /* Renames COUNT variables specified in VARS to the names given
491 in NEW_NAMES within dictionary D. If the renaming would
492 result in a duplicate variable name, returns zero and stores a
493 name that would be duplicated into *ERR_NAME (if ERR_NAME is
494 non-null). Otherwise, the renaming is successful, and nonzero
497 dict_rename_vars (struct dictionary *d,
498 struct variable **vars, char **new_names,
499 size_t count, char **err_name)
506 assert (count == 0 || vars != NULL);
507 assert (count == 0 || new_names != NULL);
509 old_names = xmalloc (count * sizeof *old_names);
510 for (i = 0; i < count; i++)
512 assert (d->var[vars[i]->index] == vars[i]);
513 hsh_force_delete (d->name_tab, vars[i]);
514 old_names[i] = xstrdup (vars[i]->name);
517 for (i = 0; i < count; i++)
519 assert (new_names[i] != NULL);
520 assert (*new_names[i] != '\0');
521 assert (strlen (new_names[i]) < 9);
522 strcpy (vars[i]->name, new_names[i]);
523 if (hsh_insert (d->name_tab, vars[i]) != NULL)
526 if (err_name != NULL)
527 *err_name = new_names[i];
529 for (i = 0; i < fail_idx; i++)
530 hsh_force_delete (d->name_tab, vars[i]);
532 for (i = 0; i < count; i++)
534 strcpy (vars[i]->name, old_names[i]);
535 hsh_force_insert (d->name_tab, vars[i]);
543 for (i = 0; i < count; i++)
550 /* Returns the weighting variable in dictionary D, or a null
551 pointer if the dictionary is unweighted. */
553 dict_get_weight (const struct dictionary *d)
556 assert (d->weight == NULL || dict_contains_var (d, d->weight));
561 /* Returns the value of D's weighting variable in case C, except
562 that a negative weight is returned as 0. Returns 1 if the
563 dictionary is unweighted. */
565 dict_get_case_weight (const struct dictionary *d, const struct ccase *c)
570 if (d->weight == NULL)
574 double w = c->data[d->weight->fv].f;
581 /* Sets the weighting variable of D to V, or turns off
582 weighting if V is a null pointer. */
584 dict_set_weight (struct dictionary *d, struct variable *v)
587 assert (v == NULL || dict_contains_var (d, v));
588 assert (v == NULL || v->type == NUMERIC);
593 /* Returns the filter variable in dictionary D (see cmd_filter())
594 or a null pointer if the dictionary is unfiltered. */
596 dict_get_filter (const struct dictionary *d)
599 assert (d->filter == NULL || dict_contains_var (d, d->filter));
604 /* Sets V as the filter variable for dictionary D. Passing a
605 null pointer for V turns off filtering. */
607 dict_set_filter (struct dictionary *d, struct variable *v)
610 assert (v == NULL || dict_contains_var (d, v));
615 /* Returns the case limit for dictionary D, or zero if the number
616 of cases is unlimited (see cmd_n()). */
618 dict_get_case_limit (const struct dictionary *d)
622 return d->case_limit;
625 /* Sets CASE_LIMIT as the case limit for dictionary D. Zero for
626 CASE_LIMIT indicates no limit. */
628 dict_set_case_limit (struct dictionary *d, int case_limit)
631 assert (case_limit >= 0);
633 d->case_limit = case_limit;
636 /* Returns the index of the next value to be added to D. This
637 value is the number of `union value's that need to be
638 allocated to store a case for dictionary D. */
640 dict_get_next_value_idx (const struct dictionary *d)
644 return d->next_value_idx;
647 /* Returns the number of bytes needed to store a case for
650 dict_get_case_size (const struct dictionary *d)
654 return sizeof (union value) * dict_get_next_value_idx (d);
657 /* Deletes scratch variables in dictionary D and reassigns values
658 so that fragmentation is eliminated. */
660 dict_compact_values (struct dictionary *d)
664 for (i = 0; i < d->var_cnt; )
666 struct variable *v = d->var[i];
668 if (dict_class_from_id (v->name) != DC_SCRATCH)
670 v->fv = d->next_value_idx;
671 d->next_value_idx += v->nv;
675 dict_delete_var (d, v);
679 /* Returns the number of values that would be used by a case if
680 dict_compact_values() were called. */
682 dict_get_compacted_value_cnt (const struct dictionary *d)
688 for (i = 0; i < d->var_cnt; i++)
689 if (dict_class_from_id (d->var[i]->name) != DC_SCRATCH)
690 cnt += d->var[i]->nv;
694 /* Creates and returns an array mapping from a dictionary index
695 to the `fv' that the corresponding variable will have after
696 calling dict_compact_values(). Scratch variables receive -1
697 for `fv' because dict_compact_values() will delete them. */
699 dict_get_compacted_idx_to_fv (const struct dictionary *d)
702 size_t next_value_idx;
705 idx_to_fv = xmalloc (d->var_cnt * sizeof *idx_to_fv);
707 for (i = 0; i < d->var_cnt; i++)
709 struct variable *v = d->var[i];
711 if (dict_class_from_id (v->name) != DC_SCRATCH)
713 idx_to_fv[i] = next_value_idx;
714 next_value_idx += v->nv;
722 /* Returns the SPLIT FILE vars (see cmd_split_file()). Call
723 dict_get_split_cnt() to determine how many SPLIT FILE vars
724 there are. Returns a null pointer if and only if there are no
726 struct variable *const *
727 dict_get_split_vars (const struct dictionary *d)
734 /* Returns the number of SPLIT FILE vars. */
736 dict_get_split_cnt (const struct dictionary *d)
743 /* Sets CNT split vars SPLIT in dictionary D. */
745 dict_set_split_vars (struct dictionary *d,
746 struct variable *const *split, size_t cnt)
749 assert (cnt == 0 || split != NULL);
752 d->split = xrealloc (d->split, cnt * sizeof *d->split);
753 memcpy (d->split, split, cnt * sizeof *d->split);
756 /* Returns the file label for D, or a null pointer if D is
757 unlabeled (see cmd_file_label()). */
759 dict_get_label (const struct dictionary *d)
766 /* Sets D's file label to LABEL, truncating it to a maximum of 60
769 dict_set_label (struct dictionary *d, const char *label)
776 else if (strlen (label) < 60)
777 d->label = xstrdup (label);
780 d->label = xmalloc (61);
781 memcpy (d->label, label, 60);
786 /* Returns the documents for D, or a null pointer if D has no
787 documents (see cmd_document()). */
789 dict_get_documents (const struct dictionary *d)
796 /* Sets the documents for D to DOCUMENTS, or removes D's
797 documents if DOCUMENTS is a null pointer. */
799 dict_set_documents (struct dictionary *d, const char *documents)
804 if (documents == NULL)
807 d->documents = xstrdup (documents);
810 /* Creates in D a vector named NAME that contains CNT variables
811 VAR (see cmd_vector()). Returns nonzero if successful, or
812 zero if a vector named NAME already exists in D. */
814 dict_create_vector (struct dictionary *d,
816 struct variable **var, size_t cnt)
818 struct vector *vector;
821 assert (name != NULL);
822 assert (strlen (name) > 0 && strlen (name) < 9);
823 assert (var != NULL);
826 if (dict_lookup_vector (d, name) != NULL)
829 d->vector = xrealloc (d->vector, (d->vector_cnt + 1) * sizeof *d->vector);
830 vector = d->vector[d->vector_cnt] = xmalloc (sizeof *vector);
831 vector->idx = d->vector_cnt++;
832 strncpy (vector->name, name, 8);
833 vector->name[8] = '\0';
834 vector->var = xmalloc (cnt * sizeof *var);
835 memcpy (vector->var, var, cnt * sizeof *var);
841 /* Returns the vector in D with index IDX, which must be less
842 than dict_get_vector_cnt (D). */
843 const struct vector *
844 dict_get_vector (const struct dictionary *d, size_t idx)
847 assert (idx < d->vector_cnt);
849 return d->vector[idx];
852 /* Returns the number of vectors in D. */
854 dict_get_vector_cnt (const struct dictionary *d)
858 return d->vector_cnt;
861 /* Looks up and returns the vector within D with the given
863 const struct vector *
864 dict_lookup_vector (const struct dictionary *d, const char *name)
869 assert (name != NULL);
871 for (i = 0; i < d->vector_cnt; i++)
872 if (!strcmp (d->vector[i]->name, name))
877 /* Deletes all vectors from D. */
879 dict_clear_vectors (struct dictionary *d)
885 for (i = 0; i < d->vector_cnt; i++)
887 free (d->vector[i]->var);