1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 #include "algorithm.h"
28 #include "value-labels.h"
34 struct variable **var; /* Variables. */
35 size_t var_cnt, var_cap; /* Number of variables, capacity. */
36 struct hsh_table *name_tab; /* Variable index by name. */
37 int next_value_idx; /* Index of next `union value' to allocate. */
38 struct variable **split; /* SPLIT FILE vars. */
39 size_t split_cnt; /* SPLIT FILE count. */
40 struct variable *weight; /* WEIGHT variable. */
41 struct variable *filter; /* FILTER variable. */
42 int case_limit; /* Current case limit (N command). */
43 char *label; /* File label. */
44 char *documents; /* Documents, as a string. */
45 struct vector **vector; /* Vectors of variables. */
46 size_t vector_cnt; /* Number of vectors. */
49 /* Creates and returns a new dictionary. */
53 struct dictionary *d = xmalloc (sizeof *d);
56 d->var_cnt = d->var_cap = 0;
57 d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL);
58 d->next_value_idx = 0;
72 /* Creates and returns a (deep) copy of an existing
75 dict_clone (const struct dictionary *s)
83 for (i = 0; i < s->var_cnt; i++)
84 dict_clone_var (d, s->var[i], s->var[i]->name);
85 d->next_value_idx = s->next_value_idx;
87 d->split_cnt = s->split_cnt;
90 d->split = xmalloc (d->split_cnt * sizeof *d->split);
91 for (i = 0; i < d->split_cnt; i++)
92 d->split[i] = dict_lookup_var_assert (d, s->split[i]->name);
95 if (s->weight != NULL)
96 d->weight = dict_lookup_var_assert (d, s->weight->name);
98 if (s->filter != NULL)
99 d->filter = dict_lookup_var_assert (d, s->filter->name);
101 d->case_limit = s->case_limit;
102 dict_set_label (d, dict_get_label (s));
103 dict_set_documents (d, dict_get_documents (s));
105 for (i = 0; i < s->vector_cnt; i++)
106 dict_create_vector (d, s->vector[i]->name,
107 s->vector[i]->var, s->vector[i]->cnt);
112 /* Clears the contents from a dictionary without destroying the
113 dictionary itself. */
115 dict_clear (struct dictionary *d)
117 /* FIXME? Should we really clear case_limit, label, documents?
118 Others are necessarily cleared by deleting all the variables.*/
/* Destroy each variable's value labels. */
123 for (i = 0; i < d->var_cnt; i++)
125 struct variable *v = d->var[i];
126 val_labs_destroy (v->val_labs);
/* Reset the variable array bookkeeping, empty the name index, and
   restart value allocation from index 0. */
132 d->var_cnt = d->var_cap = 0;
133 hsh_clear (d->name_tab);
134 d->next_value_idx = 0;
/* Delete all of D's vectors. */
145 dict_clear_vectors (d);
148 /* Clears a dictionary and destroys it. */
150 dict_destroy (struct dictionary *d)
155 hsh_destroy (d->name_tab);
160 /* Returns the number of variables in D. */
162 dict_get_var_cnt (const struct dictionary *d)
169 /* Returns the variable in D with index IDX, which must be
170 between 0 and the count returned by dict_get_var_cnt(),
173 dict_get_var (const struct dictionary *d, size_t idx)
176 assert (idx < d->var_cnt);
181 /* Sets *VARS to an array of pointers to variables in D and *CNT
182 to the number of variables stored in *VARS. By default all variables
183 are returned, but bits may be set in EXCLUDE_CLASSES to
184 exclude ordinary, system, and/or scratch variables. */
186 dict_get_vars (const struct dictionary *d, struct variable ***vars,
187 size_t *cnt, unsigned exclude_classes)
193 assert (vars != NULL);
194 assert (cnt != NULL);
/* EXCLUDE_CLASSES may not contain any bit other than the class bits
   masked out here. */
195 assert ((exclude_classes & ~((1u << DC_ORDINARY)
197 | (1u << DC_SCRATCH))) == 0);
/* First pass: visit the variables whose class is not excluded, so
   that the result array can be allocated with xmalloc() at exactly
   COUNT elements. */
200 for (i = 0; i < d->var_cnt; i++)
201 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
204 *vars = xmalloc (count * sizeof **vars);
/* Second pass: apply the same test again and store the selected
   variables, in dictionary order. */
206 for (i = 0; i < d->var_cnt; i++)
207 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
208 (*vars)[(*cnt)++] = d->var[i];
/* Both passes must have selected the same number of variables. */
209 assert (*cnt == count);
212 /* Creates and returns a new variable in D with the given NAME
213 and WIDTH. Returns a null pointer if the given NAME would
214 duplicate that of an existing variable in the dictionary. */
216 dict_create_var (struct dictionary *d, const char *name, int width)
/* NAME must be 1 to 8 characters long and WIDTH must be in the
   range 0...255. */
221 assert (name != NULL);
222 assert (strlen (name) >= 1 && strlen (name) <= 8);
223 assert (width >= 0 && width < 256);
225 /* Make sure there's not already a variable by that name. */
226 if (dict_lookup_var (d, name) != NULL)
229 /* Allocate and initialize variable. */
230 v = xmalloc (sizeof *v);
/* NOTE(review): strncpy() leaves v->name unterminated if NAME is at
   least sizeof v->name characters long.  This assumes v->name holds
   8 characters plus a terminator -- confirm. */
231 strncpy (v->name, name, sizeof v->name);
/* V is stored at the current variable count, i.e. appended to D. */
233 v->index = d->var_cnt;
/* A WIDTH of 0 makes the variable numeric; any other WIDTH makes it
   a string. */
234 v->type = width == 0 ? NUMERIC : ALPHA;
/* V's values start at the next unallocated value index.  A numeric
   variable takes one value; a string takes DIV_RND_UP (width, 8) of
   them (presumably WIDTH / 8 rounded up). */
236 v->fv = d->next_value_idx;
237 v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
/* reinit is off only for names that begin with '#'. */
239 v->reinit = name[0] != '#';
240 v->miss_type = MISSING_NONE;
/* Default print format: FMT_F for numeric variables, FMT_A with the
   variable's width for strings. */
241 if (v->type == NUMERIC)
243 v->print.type = FMT_F;
249 v->print.type = FMT_A;
250 v->print.w = v->width;
253 v->val_labs = val_labs_create (v->width);
256 /* Update dictionary. */
/* When the variable array is full, grow it to 8 + 2 * capacity
   elements. */
257 if (d->var_cnt >= d->var_cap)
259 d->var_cap = 8 + 2 * d->var_cap;
260 d->var = xrealloc (d->var, d->var_cap * sizeof *d->var);
262 d->var[v->index] = v;
/* Index V by name and advance the value index past V's values. */
264 hsh_force_insert (d->name_tab, v);
265 d->next_value_idx += v->nv;
270 /* Creates and returns a new variable in D with the given NAME
271 and WIDTH. Assert-fails if the given NAME would duplicate
272 that of an existing variable in the dictionary. */
274 dict_create_var_assert (struct dictionary *d, const char *name, int width)
276 struct variable *v = dict_create_var (d, name, width);
281 /* Creates a new variable in D named NAME, as a copy of existing
282 variable OV, which need not be in D or in any dictionary. */
284 dict_clone_var (struct dictionary *d, const struct variable *ov,
291 assert (name != NULL);
292 assert (strlen (name) >= 1 && strlen (name) <= 8);
/* Create the variable with OV's width; dict_create_var() fills in
   default attributes. */
294 nv = dict_create_var (d, name, ov->width);
/* Replace the defaults with OV's attributes. */
299 nv->reinit = ov->reinit;
300 nv->miss_type = ov->miss_type;
301 memcpy (nv->missing, ov->missing, sizeof nv->missing);
302 nv->print = ov->print;
303 nv->write = ov->write;
/* dict_create_var() already gave NV a value-label set of its own;
   destroy it before installing a copy of OV's. */
304 val_labs_destroy (nv->val_labs);
305 nv->val_labs = val_labs_copy (ov->val_labs);
/* The label string is duplicated rather than shared with OV. */
306 if (ov->label != NULL)
307 nv->label = xstrdup (ov->label);
312 /* Changes the name of V in D to name NEW_NAME. Assert-fails if
313 a variable named NEW_NAME is already in D, except that
314 NEW_NAME may be the same as V's existing name. */
316 dict_rename_var (struct dictionary *d, struct variable *v,
317 const char *new_name)
321 assert (new_name != NULL);
322 assert (strlen (new_name) >= 1 && strlen (new_name) <= 8);
/* An unchanged name is handled separately from the duplicate check
   below. */
324 if (!strcmp (v->name, new_name))
327 assert (dict_lookup_var (d, new_name) == NULL);
/* V is taken out of the name table while it still has its old name
   and reinserted once the new name is in place. */
329 hsh_force_delete (d->name_tab, v);
330 strncpy (v->name, new_name, sizeof v->name);
332 hsh_force_insert (d->name_tab, v);
335 /* Returns the variable named NAME in D, or a null pointer if no
336 variable has that name. */
338 dict_lookup_var (const struct dictionary *d, const char *name)
343 assert (name != NULL);
344 assert (strlen (name) >= 1 && strlen (name) <= 8);
/* V is a local `struct variable' that serves as the search key: its
   name is set to NAME and it is handed to hsh_find(). */
346 strncpy (v.name, name, sizeof v.name);
349 return hsh_find (d->name_tab, &v);
352 /* Returns the variable named NAME in D. Assert-fails if no
353 variable has that name. */
355 dict_lookup_var_assert (const struct dictionary *d, const char *name)
357 struct variable *v = dict_lookup_var (d, name);
362 /* Returns nonzero if variable V is in dictionary D. */
364 dict_contains_var (const struct dictionary *d, const struct variable *v)
/* V is in D only if the variable that D stores under V's name is V
   itself, not merely a variable with an equal name. */
369 return dict_lookup_var (d, v->name) == v;
372 /* Compares two double pointers to variables, which should point
373 to elements of a struct dictionary's `var' member array. */
375 compare_variable_dblptrs (const void *a_, const void *b_, void *aux UNUSED)
377 struct variable *const *a = a_;
378 struct variable *const *b = b_;
388 /* Deletes variable V from dictionary D and frees V.
390 This is a very bad idea if there might be any pointers to V
391 from outside D. In general, no variable in default_dict
392 should be deleted when any transformations are active, because
393 those transformations might reference the deleted variable.
394 The safest time to delete a variable is just after a procedure
395 has been executed, as done by MODIFY VARS.
397 Pointers to V within D are not a problem, because
398 dict_delete_var() knows to remove V from split variables,
399 weights, filters, etc. */
401 dict_delete_var (struct dictionary *d, struct variable *v)
/* V must belong to D, and its stored index must agree with its
   position in D's variable array. */
407 assert (dict_contains_var (d, v));
408 assert (d->var[v->index] == v);
410 /* Remove v from splits, weight, filter variables. */
411 d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
413 compare_variable_dblptrs, NULL);
/* dict_clear_vectors() deletes all of D's vectors, not only those
   that contain V. */
418 dict_clear_vectors (d);
420 /* Remove v from var array. */
/* Shift the elements that follow V down by one position.
   NOTE(review): the element count used here is only in bounds if
   d->var_cnt has already been decremented -- confirm. */
422 memmove (d->var + v->index, d->var + v->index + 1,
423 (d->var_cnt - v->index) * sizeof *d->var);
/* Renumber the variables that moved. */
426 for (i = v->index; i < d->var_cnt; i++)
427 d->var[i]->index = i;
429 /* Update name hash. */
430 hsh_force_delete (d->name_tab, v);
/* Release V's value labels. */
433 val_labs_destroy (v->val_labs);
438 /* Deletes the COUNT variables listed in VARS from D. This is
439 unsafe; see the comment on dict_delete_var() for details. */
441 dict_delete_vars (struct dictionary *d,
442 struct variable *const *vars, size_t count)
444 /* FIXME: this can be done in O(count) time, but this algorithm
447 assert (count == 0 || vars != NULL);
450 dict_delete_var (d, *vars++);
453 /* Reorders the variables in D, placing the COUNT variables
454 listed in ORDER in that order at the beginning of D. The
455 other variables in D, if any, retain their relative
458 dict_reorder_vars (struct dictionary *d,
459 struct variable *const *order, size_t count)
461 struct variable **new_var;
465 assert (count == 0 || order != NULL);
466 assert (count <= d->var_cnt);
468 new_var = xmalloc (d->var_cnt * sizeof *new_var);
469 memcpy (new_var, order, count * sizeof *new_var);
470 for (i = 0; i < count; i++)
472 assert (d->var[order[i]->index] != NULL);
473 d->var[order[i]->index] = NULL;
476 for (i = 0; i < d->var_cnt; i++)
477 if (d->var[i] != NULL)
479 assert (count < d->var_cnt);
480 new_var[count] = d->var[i];
481 new_var[count]->index = count;
488 /* Renames COUNT variables specified in VARS to the names given
489 in NEW_NAMES within dictionary D. If the renaming would
490 result in a duplicate variable name, returns zero and stores a
491 name that would be duplicated into *ERR_NAME (if ERR_NAME is
492 non-null). Otherwise, the renaming is successful, and nonzero
495 dict_rename_vars (struct dictionary *d,
496 struct variable **vars, char **new_names,
497 size_t count, char **err_name)
504 assert (count == 0 || vars != NULL);
505 assert (count == 0 || new_names != NULL);
507 old_names = xmalloc (count * sizeof *old_names);
508 for (i = 0; i < count; i++)
510 assert (d->var[vars[i]->index] == vars[i]);
511 hsh_force_delete (d->name_tab, vars[i]);
512 old_names[i] = xstrdup (vars[i]->name);
515 for (i = 0; i < count; i++)
517 assert (new_names[i] != NULL);
518 assert (*new_names[i] != '\0');
519 assert (strlen (new_names[i]) < 9);
520 strcpy (vars[i]->name, new_names[i]);
521 if (hsh_insert (d->name_tab, vars[i]) != NULL)
524 if (err_name != NULL)
525 *err_name = new_names[i];
527 for (i = 0; i < fail_idx; i++)
528 hsh_force_delete (d->name_tab, vars[i]);
530 for (i = 0; i < count; i++)
532 strcpy (vars[i]->name, old_names[i]);
533 hsh_force_insert (d->name_tab, vars[i]);
541 for (i = 0; i < count; i++)
548 /* Returns the weighting variable in dictionary D, or a null
549 pointer if the dictionary is unweighted. */
551 dict_get_weight (const struct dictionary *d)
554 assert (d->weight == NULL || dict_contains_var (d, d->weight));
559 /* Returns the value of D's weighting variable in case C, except
560 that a negative weight is returned as 0. Returns 1 if the
561 dictionary is unweighted. */
563 dict_get_case_weight (const struct dictionary *d, const struct ccase *c)
568 if (d->weight == NULL)
/* The weight is the numeric value stored in C at the weighting
   variable's first value index. */
572 double w = c->data[d->weight->fv].f;
579 /* Sets the weighting variable of D to V, or turns off
580 weighting if V is a null pointer. */
582 dict_set_weight (struct dictionary *d, struct variable *v)
/* A non-null V must belong to D and must be numeric. */
585 assert (v == NULL || dict_contains_var (d, v));
586 assert (v == NULL || v->type == NUMERIC);
591 /* Returns the filter variable in dictionary D (see cmd_filter())
592 or a null pointer if the dictionary is unfiltered. */
594 dict_get_filter (const struct dictionary *d)
597 assert (d->filter == NULL || dict_contains_var (d, d->filter));
602 /* Sets V as the filter variable for dictionary D. Passing a
603 null pointer for V turns off filtering. */
605 dict_set_filter (struct dictionary *d, struct variable *v)
608 assert (v == NULL || dict_contains_var (d, v));
613 /* Returns the case limit for dictionary D, or zero if the number
614 of cases is unlimited (see cmd_n()). */
616 dict_get_case_limit (const struct dictionary *d)
620 return d->case_limit;
623 /* Sets CASE_LIMIT as the case limit for dictionary D. Zero for
624 CASE_LIMIT indicates no limit. */
626 dict_set_case_limit (struct dictionary *d, int case_limit)
629 assert (case_limit >= 0);
631 d->case_limit = case_limit;
634 /* Returns the index of the next value to be added to D. This
635 value is the number of `union value's that need to be
636 allocated to store a case for dictionary D. */
638 dict_get_next_value_idx (const struct dictionary *d)
642 return d->next_value_idx;
645 /* Returns the number of bytes needed to store a case for
648 dict_get_case_size (const struct dictionary *d)
652 return sizeof (union value) * dict_get_next_value_idx (d);
655 /* Reassigns values in dictionary D so that fragmentation is
658 dict_compact_values (struct dictionary *d)
662 d->next_value_idx = 0;
663 for (i = 0; i < d->var_cnt; i++)
665 struct variable *v = d->var[i];
667 v->fv = d->next_value_idx;
668 d->next_value_idx += v->nv;
672 /* Returns the SPLIT FILE vars (see cmd_split_file()). Call
673 dict_get_split_cnt() to determine how many SPLIT FILE vars
674 there are. Returns a null pointer if and only if there are no
676 struct variable *const *
677 dict_get_split_vars (const struct dictionary *d)
684 /* Returns the number of SPLIT FILE vars. */
686 dict_get_split_cnt (const struct dictionary *d)
693 /* Sets CNT split vars SPLIT in dictionary D. */
695 dict_set_split_vars (struct dictionary *d,
696 struct variable *const *split, size_t cnt)
/* SPLIT may be null only when CNT is 0. */
699 assert (cnt == 0 || split != NULL);
/* D keeps its own array: the existing split array is resized to CNT
   elements and overwritten with the pointers from SPLIT. */
702 d->split = xrealloc (d->split, cnt * sizeof *d->split);
703 memcpy (d->split, split, cnt * sizeof *d->split);
706 /* Returns the file label for D, or a null pointer if D is
707 unlabeled (see cmd_file_label()). */
709 dict_get_label (const struct dictionary *d)
716 /* Sets D's file label to LABEL, truncating it to a maximum of 60
719 dict_set_label (struct dictionary *d, const char *label)
726 else if (strlen (label) < 60)
727 d->label = xstrdup (label);
730 d->label = xmalloc (61);
731 memcpy (d->label, label, 60);
736 /* Returns the documents for D, or a null pointer if D has no
737 documents (see cmd_document()). */
739 dict_get_documents (const struct dictionary *d)
746 /* Sets the documents for D to DOCUMENTS, or removes D's
747 documents if DOCUMENTS is a null pointer. */
749 dict_set_documents (struct dictionary *d, const char *documents)
754 if (documents == NULL)
/* D stores a duplicate of the string, not the caller's pointer. */
757 d->documents = xstrdup (documents);
760 /* Creates in D a vector named NAME that contains CNT variables
761 VAR (see cmd_vector()). Returns nonzero if successful, or
762 zero if a vector named NAME already exists in D. */
764 dict_create_vector (struct dictionary *d,
766 struct variable **var, size_t cnt)
768 struct vector *vector;
/* NAME must be 1 to 8 characters long. */
771 assert (name != NULL);
772 assert (strlen (name) > 0 && strlen (name) < 9);
773 assert (var != NULL);
/* Vector names must be unique within D. */
776 if (dict_lookup_vector (d, name) != NULL)
/* Grow the vector array by one element and allocate the new vector
   in the last position; its idx is that position. */
779 d->vector = xrealloc (d->vector, (d->vector_cnt + 1) * sizeof *d->vector);
780 vector = d->vector[d->vector_cnt] = xmalloc (sizeof *vector);
781 vector->idx = d->vector_cnt++;
/* strncpy() adds no terminator when it copies the full 8 characters,
   so one is stored explicitly. */
782 strncpy (vector->name, name, 8);
783 vector->name[8] = '\0';
/* The vector gets its own copy of the CNT variable pointers. */
784 vector->var = xmalloc (cnt * sizeof *var);
785 memcpy (vector->var, var, cnt * sizeof *var);
791 /* Returns the vector in D with index IDX, which must be less
792 than dict_get_vector_cnt (D). */
793 const struct vector *
794 dict_get_vector (const struct dictionary *d, size_t idx)
797 assert (idx < d->vector_cnt);
799 return d->vector[idx];
802 /* Returns the number of vectors in D. */
804 dict_get_vector_cnt (const struct dictionary *d)
808 return d->vector_cnt;
811 /* Looks up and returns the vector within D with the given
813 const struct vector *
814 dict_lookup_vector (const struct dictionary *d, const char *name)
819 assert (name != NULL);
821 for (i = 0; i < d->vector_cnt; i++)
822 if (!strcmp (d->vector[i]->name, name))
827 /* Deletes all vectors from D. */
829 dict_clear_vectors (struct dictionary *d)
835 for (i = 0; i < d->vector_cnt; i++)
837 free (d->vector[i]->var);