1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 #include "algorithm.h"
28 #include "value-labels.h"
/* Members of the dictionary structure.  They fall into three groups:
   the variables together with their by-name index, the number of
   values in a case, and dictionary-wide settings (split file,
   weight, filter, case limit, label, documents, vectors). */
34 struct variable **var; /* Variables, each stored at its own `index'. */
35 size_t var_cnt, var_cap; /* Number of variables, capacity. */
36 struct hsh_table *name_tab; /* Variable index by name. */
37 int value_cnt; /* Number of `union value's per case. */
38 struct variable **split; /* SPLIT FILE vars. */
39 size_t split_cnt; /* SPLIT FILE count. */
40 struct variable *weight; /* WEIGHT variable, or NULL if none. */
41 struct variable *filter; /* FILTER variable, or NULL if none. */
42 int case_limit; /* Current case limit (N command). */
43 char *label; /* File label. */
44 char *documents; /* Documents, as a string. */
45 struct vector **vector; /* Vectors of variables. */
46 size_t vector_cnt; /* Number of vectors. */
/* Allocate the dictionary object itself.  The result of xmalloc is
   used without a null check.
   NOTE(review): xmalloc presumably never returns NULL -- confirm. */
52 struct dictionary *d = xmalloc (sizeof *d);
/* A new dictionary holds no variables and has no array capacity. */
55 d->var_cnt = d->var_cap = 0;
/* Create the by-name index of variables, using compare_variables and
   hash_variable as the table's callbacks.
   NOTE(review): the meaning of the 8 and of the two NULL arguments
   is defined by hsh_create, which is not visible here -- confirm. */
56 d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL);
/* Builds in D a copy of dictionary S: every variable of S is cloned
   under its own name, then the split-file, weight and filter
   settings are re-created by looking up the same-named variables in
   D, and the case limit, label, documents and vectors are copied.
   NOTE(review): the creation of D and the return statement are not
   visible in this excerpt. */
72 dict_clone (const struct dictionary *s)
/* Clone the variables in index order, keeping their names. */
80 for (i = 0; i < s->var_cnt; i++)
81 dict_clone_var (d, s->var[i], s->var[i]->name);
/* dict_clone_var goes through dict_create_var, which already adds
   each variable's values to d->value_cnt; this assignment replaces
   that running total with S's count. */
82 d->value_cnt = s->value_cnt;
84 d->split_cnt = s->split_cnt;
/* The split list must point at D's own variables, so each entry is
   found again in D by name. */
87 d->split = xmalloc (d->split_cnt * sizeof *d->split);
88 for (i = 0; i < d->split_cnt; i++)
89 d->split[i] = dict_lookup_var_assert (d, s->split[i]->name);
/* Likewise translate the weight and filter variables, if set. */
92 if (s->weight != NULL)
93 d->weight = dict_lookup_var_assert (d, s->weight->name);
95 if (s->filter != NULL)
96 d->filter = dict_lookup_var_assert (d, s->filter->name);
98 d->case_limit = s->case_limit;
99 dict_set_label (d, dict_get_label (s));
100 dict_set_documents (d, dict_get_documents (s));
/* NOTE(review): unlike split, weight and filter above, the vectors
   are created from s->vector[i]->var, that is, from pointers to S's
   variables rather than to the clones in D.  Confirm whether the
   vectors of the copy are meant to refer to S's variables. */
102 for (i = 0; i < s->vector_cnt; i++)
103 dict_create_vector (d, s->vector[i]->name,
104 s->vector[i]->var, s->vector[i]->cnt);
/* Empties dictionary D: destroys the value labels of every variable,
   resets the variable count and capacity, empties the name index
   and clears the vectors.
   NOTE(review): the statements that release the variables
   themselves and reset the remaining members are not visible in
   this excerpt. */
110 dict_clear (struct dictionary *d)
112 /* FIXME? Should we really clear case_limit, label, documents?
113 Others are necessarily cleared by deleting all the variables.*/
/* Release per-variable resources. */
118 for (i = 0; i < d->var_cnt; i++)
120 struct variable *v = d->var[i];
121 val_labs_destroy (v->val_labs);
/* No variables remain, so the name index must be emptied too. */
127 d->var_cnt = d->var_cap = 0;
128 hsh_clear (d->name_tab);
140 dict_clear_vectors (d);
/* Releases dictionary D; the visible part destroys the by-name hash
   table.
   NOTE(review): the rest of the body is not visible in this
   excerpt. */
144 dict_destroy (struct dictionary *d)
149 hsh_destroy (d->name_tab);
/* Accessor taking a read-only dictionary D.
   NOTE(review): only the signature is visible here; presumably
   returns d->var_cnt -- confirm. */
155 dict_get_var_cnt (const struct dictionary *d)
/* Accesses the variable at position IDX in D.  IDX must be less
   than the number of variables in D (asserted).
   NOTE(review): the return statement is not visible here;
   presumably returns d->var[idx] -- confirm. */
163 dict_get_var (const struct dictionary *d, size_t idx)
166 assert (idx < d->var_cnt);
/* Stores in *VARS a newly allocated array holding the variables of
   D whose dictionary class is not excluded, in dictionary order,
   and uses *CNT as the number stored.  EXCLUDE_CLASSES is a bit
   mask in which bit (1u << DC_xxx) excludes class DC_xxx.  The
   array comes from xmalloc.
   NOTE(review): the caller presumably owns and frees it --
   confirm. */
172 dict_get_vars (const struct dictionary *d, struct variable ***vars,
173 size_t *cnt, unsigned exclude_classes)
179 assert (vars != NULL);
180 assert (cnt != NULL);
/* No bits other than those of known dictionary classes may be set. */
181 assert ((exclude_classes & ~((1u << DC_ORDINARY)
183 | (1u << DC_SCRATCH))) == 0);
/* First pass: find the variables that are not excluded.
   NOTE(review): the statement that counts them is not visible
   here. */
186 for (i = 0; i < d->var_cnt; i++)
187 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
/* Allocate exactly as many slots as were counted. */
190 *vars = xmalloc (count * sizeof **vars);
/* Second pass: store the same variables, with *cnt serving as the
   write position.
   NOTE(review): *cnt is presumably set to 0 on a line that is not
   visible here -- confirm. */
192 for (i = 0; i < d->var_cnt; i++)
193 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
194 (*vars)[(*cnt)++] = d->var[i];
/* Both passes must have selected the same number of variables. */
195 assert (*cnt == count);
/* Creates a variable named NAME in D.  A WIDTH of 0 makes the
   variable NUMERIC; any other width makes it ALPHA with that width.
   NAME must be 1 to 8 characters long and WIDTH must be in the
   range 0...255 (both asserted).  The new variable is appended to
   D's variable array, entered in the name index, and given value
   positions starting at the current end of the case.
   NOTE(review): the return statements are not visible in this
   excerpt. */
199 dict_create_var (struct dictionary *d, const char *name, int width)
204 assert (name != NULL);
205 assert (strlen (name) >= 1 && strlen (name) <= 8);
206 assert (width >= 0 && width < 256);
208 /* Make sure there's not already a variable by that name. */
/* NOTE(review): the action taken on a duplicate is not visible
   here; presumably returns NULL -- confirm. */
209 if (dict_lookup_var (d, name) != NULL)
212 /* Allocate and initialize variable. */
213 v = xmalloc (sizeof *v);
/* strncpy writes no terminator when NAME fills the whole buffer.
   NOTE(review): the terminator is presumably stored on a line that
   is not visible here -- confirm. */
214 strncpy (v->name, name, sizeof v->name);
/* The variable goes at the end of the array. */
216 v->index = d->var_cnt;
217 v->type = width == 0 ? NUMERIC : ALPHA;
/* First value position: the current end of the case. */
219 v->fv = d->value_cnt;
/* A numeric variable takes one value; a string takes one value per
   8 bytes of width, rounded up. */
220 v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
/* reinit is set for every name that does not begin with `#'. */
222 v->reinit = name[0] != '#';
223 v->miss_type = MISSING_NONE;
/* Default print format: F for numeric variables, A with the
   variable's own width for strings. */
224 if (v->type == NUMERIC)
226 v->print.type = FMT_F;
232 v->print.type = FMT_A;
233 v->print.w = v->width;
/* Start with a fresh value label set for this width. */
236 v->val_labs = val_labs_create (v->width);
239 /* Update dictionary. */
240 if (d->var_cnt >= d->var_cap)
/* Grow the array: capacity goes 0, 8, 24, 56, ... */
242 d->var_cap = 8 + 2 * d->var_cap;
243 d->var = xrealloc (d->var, d->var_cap * sizeof *d->var);
245 d->var[v->index] = v;
247 hsh_force_insert (d->name_tab, v);
/* The case grows by the number of values the new variable takes. */
248 d->value_cnt += v->nv;
/* Creates a variable through dict_create_var with the same
   arguments.
   NOTE(review): the rest of the body is not visible here; judging
   by the name it presumably asserts that creation succeeded and
   returns the variable -- confirm. */
254 dict_create_var_assert (struct dictionary *d, const char *name, int width)
256 struct variable *v = dict_create_var (d, name, width);
/* Creates in D a variable called NAME that has OV's width, then
   copies OV's reinit flag, missing-value settings, print and write
   formats, value labels and, if OV has one, its label.  NAME must
   be 1 to 8 characters long (asserted).
   NOTE(review): the handling of a failed dict_create_var and the
   return statement are not visible in this excerpt. */
262 dict_clone_var (struct dictionary *d, const struct variable *ov,
269 assert (name != NULL);
270 assert (strlen (name) >= 1 && strlen (name) <= 8);
/* The width alone determines the new variable's type and the
   number of values it takes. */
272 nv = dict_create_var (d, name, ov->width);
277 nv->reinit = ov->reinit;
278 nv->miss_type = ov->miss_type;
279 memcpy (nv->missing, ov->missing, sizeof nv->missing);
280 nv->print = ov->print;
281 nv->write = ov->write;
/* dict_create_var gave NV a value label set of its own; destroy it
   before installing a copy of OV's. */
282 val_labs_destroy (nv->val_labs);
283 nv->val_labs = val_labs_copy (ov->val_labs);
/* The label string is duplicated, not shared with OV. */
284 if (ov->label != NULL)
285 nv->label = xstrdup (ov->label);
/* Changes the name of V, a variable in D, to NEW_NAME.  NEW_NAME
   must be 1 to 8 characters long and, unless it equals V's current
   name, must not be the name of any variable in D (asserted). */
291 dict_rename_var (struct dictionary *d, struct variable *v,
292 const char *new_name)
296 assert (new_name != NULL);
297 assert (strlen (new_name) >= 1 && strlen (new_name) <= 8);
/* Same name as before.
   NOTE(review): the action is not visible here; presumably an
   early return -- confirm. */
299 if (!strcmp (v->name, new_name))
302 assert (dict_lookup_var (d, new_name) == NULL);
/* The name index is searched by name, so V is taken out while its
   old name is still in place and put back after the name has
   changed. */
304 hsh_force_delete (d->name_tab, v);
/* strncpy writes no terminator when NEW_NAME fills the buffer.
   NOTE(review): the terminator is presumably stored on a line that
   is not visible here -- confirm. */
305 strncpy (v->name, new_name, sizeof v->name);
307 hsh_force_insert (d->name_tab, v);
/* Searches D's name index for a variable called NAME and returns
   whatever hsh_find yields; callers in this file compare the result
   with NULL to detect a missing name.  NAME must be 1 to 8
   characters long (asserted). */
311 dict_lookup_var (const struct dictionary *d, const char *name)
316 assert (name != NULL);
317 assert (strlen (name) >= 1 && strlen (name) <= 8);
/* The search key is a temporary variable V of which only the name
   is filled in.
   NOTE(review): the terminator for v.name is presumably stored on a
   line that is not visible here -- confirm. */
319 strncpy (v.name, name, sizeof v.name);
322 return hsh_find (d->name_tab, &v);
/* Looks up NAME in D through dict_lookup_var.
   NOTE(review): the rest of the body is not visible here; judging
   by the name it presumably asserts that the variable was found and
   returns it -- confirm. */
326 dict_lookup_var_assert (const struct dictionary *d, const char *name)
328 struct variable *v = dict_lookup_var (d, name);
/* Returns nonzero if V itself is the variable that D holds under
   V's name.  The test compares pointers, so a variable with the
   same name that belongs elsewhere does not count. */
334 dict_contains_var (const struct dictionary *d, const struct variable *v)
339 return dict_lookup_var (d, v->name) == v;
/* Comparison callback for arrays whose elements are pointers to
   variables: A_ and B_ each point to such an element.  AUX is not
   used.
   NOTE(review): the comparison itself is not visible in this
   excerpt. */
343 compare_variable_dblptrs (const void *a_, const void *b_, void *aux UNUSED)
345 struct variable *const *a = a_;
346 struct variable *const *b = b_;
/* Removes variable V from D.  V must be in D (asserted).  V is
   dropped from the split-file list, all of D's vectors are cleared,
   the variables after V move down one position and are renumbered,
   and V leaves the name index.
   NOTE(review): the handling of the weight and filter variables and
   the freeing of V are not visible in this excerpt. */
357 dict_delete_var (struct dictionary *d, struct variable *v)
361 /* FIXME? Does not sync d->value_cnt. */
364 assert (dict_contains_var (d, v));
365 assert (d->var[v->index] == v);
367 /* Remove v from splits, weight, filter variables. */
/* remove_equal's result becomes the new number of split
   variables. */
368 d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
370 compare_variable_dblptrs, NULL);
/* All vectors are discarded, whether or not they contain V. */
375 dict_clear_vectors (d);
377 /* Remove v from var array. */
/* NOTE(review): the element count (d->var_cnt - v->index) is right
   only if d->var_cnt was already decremented on a line that is not
   visible here; otherwise the move reads one element beyond the
   variables in use -- confirm. */
379 memmove (d->var + v->index, d->var + v->index + 1,
380 (d->var_cnt - v->index) * sizeof *d->var);
/* Every variable that moved down needs its index updated. */
383 for (i = v->index; i < d->var_cnt; i++)
384 d->var[i]->index = i;
386 /* Update name hash. */
387 hsh_force_delete (d->name_tab, v);
/* Release V's value labels. */
390 val_labs_destroy (v->val_labs);
/* Removes from D the COUNT variables listed in VARS by calling
   dict_delete_var on each.  VARS may be NULL only when COUNT is 0
   (asserted).
   NOTE(review): the loop header is not visible in this excerpt. */
397 dict_delete_vars (struct dictionary *d,
398 struct variable *const *vars, size_t count)
400 /* FIXME: this can be done in O(count) time, but this algorithm
403 assert (count == 0 || vars != NULL);
406 dict_delete_var (d, *vars++);
/* Rearranges D's variables so that the COUNT variables in ORDER
   come first, in the order given, followed by all remaining
   variables in their existing relative order.  ORDER may be NULL
   only when COUNT is 0, and COUNT must not exceed the number of
   variables in D (asserted).
   NOTE(review): the renumbering of the variables in ORDER and the
   replacement of d->var by the new array are not visible in this
   excerpt. */
410 dict_reorder_vars (struct dictionary *d,
411 struct variable *const *order, size_t count)
413 struct variable **new_var;
417 assert (count == 0 || order != NULL);
418 assert (count <= d->var_cnt);
/* The new array starts with the variables in ORDER. */
420 new_var = xmalloc (d->var_cnt * sizeof *new_var);
421 memcpy (new_var, order, count * sizeof *new_var);
/* Mark each variable already placed by nulling its old slot.  The
   assertion fails if a slot was nulled earlier, which happens when
   ORDER names the same variable twice. */
422 for (i = 0; i < count; i++)
424 assert (d->var[order[i]->index] != NULL);
425 d->var[order[i]->index] = NULL;
/* Append the variables that were not in ORDER, giving each its new
   index.
   NOTE(review): COUNT is presumably incremented after each append
   on a line that is not visible here -- confirm. */
428 for (i = 0; i < d->var_cnt; i++)
429 if (d->var[i] != NULL)
431 assert (count < d->var_cnt);
432 new_var[count] = d->var[i];
433 new_var[count]->index = count;
/* Renames the COUNT variables in VARS to the corresponding entries
   of NEW_NAMES as a single all-or-nothing operation.  Each new name
   must be non-null and shorter than 9 characters (asserted).  If a
   new name collides with a name already in the index, *ERR_NAME
   (when ERR_NAME is non-null) is set to that new name, and every
   variable gets its old name back.
   NOTE(review): the return values are not visible in this
   excerpt. */
441 dict_rename_vars (struct dictionary *d,
442 struct variable **vars, char **new_names,
443 size_t count, char **err_name)
450 assert (count == 0 || vars != NULL);
451 assert (count == 0 || new_names != NULL);
/* Take all the variables out of the name index first and remember
   their old names.  With all of them out, a new name may equal the
   old name of another variable in VARS. */
453 old_names = xmalloc (count * sizeof *old_names);
454 for (i = 0; i < count; i++)
456 assert (d->var[vars[i]->index] == vars[i]);
457 hsh_force_delete (d->name_tab, vars[i]);
458 old_names[i] = xstrdup (vars[i]->name);
/* Install the new names one by one.  The length assertion is what
   keeps the unbounded strcpy within the name buffer.
   NOTE(review): assumes the name member holds at least 9 bytes --
   confirm. */
461 for (i = 0; i < count; i++)
463 assert (new_names[i] != NULL);
464 assert (strlen (new_names[i]) < 9);
465 strcpy (vars[i]->name, new_names[i]);
/* A non-null result from hsh_insert is treated as a collision. */
466 if (hsh_insert (d->name_tab, vars[i]) != NULL)
469 if (err_name != NULL)
470 *err_name = new_names[i];
/* Undo: remove the variables that were inserted before the failing
   one.  The failing variable itself never entered the index. */
472 for (i = 0; i < fail_idx; i++)
473 hsh_force_delete (d->name_tab, vars[i]);
/* Then restore every old name and index entry. */
475 for (i = 0; i < count; i++)
477 strcpy (vars[i]->name, old_names[i]);
478 hsh_force_insert (d->name_tab, vars[i]);
/* NOTE(review): the body of this loop is not visible here;
   presumably it frees the saved old names -- confirm. */
486 for (i = 0; i < count; i++)
/* Accessor for D's weight variable.  Asserts that the weight
   variable is either unset (NULL) or a variable contained in D.
   NOTE(review): the return statement is not visible here;
   presumably returns d->weight -- confirm. */
494 dict_get_weight (const struct dictionary *d)
497 assert (d->weight == NULL || dict_contains_var (d, d->weight));
/* Examines case C according to D's weighting.  When D has a weight
   variable, its value is read from C as a number at the variable's
   first value position.
   NOTE(review): the result for an unweighted dictionary and any
   adjustment of the value read are not visible in this excerpt. */
503 dict_get_case_weight (const struct dictionary *d, const struct ccase *c)
/* No weight variable is set. */
508 if (d->weight == NULL)
/* Numeric value of the weight variable in this case. */
512 double w = c->data[d->weight->fv].f;
/* Takes V as the candidate weight variable for D.  V may be NULL;
   otherwise it must be a NUMERIC variable contained in D (both
   asserted).
   NOTE(review): the assignment is not visible here; presumably
   stores V in d->weight -- confirm. */
520 dict_set_weight (struct dictionary *d, struct variable *v)
523 assert (v == NULL || dict_contains_var (d, v));
524 assert (v == NULL || v->type == NUMERIC);
/* Accessor for D's filter variable.  Asserts that the filter
   variable is either unset (NULL) or a variable contained in D.
   NOTE(review): the return statement is not visible here;
   presumably returns d->filter -- confirm. */
530 dict_get_filter (const struct dictionary *d)
533 assert (d->filter == NULL || dict_contains_var (d, d->filter));
/* Takes V as the candidate filter variable for D.  V may be NULL;
   otherwise it must be contained in D (asserted).
   NOTE(review): the assignment is not visible here; presumably
   stores V in d->filter -- confirm. */
539 dict_set_filter (struct dictionary *d, struct variable *v)
542 assert (v == NULL || dict_contains_var (d, v));
/* Returns D's current case limit, exactly as stored. */
548 dict_get_case_limit (const struct dictionary *d)
552 return d->case_limit;
/* Sets D's case limit to CASE_LIMIT, which must not be negative
   (asserted). */
556 dict_set_case_limit (struct dictionary *d, int case_limit)
559 assert (case_limit >= 0);
561 d->case_limit = case_limit;
/* Accessor taking a read-only dictionary D.
   NOTE(review): only the signature is visible here; presumably
   returns d->value_cnt -- confirm. */
565 dict_get_value_cnt (const struct dictionary *d)
/* Reassigns the value positions of D's variables in dictionary
   order: each variable's first value position becomes the running
   total of values, which then grows by the number of values the
   variable takes, so consecutive variables occupy consecutive
   positions.
   NOTE(review): d->value_cnt is presumably reset to 0 before the
   loop on a line that is not visible here -- confirm. */
573 dict_compact_values (struct dictionary *d)
578 for (i = 0; i < d->var_cnt; i++)
580 struct variable *v = d->var[i];
582 v->fv = d->value_cnt;
583 d->value_cnt += v->nv;
/* Accessor returning a pointer to read-only variable pointers.
   NOTE(review): only the signature is visible here; presumably
   returns d->split -- confirm. */
587 struct variable *const *
588 dict_get_split_vars (const struct dictionary *d)
/* Accessor taking a read-only dictionary D.
   NOTE(review): only the signature is visible here; presumably
   returns d->split_cnt -- confirm. */
596 dict_get_split_cnt (const struct dictionary *d)
/* Replaces D's split-file list with a copy of the CNT variable
   pointers in SPLIT.  Only the pointer array is copied; the
   variables are shared.  SPLIT may be NULL only when CNT is 0
   (asserted).
   NOTE(review): the update of d->split_cnt is not visible in this
   excerpt. */
604 dict_set_split_vars (struct dictionary *d,
605 struct variable *const *split, size_t cnt)
608 assert (cnt == 0 || split != NULL);
/* NOTE(review): when CNT is 0 this asks xrealloc for zero bytes and
   may hand memcpy a NULL source; what happens then depends on
   xrealloc and the C library -- confirm that case is handled. */
611 d->split = xrealloc (d->split, cnt * sizeof *d->split);
612 memcpy (d->split, split, cnt * sizeof *d->split);
/* Accessor taking a read-only dictionary D.
   NOTE(review): only the signature is visible here; presumably
   returns d->label -- confirm. */
616 dict_get_label (const struct dictionary *d)
/* Sets D's file label from LABEL.  A label shorter than 60
   characters is duplicated whole; a longer one is cut down to its
   first 60 bytes, held in a 61-byte buffer.
   NOTE(review): the branch before the `else if' (presumably the
   NULL case) and the release of the previous label are not visible
   in this excerpt. */
624 dict_set_label (struct dictionary *d, const char *label)
631 else if (strlen (label) < 60)
632 d->label = xstrdup (label);
/* Long label: keep the first 60 bytes only.  memcpy writes no
   terminator.
   NOTE(review): d->label[60] is presumably set to a null byte on a
   line that is not visible here -- confirm. */
635 d->label = xmalloc (61);
636 memcpy (d->label, label, 60);
/* Accessor taking a read-only dictionary D.
   NOTE(review): only the signature is visible here; presumably
   returns d->documents -- confirm. */
642 dict_get_documents (const struct dictionary *d)
/* Sets D's documents from DOCUMENTS.  A non-null string is
   duplicated, so D keeps its own copy.
   NOTE(review): the action for a NULL argument and the release of
   the previous documents are not visible in this excerpt. */
650 dict_set_documents (struct dictionary *d, const char *documents)
655 if (documents == NULL)
658 d->documents = xstrdup (documents);
/* Adds to D a vector called NAME made up of the CNT variables in
   VAR.  NAME must be 1 to 8 characters long and VAR must be
   non-null (asserted).  The vector keeps its own copy of the
   pointer array, but the variables themselves are shared.
   NOTE(review): the return values and the storing of CNT in the
   vector are not visible in this excerpt. */
662 dict_create_vector (struct dictionary *d,
664 struct variable **var, size_t cnt)
666 struct vector *vector;
669 assert (name != NULL);
670 assert (strlen (name) > 0 && strlen (name) < 9);
671 assert (var != NULL);
/* A vector with this name already exists.
   NOTE(review): the action is not visible here; presumably a
   failure return -- confirm. */
674 if (dict_lookup_vector (d, name) != NULL)
/* Make room for one more vector pointer, then allocate the vector
   and store it at the end. */
677 d->vector = xrealloc (d->vector, (d->vector_cnt + 1) * sizeof *d->vector);
678 vector = d->vector[d->vector_cnt] = xmalloc (sizeof *vector);
/* The vector records its own position; the count then advances. */
679 vector->idx = d->vector_cnt++;
/* Copy at most 8 characters and terminate explicitly, since strncpy
   alone writes no terminator for an 8-character name. */
680 strncpy (vector->name, name, 8);
681 vector->name[8] = '\0';
/* Private copy of the caller's pointer array. */
682 vector->var = xmalloc (cnt * sizeof *var);
683 memcpy (vector->var, var, cnt * sizeof *var);
/* Returns the vector at position IDX in D, as a read-only object.
   IDX must be less than the number of vectors in D (asserted). */
689 const struct vector *
690 dict_get_vector (const struct dictionary *d, size_t idx)
693 assert (idx < d->vector_cnt);
695 return d->vector[idx];
/* Returns the number of vectors in D. */
699 dict_get_vector_cnt (const struct dictionary *d)
703 return d->vector_cnt;
/* Searches D's vectors one by one, in order, for one whose name
   equals NAME exactly (strcmp).  NAME must be non-null (asserted).
   NOTE(review): the return statements are not visible here;
   presumably returns the matching vector, or NULL when none
   matches -- confirm. */
706 const struct vector *
707 dict_lookup_vector (const struct dictionary *d, const char *name)
712 assert (name != NULL);
714 for (i = 0; i < d->vector_cnt; i++)
715 if (!strcmp (d->vector[i]->name, name))
721 dict_clear_vectors (struct dictionary *d)
727 for (i = 0; i < d->vector_cnt; i++)
729 free (d->vector[i]->var);