1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 #include "algorithm.h"
28 #include "value-labels.h"
34 struct variable **var; /* Variables. */
35 size_t var_cnt, var_cap; /* Number of variables, capacity. */
36 struct hsh_table *name_tab; /* Variable index by name. */
37 int value_cnt; /* Number of `union value's per case. */
38 struct variable **split; /* SPLIT FILE vars. */
39 size_t split_cnt; /* SPLIT FILE count. */
40 struct variable *weight; /* WEIGHT variable. */
41 struct variable *filter; /* FILTER variable. */
42 int case_limit; /* Current case limit (N command). */
43 char *label; /* File label. */
44 char *documents; /* Documents, as a string. */
45 struct vector **vector; /* Vectors of variables. */
46 size_t vector_cnt; /* Number of vectors. */
52 struct dictionary *d = xmalloc (sizeof *d);
55 d->var_cnt = d->var_cap = 0;
56 d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL);
72 dict_clone (const struct dictionary *s)
80 for (i = 0; i < s->var_cnt; i++)
81 dict_clone_var (d, s->var[i], s->var[i]->name);
82 d->value_cnt = s->value_cnt;
84 d->split_cnt = s->split_cnt;
87 d->split = xmalloc (d->split_cnt * sizeof *d->split);
88 for (i = 0; i < d->split_cnt; i++)
90 d->split[i] = dict_lookup_var (d, s->split[i]->name);
91 assert (d->split[i] != NULL);
95 if (s->weight != NULL)
97 d->weight = dict_lookup_var (d, s->weight->name);
98 assert (d->weight != NULL);
101 if (s->filter != NULL)
103 d->filter = dict_lookup_var (d, s->filter->name);
104 assert (d->filter != NULL);
107 d->case_limit = s->case_limit;
108 dict_set_label (d, dict_get_label (s));
109 dict_set_documents (d, dict_get_documents (s));
111 for (i = 0; i < s->vector_cnt; i++)
112 dict_create_vector (d, s->vector[i]->name,
113 s->vector[i]->var, s->vector[i]->cnt);
119 dict_clear (struct dictionary *d)
121 /* FIXME? Should we really clear case_limit, label, documents?
122 Others are necessarily cleared by deleting all the variables.*/
127 for (i = 0; i < d->var_cnt; i++)
129 struct variable *v = d->var[i];
130 val_labs_destroy (v->val_labs);
136 d->var_cnt = d->var_cap = 0;
137 hsh_clear (d->name_tab);
149 dict_clear_vectors (d);
153 dict_destroy (struct dictionary *d)
158 hsh_destroy (d->name_tab);
164 dict_get_var_cnt (const struct dictionary *d)
172 dict_get_var (const struct dictionary *d, size_t idx)
175 assert (idx < d->var_cnt);
181 dict_get_vars (const struct dictionary *d, struct variable ***vars,
182 size_t *cnt, unsigned exclude_classes)
188 assert (vars != NULL);
189 assert (cnt != NULL);
190 assert ((exclude_classes & ~((1u << DC_ORDINARY)
192 | (1u << DC_SCRATCH))) == 0);
195 for (i = 0; i < d->var_cnt; i++)
196 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
199 *vars = xmalloc (count * sizeof **vars);
201 for (i = 0; i < d->var_cnt; i++)
202 if (!(exclude_classes & (1u << dict_class_from_id (d->var[i]->name))))
203 (*vars)[(*cnt)++] = d->var[i];
204 assert (*cnt == count);
208 dict_create_var (struct dictionary *d, const char *name, int width)
213 assert (name != NULL);
214 assert (strlen (name) >= 1 && strlen (name) <= 8);
215 assert (width >= 0 && width < 256);
217 /* Make sure there's not already a variable by that name. */
218 if (dict_lookup_var (d, name) != NULL)
221 /* Allocate and initialize variable. */
222 v = xmalloc (sizeof *v);
223 strncpy (v->name, name, sizeof v->name);
225 v->index = d->var_cnt;
226 v->type = width == 0 ? NUMERIC : ALPHA;
228 v->fv = d->value_cnt;
229 v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
230 v->left = name[0] == '#';
231 v->miss_type = MISSING_NONE;
232 if (v->type == NUMERIC)
234 v->print.type = FMT_F;
240 v->print.type = FMT_A;
241 v->print.w = v->width;
244 v->val_labs = val_labs_create (v->width);
247 /* Update dictionary. */
248 if (d->var_cnt >= d->var_cap)
250 d->var_cap = 8 + 2 * d->var_cap;
251 d->var = xrealloc (d->var, d->var_cap * sizeof *d->var);
253 d->var[v->index] = v;
255 hsh_force_insert (d->name_tab, v);
256 d->value_cnt += v->nv;
262 dict_clone_var (struct dictionary *d, const struct variable *ov,
269 assert (name != NULL);
270 assert (strlen (name) >= 1 && strlen (name) <= 8);
272 nv = dict_create_var (d, name, ov->width);
277 nv->miss_type = ov->miss_type;
278 memcpy (nv->missing, ov->missing, sizeof nv->missing);
279 nv->print = ov->print;
280 nv->write = ov->write;
281 val_labs_destroy (nv->val_labs);
282 nv->val_labs = val_labs_copy (ov->val_labs);
283 if (ov->label != NULL)
284 nv->label = xstrdup (ov->label);
290 dict_rename_var (struct dictionary *d, struct variable *v,
291 const char *new_name)
295 assert (new_name != NULL);
296 assert (strlen (new_name) >= 1 && strlen (new_name) <= 8);
298 if (!strcmp (v->name, new_name))
301 assert (dict_lookup_var (d, new_name) == NULL);
303 hsh_force_delete (d->name_tab, v);
304 strncpy (v->name, new_name, sizeof v->name);
306 hsh_force_insert (d->name_tab, v);
310 dict_lookup_var (const struct dictionary *d, const char *name)
315 assert (name != NULL);
316 assert (strlen (name) >= 1 && strlen (name) <= 8);
318 strncpy (v.name, name, sizeof v.name);
321 return hsh_find (d->name_tab, &v);
325 dict_contains_var (const struct dictionary *d, const struct variable *v)
330 return dict_lookup_var (d, v->name) == v;
334 compare_variable_dblptrs (const void *a_, const void *b_, void *aux UNUSED)
336 struct variable *const *a = a_;
337 struct variable *const *b = b_;
348 dict_delete_var (struct dictionary *d, struct variable *v)
352 /* FIXME? Does not sync d->value_cnt. */
355 assert (dict_contains_var (d, v));
356 assert (d->var[v->index] == v);
358 /* Remove v from splits, weight, filter variables. */
359 d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
361 compare_variable_dblptrs, NULL);
366 dict_clear_vectors (d);
368 /* Remove v from var array. */
370 memmove (d->var + v->index, d->var + v->index + 1,
371 (d->var_cnt - v->index) * sizeof *d->var);
374 for (i = v->index; i < d->var_cnt; i++)
375 d->var[i]->index = i;
377 /* Update name hash. */
378 hsh_force_delete (d->name_tab, v);
381 val_labs_destroy (v->val_labs);
388 dict_delete_vars (struct dictionary *d,
389 struct variable *const *vars, size_t count)
391 /* FIXME: this can be done in O(count) time, but this algorithm
394 assert (count == 0 || vars != NULL);
397 dict_delete_var (d, *vars++);
401 dict_reorder_vars (struct dictionary *d,
402 struct variable *const *order, size_t count)
404 struct variable **new_var;
408 assert (count == 0 || order != NULL);
409 assert (count <= d->var_cnt);
411 new_var = xmalloc (d->var_cnt * sizeof *new_var);
412 memcpy (new_var, order, count * sizeof *new_var);
413 for (i = 0; i < count; i++)
415 assert (d->var[order[i]->index] != NULL);
416 d->var[order[i]->index] = NULL;
419 for (i = 0; i < d->var_cnt; i++)
420 if (d->var[i] != NULL)
422 assert (count < d->var_cnt);
423 new_var[count] = d->var[i];
424 new_var[count]->index = count;
432 dict_rename_vars (struct dictionary *d,
433 struct variable **vars, char **new_names,
434 size_t count, char **err_name)
441 assert (count == 0 || vars != NULL);
442 assert (count == 0 || new_names != NULL);
444 old_names = xmalloc (count * sizeof *old_names);
445 for (i = 0; i < count; i++)
447 assert (d->var[vars[i]->index] == vars[i]);
448 hsh_force_delete (d->name_tab, vars[i]);
449 old_names[i] = xstrdup (vars[i]->name);
452 for (i = 0; i < count; i++)
454 assert (new_names[i] != NULL);
455 assert (strlen (new_names[i]) < 9);
456 strcpy (vars[i]->name, new_names[i]);
457 if (hsh_insert (d->name_tab, vars[i]) != NULL)
460 if (err_name != NULL)
461 *err_name = new_names[i];
463 for (i = 0; i < fail_idx; i++)
464 hsh_force_delete (d->name_tab, vars[i]);
466 for (i = 0; i < count; i++)
468 strcpy (vars[i]->name, old_names[i]);
469 hsh_force_insert (d->name_tab, vars[i]);
477 for (i = 0; i < count; i++)
485 dict_get_weight (const struct dictionary *d)
488 assert (d->weight == NULL || dict_contains_var (d, d->weight));
494 dict_get_case_weight (const struct dictionary *d, const struct ccase *c)
499 if (d->weight == NULL)
503 double w = c->data[d->weight->fv].f;
511 dict_set_weight (struct dictionary *d, struct variable *v)
514 assert (v == NULL || dict_contains_var (d, v));
515 assert (v == NULL || v->type == NUMERIC);
521 dict_get_filter (const struct dictionary *d)
524 assert (d->filter == NULL || dict_contains_var (d, d->filter));
530 dict_set_filter (struct dictionary *d, struct variable *v)
533 assert (v == NULL || dict_contains_var (d, v));
539 dict_get_case_limit (const struct dictionary *d)
543 return d->case_limit;
547 dict_set_case_limit (struct dictionary *d, int case_limit)
550 assert (case_limit >= 0);
552 d->case_limit = case_limit;
556 dict_get_value_cnt (const struct dictionary *d)
564 dict_compact_values (struct dictionary *d)
569 for (i = 0; i < d->var_cnt; i++)
571 struct variable *v = d->var[i];
573 v->fv = d->value_cnt;
574 d->value_cnt += v->nv;
578 struct variable *const *
579 dict_get_split_vars (const struct dictionary *d)
587 dict_get_split_cnt (const struct dictionary *d)
595 dict_set_split_vars (struct dictionary *d,
596 struct variable *const *split, size_t cnt)
599 assert (cnt == 0 || split != NULL);
602 d->split = xrealloc (d->split, cnt * sizeof *d->split);
603 memcpy (d->split, split, cnt * sizeof *d->split);
607 dict_get_label (const struct dictionary *d)
615 dict_set_label (struct dictionary *d, const char *label)
622 else if (strlen (label) < 60)
623 d->label = xstrdup (label);
626 d->label = xmalloc (61);
627 memcpy (d->label, label, 60);
633 dict_get_documents (const struct dictionary *d)
641 dict_set_documents (struct dictionary *d, const char *documents)
646 if (documents == NULL)
649 d->documents = xstrdup (documents);
653 dict_create_vector (struct dictionary *d,
655 struct variable **var, size_t cnt)
657 struct vector *vector;
660 assert (name != NULL);
661 assert (strlen (name) > 0 && strlen (name) < 9);
662 assert (var != NULL);
665 if (dict_lookup_vector (d, name) != NULL)
668 d->vector = xrealloc (d->vector, (d->vector_cnt + 1) * sizeof *d->vector);
669 vector = d->vector[d->vector_cnt] = xmalloc (sizeof *vector);
670 vector->idx = d->vector_cnt++;
671 strncpy (vector->name, name, 8);
672 vector->name[8] = '\0';
673 vector->var = xmalloc (cnt * sizeof *var);
674 memcpy (vector->var, var, cnt * sizeof *var);
680 const struct vector *
681 dict_get_vector (const struct dictionary *d, size_t idx)
684 assert (idx < d->vector_cnt);
686 return d->vector[idx];
690 dict_get_vector_cnt (const struct dictionary *d)
694 return d->vector_cnt;
697 const struct vector *
698 dict_lookup_vector (const struct dictionary *d, const char *name)
703 assert (name != NULL);
705 for (i = 0; i < d->vector_cnt; i++)
706 if (!strcmp (d->vector[i]->name, name))
712 dict_clear_vectors (struct dictionary *d)
718 for (i = 0; i < d->vector_cnt; i++)
720 free (d->vector[i]->var);