1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/dictionary.h"
25 #include "data/attributes.h"
26 #include "data/case.h"
27 #include "data/identifier.h"
28 #include "data/mrset.h"
29 #include "data/settings.h"
30 #include "data/value-labels.h"
31 #include "data/vardict.h"
32 #include "data/variable.h"
33 #include "data/vector.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/compiler.h"
37 #include "libpspp/hash-functions.h"
38 #include "libpspp/hmap.h"
39 #include "libpspp/message.h"
40 #include "libpspp/misc.h"
41 #include "libpspp/pool.h"
42 #include "libpspp/str.h"
44 #include "gl/intprops.h"
45 #include "gl/minmax.h"
46 #include "gl/xalloc.h"
49 #define _(msgid) gettext (msgid)
54 struct vardict_info *var; /* Variables. */
55 size_t var_cnt, var_cap; /* Number of variables, capacity. */
56 struct caseproto *proto; /* Prototype for dictionary cases
58 struct hmap name_map; /* Variable index by name. */
59 int next_value_idx; /* Index of next `union value' to allocate. */
60 const struct variable **split; /* SPLIT FILE vars. */
61 size_t split_cnt; /* SPLIT FILE count. */
62 struct variable *weight; /* WEIGHT variable. */
63 struct variable *filter; /* FILTER variable. */
64 casenumber case_limit; /* Current case limit (N command). */
65 char *label; /* File label. */
66 struct string documents; /* Documents, as a string. */
67 struct vector **vector; /* Vectors of variables. */
68 size_t vector_cnt; /* Number of vectors. */
69 struct attrset attributes; /* Custom attributes. */
70 struct mrset **mrsets; /* Multiple response sets. */
71 size_t n_mrsets; /* Number of multiple response sets. */
73 char *encoding; /* Character encoding of string data */
75 const struct dict_callbacks *callbacks; /* Callbacks on dictionary
77 void *cb_data ; /* Data passed to callbacks */
79 void (*changed) (struct dictionary *, void *); /* Generic change callback */
83 static void dict_unset_split_var (struct dictionary *, struct variable *);
84 static void dict_unset_mrset_var (struct dictionary *, struct variable *);
87 dict_set_encoding (struct dictionary *d, const char *enc)
92 d->encoding = xstrdup (enc);
97 dict_get_encoding (const struct dictionary *d)
104 dict_set_change_callback (struct dictionary *d,
105 void (*changed) (struct dictionary *, void*),
108 d->changed = changed;
109 d->changed_data = data;
112 /* Discards dictionary D's caseproto. (It will be regenerated
113 lazily, on demand.) */
115 invalidate_proto (struct dictionary *d)
117 caseproto_unref (d->proto);
121 /* Print a representation of dictionary D to stdout, for
122 debugging purposes. */
124 dict_dump (const struct dictionary *d)
127 for (i = 0 ; i < d->var_cnt ; ++i )
129 const struct variable *v = d->var[i].var;
130 printf ("Name: %s;\tdict_idx: %zu; case_idx: %zu\n",
132 var_get_dict_index (v),
133 var_get_case_index (v));
138 /* Associate CALLBACKS with DICT. Callbacks will be invoked whenever
139 the dictionary or any of the variables it contains are modified.
140 Each callback will get passed CALLBACK_DATA.
141 Any callback may be NULL, in which case it'll be ignored.
144 dict_set_callbacks (struct dictionary *dict,
145 const struct dict_callbacks *callbacks,
148 dict->callbacks = callbacks;
149 dict->cb_data = callback_data;
152 /* Shallow copy the callbacks from SRC to DEST */
154 dict_copy_callbacks (struct dictionary *dest,
155 const struct dictionary *src)
157 dest->callbacks = src->callbacks;
158 dest->cb_data = src->cb_data;
161 /* Creates and returns a new dictionary. */
165 struct dictionary *d = xzalloc (sizeof *d);
167 hmap_init (&d->name_map);
168 attrset_init (&d->attributes);
172 /* Creates and returns a (deep) copy of an existing
175 The new dictionary's case indexes are copied from the old
176 dictionary. If the new dictionary won't be used to access
177 cases produced with the old dictionary, then the new
178 dictionary's case indexes should be compacted with
179 dict_compact_values to save space. */
181 dict_clone (const struct dictionary *s)
183 struct dictionary *d;
188 for (i = 0; i < s->var_cnt; i++)
190 struct variable *sv = s->var[i].var;
191 struct variable *dv = dict_clone_var_assert (d, sv);
194 for (i = 0; i < var_get_short_name_cnt (sv); i++)
195 var_set_short_name (dv, i, var_get_short_name (sv, i));
197 var_get_vardict (dv)->case_index = var_get_vardict (sv)->case_index;
200 d->next_value_idx = s->next_value_idx;
202 d->split_cnt = s->split_cnt;
203 if (d->split_cnt > 0)
205 d->split = xnmalloc (d->split_cnt, sizeof *d->split);
206 for (i = 0; i < d->split_cnt; i++)
207 d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i]));
210 if (s->weight != NULL)
211 dict_set_weight (d, dict_lookup_var_assert (d, var_get_name (s->weight)));
213 if (s->filter != NULL)
214 dict_set_filter (d, dict_lookup_var_assert (d, var_get_name (s->filter)));
216 d->case_limit = s->case_limit;
217 dict_set_label (d, dict_get_label (s));
218 dict_set_documents (d, dict_get_documents (s));
220 d->vector_cnt = s->vector_cnt;
221 d->vector = xnmalloc (d->vector_cnt, sizeof *d->vector);
222 for (i = 0; i < s->vector_cnt; i++)
223 d->vector[i] = vector_clone (s->vector[i], s, d);
226 d->encoding = xstrdup (s->encoding);
228 dict_set_attributes (d, dict_get_attributes (s));
230 for (i = 0; i < s->n_mrsets; i++)
232 const struct mrset *old = s->mrsets[i];
236 /* Clone old mrset, then replace vars from S by vars from D. */
237 new = mrset_clone (old);
238 for (j = 0; j < new->n_vars; j++)
239 new->vars[j] = dict_lookup_var_assert (d, var_get_name (new->vars[j]));
241 dict_add_mrset (d, new);
247 /* Clears the contents from a dictionary without destroying the
248 dictionary itself. */
250 dict_clear (struct dictionary *d)
252 /* FIXME? Should we really clear case_limit, label, documents?
253 Others are necessarily cleared by deleting all the variables.*/
254 while (d->var_cnt > 0 )
256 dict_delete_var (d, d->var[d->var_cnt - 1].var);
261 d->var_cnt = d->var_cap = 0;
262 invalidate_proto (d);
263 hmap_clear (&d->name_map);
264 d->next_value_idx = 0;
265 dict_set_split_vars (d, NULL, 0);
266 dict_set_weight (d, NULL);
267 dict_set_filter (d, NULL);
271 ds_destroy (&d->documents);
272 dict_clear_vectors (d);
273 attrset_clear (&d->attributes);
276 /* Destroys the aux data for every variable in D, by calling
277 var_clear_aux() for each variable. */
279 dict_clear_aux (struct dictionary *d)
283 for (i = 0; i < d->var_cnt; i++)
284 var_clear_aux (d->var[i].var);
287 /* Clears a dictionary and destroys it. */
289 dict_destroy (struct dictionary *d)
293 /* In general, we don't want callbacks occurring if the dictionary
294 is being destroyed */
295 d->callbacks = NULL ;
298 hmap_destroy (&d->name_map);
299 attrset_destroy (&d->attributes);
305 /* Returns the number of variables in D. */
307 dict_get_var_cnt (const struct dictionary *d)
312 /* Returns the variable in D with dictionary index IDX, which
313 must be between 0 and the count returned by
314 dict_get_var_cnt(), exclusive. */
316 dict_get_var (const struct dictionary *d, size_t idx)
318 assert (idx < d->var_cnt);
320 return d->var[idx].var;
323 /* Sets *VARS to an array of pointers to variables in D and *CNT
324 to the number of variables in that array. All variables are returned
325 except for those, if any, in the classes indicated by EXCLUDE.
326 (There is no point in putting DC_SYSTEM in EXCLUDE as
327 dictionaries never include system variables.) */
329 dict_get_vars (const struct dictionary *d, const struct variable ***vars,
330 size_t *cnt, enum dict_class exclude)
332 dict_get_vars_mutable (d, (struct variable ***) vars, cnt, exclude);
335 /* Sets *VARS to an array of pointers to variables in D and *CNT
336 to the number of variables in that array. All variables are returned
337 except for those, if any, in the classes indicated by EXCLUDE.
338 (There is no point in putting DC_SYSTEM in EXCLUDE as
339 dictionaries never include system variables.) */
341 dict_get_vars_mutable (const struct dictionary *d, struct variable ***vars,
342 size_t *cnt, enum dict_class exclude)
347 assert (exclude == (exclude & DC_ALL));
350 for (i = 0; i < d->var_cnt; i++)
352 enum dict_class class = var_get_dict_class (d->var[i].var);
353 if (!(class & exclude))
357 *vars = xnmalloc (count, sizeof **vars);
359 for (i = 0; i < d->var_cnt; i++)
361 enum dict_class class = var_get_dict_class (d->var[i].var);
362 if (!(class & exclude))
363 (*vars)[(*cnt)++] = d->var[i].var;
365 assert (*cnt == count);
368 static struct variable *
369 add_var (struct dictionary *d, struct variable *v)
371 struct vardict_info *vardict;
373 /* Update dictionary. */
374 if (d->var_cnt >= d->var_cap)
378 d->var = x2nrealloc (d->var, &d->var_cap, sizeof *d->var);
379 hmap_clear (&d->name_map);
380 for (i = 0; i < d->var_cnt; i++)
382 var_set_vardict (d->var[i].var, &d->var[i]);
383 hmap_insert_fast (&d->name_map, &d->var[i].name_node,
384 d->var[i].name_node.hash);
388 vardict = &d->var[d->var_cnt++];
391 hmap_insert (&d->name_map, &vardict->name_node,
392 hash_case_string (var_get_name (v), 0));
393 vardict->case_index = d->next_value_idx;
394 var_set_vardict (v, vardict);
396 if ( d->changed ) d->changed (d, d->changed_data);
397 if ( d->callbacks && d->callbacks->var_added )
398 d->callbacks->var_added (d, var_get_dict_index (v), d->cb_data);
401 invalidate_proto (d);
406 /* Creates and returns a new variable in D with the given NAME
407 and WIDTH. Returns a null pointer if the given NAME would
408 duplicate that of an existing variable in the dictionary. */
410 dict_create_var (struct dictionary *d, const char *name, int width)
412 return (dict_lookup_var (d, name) == NULL
413 ? dict_create_var_assert (d, name, width)
417 /* Creates and returns a new variable in D with the given NAME
418 and WIDTH. Assert-fails if the given NAME would duplicate
419 that of an existing variable in the dictionary. */
421 dict_create_var_assert (struct dictionary *d, const char *name, int width)
423 assert (dict_lookup_var (d, name) == NULL);
424 return add_var (d, var_create (name, width));
427 /* Creates and returns a new variable in D, as a copy of existing variable
428 OLD_VAR, which need not be in D or in any dictionary. Returns a null
429 pointer if OLD_VAR's name would duplicate that of an existing variable in
432 dict_clone_var (struct dictionary *d, const struct variable *old_var)
434 return dict_clone_var_as (d, old_var, var_get_name (old_var));
437 /* Creates and returns a new variable in D, as a copy of existing variable
438 OLD_VAR, which need not be in D or in any dictionary. Assert-fails if
439 OLD_VAR's name would duplicate that of an existing variable in the
442 dict_clone_var_assert (struct dictionary *d, const struct variable *old_var)
444 return dict_clone_var_as_assert (d, old_var, var_get_name (old_var));
447 /* Creates and returns a new variable in D with name NAME, as a copy of
448 existing variable OLD_VAR, which need not be in D or in any dictionary.
449 Returns a null pointer if the given NAME would duplicate that of an existing
450 variable in the dictionary. */
452 dict_clone_var_as (struct dictionary *d, const struct variable *old_var,
455 return (dict_lookup_var (d, name) == NULL
456 ? dict_clone_var_as_assert (d, old_var, name)
460 /* Creates and returns a new variable in D with name NAME, as a copy of
461 existing variable OLD_VAR, which need not be in D or in any dictionary.
462 Assert-fails if the given NAME would duplicate that of an existing variable
463 in the dictionary. */
465 dict_clone_var_as_assert (struct dictionary *d, const struct variable *old_var,
468 struct variable *new_var = var_clone (old_var);
469 assert (dict_lookup_var (d, name) == NULL);
470 var_set_name (new_var, name);
471 return add_var (d, new_var);
474 /* Returns the variable named NAME in D, or a null pointer if no
475 variable has that name. */
477 dict_lookup_var (const struct dictionary *d, const char *name)
479 struct vardict_info *vardict;
481 HMAP_FOR_EACH_WITH_HASH (vardict, struct vardict_info, name_node,
482 hash_case_string (name, 0), &d->name_map)
484 struct variable *var = vardict->var;
485 if (!strcasecmp (var_get_name (var), name))
492 /* Returns the variable named NAME in D. Assert-fails if no
493 variable has that name. */
495 dict_lookup_var_assert (const struct dictionary *d, const char *name)
497 struct variable *v = dict_lookup_var (d, name);
502 /* Returns true if variable V is in dictionary D,
505 dict_contains_var (const struct dictionary *d, const struct variable *v)
507 return (var_has_vardict (v)
508 && vardict_get_dictionary (var_get_vardict (v)) == d);
511 /* Compares two pointers to `struct variable *' elements by address,
512 such as the entries of a struct dictionary's `split' member array. */
514 compare_var_ptrs (const void *a_, const void *b_, const void *aux UNUSED)
516 struct variable *const *a = a_;
517 struct variable *const *b = b_;
519 return *a < *b ? -1 : *a > *b;
523 unindex_var (struct dictionary *d, struct vardict_info *vardict)
525 hmap_delete (&d->name_map, &vardict->name_node);
528 /* This function assumes that vardict->name_node.hash is valid, that is, that
529 its name has not changed since it was hashed (rename_var() updates this
530 hash along with the name itself). */
532 reindex_var (struct dictionary *d, struct vardict_info *vardict)
534 struct variable *var = vardict->var;
536 var_set_vardict (var, vardict);
537 hmap_insert_fast (&d->name_map, &vardict->name_node,
538 vardict->name_node.hash);
540 if ( d->changed ) d->changed (d, d->changed_data);
541 if ( d->callbacks && d->callbacks->var_changed )
542 d->callbacks->var_changed (d, var_get_dict_index (var), d->cb_data);
545 /* Sets the case_index in V's vardict to CASE_INDEX. */
547 set_var_case_index (struct variable *v, int case_index)
549 var_get_vardict (v)->case_index = case_index;
552 /* Removes the dictionary variables with indexes from FROM to TO (exclusive)
555 unindex_vars (struct dictionary *d, size_t from, size_t to)
559 for (i = from; i < to; i++)
560 unindex_var (d, &d->var[i]);
563 /* Re-sets the dict_index in the dictionary variables with
564 indexes from FROM to TO (exclusive). */
566 reindex_vars (struct dictionary *d, size_t from, size_t to)
570 for (i = from; i < to; i++)
571 reindex_var (d, &d->var[i]);
574 /* Deletes variable V from dictionary D and frees V.
576 This is a very bad idea if there might be any pointers to V
577 from outside D. In general, no variable in the active file's
578 dictionary should be deleted when any transformations are
579 active on the dictionary's dataset, because those
580 transformations might reference the deleted variable. The
581 safest time to delete a variable is just after a procedure has
582 been executed, as done by DELETE VARIABLES.
584 Pointers to V within D are not a problem, because
585 dict_delete_var() knows to remove V from split variables,
586 weights, filters, etc. */
588 dict_delete_var (struct dictionary *d, struct variable *v)
590 int dict_index = var_get_dict_index (v);
591 const int case_index = var_get_case_index (v);
592 const int width = var_get_width (v);
594 assert (dict_contains_var (d, v));
596 /* Delete aux data. */
599 dict_unset_split_var (d, v);
600 dict_unset_mrset_var (d, v);
603 dict_set_weight (d, NULL);
606 dict_set_filter (d, NULL);
608 dict_clear_vectors (d);
610 /* Remove V from var array. */
611 unindex_vars (d, dict_index, d->var_cnt);
612 remove_element (d->var, d->var_cnt, sizeof *d->var, dict_index);
615 /* Update dict_index for each affected variable. */
616 reindex_vars (d, dict_index, d->var_cnt);
619 var_clear_vardict (v);
622 if ( d->changed ) d->changed (d, d->changed_data);
624 invalidate_proto (d);
625 if (d->callbacks && d->callbacks->var_deleted )
626 d->callbacks->var_deleted (d, dict_index, case_index, width, d->cb_data);
629 /* Deletes the COUNT variables listed in VARS from D. This is
630 unsafe; see the comment on dict_delete_var() for details. */
632 dict_delete_vars (struct dictionary *d,
633 struct variable *const *vars, size_t count)
635 /* FIXME: this can be done in O(count) time, but this algorithm
637 assert (count == 0 || vars != NULL);
640 dict_delete_var (d, *vars++);
643 /* Deletes the COUNT variables in D starting at index IDX. This
644 is unsafe; see the comment on dict_delete_var() for
647 dict_delete_consecutive_vars (struct dictionary *d, size_t idx, size_t count)
649 /* FIXME: this can be done in O(count) time, but this algorithm
651 assert (idx + count <= d->var_cnt);
654 dict_delete_var (d, d->var[idx].var);
657 /* Deletes scratch variables from dictionary D. */
659 dict_delete_scratch_vars (struct dictionary *d)
663 /* FIXME: this can be done in O(count) time, but this algorithm
665 for (i = 0; i < d->var_cnt; )
666 if (var_get_dict_class (d->var[i].var) == DC_SCRATCH)
667 dict_delete_var (d, d->var[i].var);
672 /* Moves V to 0-based position NEW_INDEX in D. Other variables in D,
673 if any, retain their relative positions. Runs in time linear
674 in the distance moved. */
676 dict_reorder_var (struct dictionary *d, struct variable *v, size_t new_index)
678 size_t old_index = var_get_dict_index (v);
680 assert (new_index < d->var_cnt);
682 unindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1);
683 move_element (d->var, d->var_cnt, sizeof *d->var, old_index, new_index);
684 reindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1);
687 /* Reorders the variables in D, placing the COUNT variables
688 listed in ORDER in that order at the beginning of D. The
689 other variables in D, if any, retain their relative
692 dict_reorder_vars (struct dictionary *d,
693 struct variable *const *order, size_t count)
695 struct vardict_info *new_var;
698 assert (count == 0 || order != NULL);
699 assert (count <= d->var_cnt);
701 new_var = xnmalloc (d->var_cap, sizeof *new_var);
703 /* Add variables in ORDER to new_var. */
704 for (i = 0; i < count; i++)
706 struct vardict_info *old_var;
708 assert (dict_contains_var (d, order[i]));
710 old_var = var_get_vardict (order[i]);
711 new_var[i] = *old_var;
712 old_var->dict = NULL;
715 /* Add remaining variables to new_var. */
716 for (i = 0; i < d->var_cnt; i++)
717 if (d->var[i].dict != NULL)
718 new_var[count++] = d->var[i];
719 assert (count == d->var_cnt);
721 /* Replace old vardicts by new ones. */
725 hmap_clear (&d->name_map);
726 reindex_vars (d, 0, d->var_cnt);
729 /* Changes the name of variable V that is currently in a dictionary to
732 rename_var (struct variable *v, const char *new_name)
734 struct vardict_info *vardict = var_get_vardict (v);
735 var_clear_vardict (v);
736 var_set_name (v, new_name);
737 vardict->name_node.hash = hash_case_string (new_name, 0);
738 var_set_vardict (v, vardict);
741 /* Changes the name of V in D to name NEW_NAME. Assert-fails if
742 a variable named NEW_NAME is already in D, except that
743 NEW_NAME may be the same as V's existing name. */
745 dict_rename_var (struct dictionary *d, struct variable *v,
746 const char *new_name)
748 assert (!strcasecmp (var_get_name (v), new_name)
749 || dict_lookup_var (d, new_name) == NULL);
751 unindex_var (d, var_get_vardict (v));
752 rename_var (v, new_name);
753 reindex_var (d, var_get_vardict (v));
755 if (settings_get_algorithm () == ENHANCED)
756 var_clear_short_names (v);
758 if ( d->changed ) d->changed (d, d->changed_data);
759 if ( d->callbacks && d->callbacks->var_changed )
760 d->callbacks->var_changed (d, var_get_dict_index (v), d->cb_data);
763 /* Renames COUNT variables specified in VARS to the names given
764 in NEW_NAMES within dictionary D. If the renaming would
765 result in a duplicate variable name, returns false and stores a
766 name that would be duplicated into *ERR_NAME (if ERR_NAME is
767 non-null). Otherwise, the renaming is successful, and true
770 dict_rename_vars (struct dictionary *d,
771 struct variable **vars, char **new_names, size_t count,
778 assert (count == 0 || vars != NULL);
779 assert (count == 0 || new_names != NULL);
781 /* Save the names of the variables to be renamed. */
782 pool = pool_create ();
783 old_names = pool_nalloc (pool, count, sizeof *old_names);
784 for (i = 0; i < count; i++)
785 old_names[i] = pool_strdup (pool, var_get_name (vars[i]));
787 /* Remove the variables to be renamed from the name hash,
789 for (i = 0; i < count; i++)
791 unindex_var (d, var_get_vardict (vars[i]));
792 rename_var (vars[i], new_names[i]);
795 /* Add the renamed variables back into the name hash,
796 checking for conflicts. */
797 for (i = 0; i < count; i++)
799 if (dict_lookup_var (d, var_get_name (vars[i])) != NULL)
801 /* There is a name conflict.
802 Back out all the name changes that have already
803 taken place, and indicate failure. */
805 if (err_name != NULL)
806 *err_name = new_names[i];
808 for (i = 0; i < fail_idx; i++)
809 unindex_var (d, var_get_vardict (vars[i]));
811 for (i = 0; i < count; i++)
813 rename_var (vars[i], old_names[i]);
814 reindex_var (d, var_get_vardict (vars[i]));
820 reindex_var (d, var_get_vardict (vars[i]));
823 /* Clear short names. */
824 if (settings_get_algorithm () == ENHANCED)
825 for (i = 0; i < count; i++)
826 var_clear_short_names (vars[i]);
832 /* Returns true if a variable named NAME may be inserted in DICT;
833 that is, if there is not already a variable with that name in
834 DICT and if NAME is not a reserved word. (The caller's checks
835 have already verified that NAME is otherwise acceptable as a
838 var_name_is_insertable (const struct dictionary *dict, const char *name)
840 return (dict_lookup_var (dict, name) == NULL
841 && lex_id_to_token (ss_cstr (name)) == T_ID);
845 make_hinted_name (const struct dictionary *dict, const char *hint,
846 char name[VAR_NAME_LEN + 1])
848 bool dropped = false;
851 for (cp = name; *hint && cp < name + VAR_NAME_LEN; hint++)
854 ? lex_is_id1 (*hint) && *hint != '$'
855 : lex_is_idn (*hint))
862 if (cp < name + VAR_NAME_LEN)
872 size_t len = strlen (name);
875 if (var_name_is_insertable (dict, name))
878 for (i = 0; i < ULONG_MAX; i++)
880 char suffix[INT_BUFSIZE_BOUND (i) + 1];
884 if (!str_format_26adic (i + 1, &suffix[1], sizeof suffix - 1))
887 ofs = MIN (VAR_NAME_LEN - strlen (suffix), len);
888 strcpy (&name[ofs], suffix);
890 if (var_name_is_insertable (dict, name))
899 make_numeric_name (const struct dictionary *dict, unsigned long int *num_start,
900 char name[VAR_NAME_LEN + 1])
902 unsigned long int number;
904 for (number = num_start != NULL ? MAX (*num_start, 1) : 1;
908 sprintf (name, "VAR%03lu", number);
909 if (dict_lookup_var (dict, name) == NULL)
911 if (num_start != NULL)
912 *num_start = number + 1;
917 if (num_start != NULL)
918 *num_start = ULONG_MAX;
923 /* Attempts to devise a variable name unique within DICT.
924 Returns true if successful, in which case the new variable
925 name is stored into NAME. Returns false if all names that can
926 be generated have already been taken. (Returning false is
927 quite unlikely: at least ULONG_MAX unique names can be
930 HINT, if it is non-null, is used as a suggestion that will be
931 modified for suitability as a variable name and for
934 If HINT is null or entirely unsuitable, a name in the form
935 "VAR%03lu" will be generated, where the smallest unused integer
936 value is used. If NUM_START is non-null, then its value is
937 used as the minimum numeric value to check, and it is updated
938 to the next value to be checked.
941 dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
942 unsigned long int *num_start,
943 char name[VAR_NAME_LEN + 1])
945 return ((hint != NULL && make_hinted_name (dict, hint, name))
946 || make_numeric_name (dict, num_start, name));
949 /* Returns the weighting variable in dictionary D, or a null
950 pointer if the dictionary is unweighted. */
952 dict_get_weight (const struct dictionary *d)
954 assert (d->weight == NULL || dict_contains_var (d, d->weight));
959 /* Returns the value of D's weighting variable in case C, except
960 that a negative weight is returned as 0. Returns 1 if the
961 dictionary is unweighted. Will warn about missing, negative,
962 or zero values if *WARN_ON_INVALID is true. The function will
963 set *WARN_ON_INVALID to false if an invalid weight is
966 dict_get_case_weight (const struct dictionary *d, const struct ccase *c,
967 bool *warn_on_invalid)
971 if (d->weight == NULL)
975 double w = case_num (c, d->weight);
976 if (w < 0.0 || var_is_num_missing (d->weight, w, MV_ANY))
978 if ( w == 0.0 && warn_on_invalid != NULL && *warn_on_invalid ) {
979 *warn_on_invalid = false;
980 msg (SW, _("At least one case in the data file had a weight value "
981 "that was user-missing, system-missing, zero, or "
982 "negative. These case(s) were ignored."));
988 /* Sets the weighting variable of D to V, or turns off
989 weighting if V is a null pointer. */
991 dict_set_weight (struct dictionary *d, struct variable *v)
993 assert (v == NULL || dict_contains_var (d, v));
994 assert (v == NULL || var_is_numeric (v));
998 if (d->changed) d->changed (d, d->changed_data);
999 if ( d->callbacks && d->callbacks->weight_changed )
1000 d->callbacks->weight_changed (d,
1001 v ? var_get_dict_index (v) : -1,
1005 /* Returns the filter variable in dictionary D (see cmd_filter())
1006 or a null pointer if the dictionary is unfiltered. */
1008 dict_get_filter (const struct dictionary *d)
1010 assert (d->filter == NULL || dict_contains_var (d, d->filter));
1015 /* Sets V as the filter variable for dictionary D. Passing a
1016 null pointer for V turns off filtering. */
1018 dict_set_filter (struct dictionary *d, struct variable *v)
1020 assert (v == NULL || dict_contains_var (d, v));
1021 assert (v == NULL || var_is_numeric (v));
1025 if (d->changed) d->changed (d, d->changed_data);
1026 if ( d->callbacks && d->callbacks->filter_changed )
1027 d->callbacks->filter_changed (d,
1028 v ? var_get_dict_index (v) : -1,
1032 /* Returns the case limit for dictionary D, or zero if the number
1033 of cases is unlimited. */
1035 dict_get_case_limit (const struct dictionary *d)
1037 return d->case_limit;
1040 /* Sets CASE_LIMIT as the case limit for dictionary D. Use
1041 0 for CASE_LIMIT to indicate no limit. */
1043 dict_set_case_limit (struct dictionary *d, casenumber case_limit)
1045 d->case_limit = case_limit;
1048 /* Returns the prototype used for cases created by dictionary D. */
1049 const struct caseproto *
1050 dict_get_proto (const struct dictionary *d_)
1052 struct dictionary *d = CONST_CAST (struct dictionary *, d_);
1053 if (d->proto == NULL)
1057 d->proto = caseproto_create ();
1058 d->proto = caseproto_reserve (d->proto, d->var_cnt);
1059 for (i = 0; i < d->var_cnt; i++)
1060 d->proto = caseproto_set_width (d->proto,
1061 var_get_case_index (d->var[i].var),
1062 var_get_width (d->var[i].var));
1067 /* Returns the case index of the next value to be added to D.
1068 This value is the number of `union value's that need to be
1069 allocated to store a case for dictionary D. */
1071 dict_get_next_value_idx (const struct dictionary *d)
1073 return d->next_value_idx;
1076 /* Returns the number of bytes needed to store a case for
1079 dict_get_case_size (const struct dictionary *d)
1081 return sizeof (union value) * dict_get_next_value_idx (d);
1084 /* Reassigns values in dictionary D so that fragmentation is
1087 dict_compact_values (struct dictionary *d)
1091 d->next_value_idx = 0;
1092 for (i = 0; i < d->var_cnt; i++)
1094 struct variable *v = d->var[i].var;
1095 set_var_case_index (v, d->next_value_idx++);
1097 invalidate_proto (d);
1100 /* Returns the number of values occupied by the variables in
1101 dictionary D. All variables are considered if EXCLUDE_CLASSES
1102 is 0, or it may contain one or more of (1u << DC_ORDINARY),
1103 (1u << DC_SYSTEM), or (1u << DC_SCRATCH) to exclude the
1104 corresponding type of variable.
1106 The return value may be less than the number of values in one
1107 of dictionary D's cases (as returned by
1108 dict_get_next_value_idx) even if EXCLUDE_CLASSES is 0, because there may be
1109 gaps in D's cases due to deleted variables. */
1111 dict_count_values (const struct dictionary *d, unsigned int exclude_classes)
1116 assert ((exclude_classes & ~((1u << DC_ORDINARY)
1118 | (1u << DC_SCRATCH))) == 0);
1121 for (i = 0; i < d->var_cnt; i++)
1123 enum dict_class class = var_get_dict_class (d->var[i].var);
1124 if (!(exclude_classes & (1u << class)))
1130 /* Returns the case prototype that would result after deleting
1131 all variables from D that are in one of the
1132 EXCLUDE_CLASSES and compacting the dictionary with
1135 The caller must unref the returned caseproto when it is no
1138 dict_get_compacted_proto (const struct dictionary *d,
1139 unsigned int exclude_classes)
1141 struct caseproto *proto;
1144 assert ((exclude_classes & ~((1u << DC_ORDINARY)
1146 | (1u << DC_SCRATCH))) == 0);
1148 proto = caseproto_create ();
1149 for (i = 0; i < d->var_cnt; i++)
1151 struct variable *v = d->var[i].var;
1152 if (!(exclude_classes & (1u << var_get_dict_class (v))))
1153 proto = caseproto_add_width (proto, var_get_width (v));
1158 /* Returns the SPLIT FILE vars (see cmd_split_file()). Call
1159 dict_get_split_cnt() to determine how many SPLIT FILE vars
1160 there are. Returns a null pointer if and only if there are no
1162 const struct variable *const *
1163 dict_get_split_vars (const struct dictionary *d)
1168 /* Returns the number of SPLIT FILE vars. */
1170 dict_get_split_cnt (const struct dictionary *d)
1172 return d->split_cnt;
1175 /* Removes variable V, which must be in D, from D's set of split
1178 dict_unset_split_var (struct dictionary *d, struct variable *v)
1182 assert (dict_contains_var (d, v));
1184 orig_count = d->split_cnt;
1185 d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
1186 &v, compare_var_ptrs, NULL);
1187 if (orig_count != d->split_cnt)
1189 if (d->changed) d->changed (d, d->changed_data);
1190 /* We changed the set of split variables so invoke the
1192 if (d->callbacks && d->callbacks->split_changed)
1193 d->callbacks->split_changed (d, d->cb_data);
1197 /* Sets CNT split vars SPLIT in dictionary D. */
1199 dict_set_split_vars (struct dictionary *d,
1200 struct variable *const *split, size_t cnt)
1202 assert (cnt == 0 || split != NULL);
1207 d->split = xnrealloc (d->split, cnt, sizeof *d->split) ;
1208 memcpy (d->split, split, cnt * sizeof *d->split);
1216 if (d->changed) d->changed (d, d->changed_data);
1217 if ( d->callbacks && d->callbacks->split_changed )
1218 d->callbacks->split_changed (d, d->cb_data);
1221 /* Returns the file label for D, or a null pointer if D is
1222 unlabeled (see cmd_file_label()). */
1224 dict_get_label (const struct dictionary *d)
1229 /* Sets D's file label to LABEL, truncating it to a maximum of 60
1232 dict_set_label (struct dictionary *d, const char *label)
1235 d->label = label != NULL ? xstrndup (label, 60) : NULL;
1238 /* Returns the documents for D, or a null pointer if D has no
1239 documents. If the return value is nonnull, then the string
1240 will be an exact multiple of DOC_LINE_LENGTH bytes in length,
1241 with each segment corresponding to one line. */
1243 dict_get_documents (const struct dictionary *d)
1245 return ds_is_empty (&d->documents) ? NULL : ds_cstr (&d->documents);
1248 /* Sets the documents for D to DOCUMENTS, or removes D's
1249 documents if DOCUMENT is a null pointer. If DOCUMENTS is
1250 nonnull, then it should be an exact multiple of
1251 DOC_LINE_LENGTH bytes in length, with each segment
1252 corresponding to one line. */
1254 dict_set_documents (struct dictionary *d, const char *documents)
1258 ds_assign_cstr (&d->documents, documents != NULL ? documents : "");
1260 /* In case the caller didn't get it quite right, pad out the
1261 final line with spaces. */
1262 remainder = ds_length (&d->documents) % DOC_LINE_LENGTH;
1264 ds_put_char_multiple (&d->documents, ' ', DOC_LINE_LENGTH - remainder);
1267 /* Drops the documents from dictionary D. */
1269 dict_clear_documents (struct dictionary *d)
1271 ds_clear (&d->documents);
1274 /* Appends LINE to the documents in D. LINE will be truncated or
1275 padded on the right with spaces to make it exactly
1276 DOC_LINE_LENGTH bytes long. */
1278 dict_add_document_line (struct dictionary *d, const char *line)
1280 if (strlen (line) > DOC_LINE_LENGTH)
1282 /* Note to translators: "bytes" is correct, not characters */
1283 msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH);
1285 buf_copy_str_rpad (ds_put_uninit (&d->documents, DOC_LINE_LENGTH),
1286 DOC_LINE_LENGTH, line, ' ');
1289 /* Returns the number of document lines in dictionary D. */
1291 dict_get_document_line_cnt (const struct dictionary *d)
1293 return ds_length (&d->documents) / DOC_LINE_LENGTH;
1296 /* Copies document line number IDX from dictionary D into
1297 LINE, trimming off any trailing white space. */
1299 dict_get_document_line (const struct dictionary *d,
1300 size_t idx, struct string *line)
1302 assert (idx < dict_get_document_line_cnt (d));
1303 ds_assign_substring (line, ds_substr (&d->documents, idx * DOC_LINE_LENGTH,
1305 ds_rtrim (line, ss_cstr (CC_SPACES));
1308 /* Creates in D a vector named NAME that contains the CNT
1309 variables in VAR. Returns true if successful, or false if a
1310 vector named NAME already exists in D. */
1312 dict_create_vector (struct dictionary *d,
1314 struct variable **var, size_t cnt)
1319 for (i = 0; i < cnt; i++)
1320 assert (dict_contains_var (d, var[i]));
1322 if (dict_lookup_vector (d, name) == NULL)
1324 d->vector = xnrealloc (d->vector, d->vector_cnt + 1, sizeof *d->vector);
1325 d->vector[d->vector_cnt++] = vector_create (name, var, cnt);
/* Adds to D a vector called NAME made up of the CNT variables in
   VAR.  D must not already contain a vector named NAME; this is
   checked with an assertion. */
void
dict_create_vector_assert (struct dictionary *d,
                           const char *name,
                           struct variable **var, size_t cnt)
{
  const struct vector *existing = dict_lookup_vector (d, name);

  assert (existing == NULL);
  dict_create_vector (d, name, var, cnt);
}
1344 /* Returns the vector in D with index IDX, which must be less
1345 than dict_get_vector_cnt (D). */
1346 const struct vector *
1347 dict_get_vector (const struct dictionary *d, size_t idx)
1349 assert (idx < d->vector_cnt);
1351 return d->vector[idx];
1354 /* Returns the number of vectors in D. */
1356 dict_get_vector_cnt (const struct dictionary *d)
1358 return d->vector_cnt;
1361 /* Looks up and returns the vector within D with the given
1363 const struct vector *
1364 dict_lookup_vector (const struct dictionary *d, const char *name)
1367 for (i = 0; i < d->vector_cnt; i++)
1368 if (!strcasecmp (vector_get_name (d->vector[i]), name))
1369 return d->vector[i];
1373 /* Deletes all vectors from D. */
1375 dict_clear_vectors (struct dictionary *d)
1379 for (i = 0; i < d->vector_cnt; i++)
1380 vector_destroy (d->vector[i]);
1387 /* Multiple response sets. */
1389 /* Returns the multiple response set in DICT with index IDX, which must be
1390 between 0 and the count returned by dict_get_n_mrsets(), exclusive. */
1391 const struct mrset *
1392 dict_get_mrset (const struct dictionary *dict, size_t idx)
1394 assert (idx < dict->n_mrsets);
1395 return dict->mrsets[idx];
1398 /* Returns the number of multiple response sets in DICT. */
1400 dict_get_n_mrsets (const struct dictionary *dict)
1402 return dict->n_mrsets;
1405 /* Looks for a multiple response set named NAME in DICT. If it finds one,
1406 returns its index; otherwise, returns SIZE_MAX. */
1408 dict_lookup_mrset_idx (const struct dictionary *dict, const char *name)
1412 for (i = 0; i < dict->n_mrsets; i++)
1413 if (!strcasecmp (name, dict->mrsets[i]->name))
1419 /* Looks for a multiple response set named NAME in DICT. If it finds one,
1420 returns it; otherwise, returns NULL. */
1421 const struct mrset *
1422 dict_lookup_mrset (const struct dictionary *dict, const char *name)
1424 size_t idx = dict_lookup_mrset_idx (dict, name);
1425 return idx != SIZE_MAX ? dict->mrsets[idx] : NULL;
1428 /* Adds MRSET to DICT, replacing any existing set with the same name. Returns
1429 true if a set was replaced, false if none existed with the specified name.
1431 Ownership of MRSET is transferred to DICT. */
1433 dict_add_mrset (struct dictionary *dict, struct mrset *mrset)
1437 assert (mrset_ok (mrset, dict));
1439 idx = dict_lookup_mrset_idx (dict, mrset->name);
1440 if (idx == SIZE_MAX)
1442 dict->mrsets = xrealloc (dict->mrsets,
1443 (dict->n_mrsets + 1) * sizeof *dict->mrsets);
1444 dict->mrsets[dict->n_mrsets++] = mrset;
1449 mrset_destroy (dict->mrsets[idx]);
1450 dict->mrsets[idx] = mrset;
1455 /* Looks for a multiple response set in DICT named NAME. If found, removes it
1456 from DICT and returns true. If none is found, returns false without
1459 Deleting one multiple response set causes the indexes of other sets within
1462 dict_delete_mrset (struct dictionary *dict, const char *name)
1464 size_t idx = dict_lookup_mrset_idx (dict, name);
1465 if (idx != SIZE_MAX)
1467 mrset_destroy (dict->mrsets[idx]);
1468 dict->mrsets[idx] = dict->mrsets[--dict->n_mrsets];
1475 /* Deletes all multiple response sets from DICT. */
1477 dict_clear_mrsets (struct dictionary *dict)
1481 for (i = 0; i < dict->n_mrsets; i++)
1482 mrset_destroy (dict->mrsets[i]);
1483 free (dict->mrsets);
1484 dict->mrsets = NULL;
1488 /* Removes VAR, which must be in DICT, from DICT's multiple response sets. */
1490 dict_unset_mrset_var (struct dictionary *dict, struct variable *var)
1494 assert (dict_contains_var (dict, var));
1496 for (i = 0; i < dict->n_mrsets; )
1498 struct mrset *mrset = dict->mrsets[i];
1501 for (j = 0; j < mrset->n_vars; )
1502 if (mrset->vars[j] == var)
1503 remove_element (mrset->vars, mrset->n_vars--,
1504 sizeof *mrset->vars, j);
1508 if (mrset->n_vars < 2)
1510 mrset_destroy (mrset);
1511 dict->mrsets[i] = dict->mrsets[--dict->n_mrsets];
1518 /* Returns D's attribute set. The caller may examine or modify
1519 the attribute set, but must not destroy it. Destroying D or
1520 calling dict_set_attributes for D will also destroy D's
1523 dict_get_attributes (const struct dictionary *d)
1525 return CONST_CAST (struct attrset *, &d->attributes);
1528 /* Replaces D's attributes set by a copy of ATTRS. */
1530 dict_set_attributes (struct dictionary *d, const struct attrset *attrs)
1532 attrset_destroy (&d->attributes);
1533 attrset_clone (&d->attributes, attrs);
1536 /* Returns true if D has at least one attribute in its attribute
1537 set, false if D's attribute set is empty. */
1539 dict_has_attributes (const struct dictionary *d)
1541 return attrset_count (&d->attributes) > 0;
1544 /* Called from variable.c to notify the dictionary that some property of
1545 the variable has changed */
1547 dict_var_changed (const struct variable *v)
1549 if ( var_has_vardict (v))
1551 const struct vardict_info *vardict = var_get_vardict (v);
1552 struct dictionary *d = vardict->dict;
1557 if (d->changed ) d->changed (d, d->changed_data);
1558 if ( d->callbacks && d->callbacks->var_changed )
1559 d->callbacks->var_changed (d, var_get_dict_index (v), d->cb_data);
1564 /* Called from variable.c to notify the dictionary that the variable's width
1567 dict_var_resized (const struct variable *v, int old_width)
1569 if ( var_has_vardict (v))
1571 const struct vardict_info *vardict = var_get_vardict (v);
1572 struct dictionary *d;
1576 if (d->changed) d->changed (d, d->changed_data);
1578 invalidate_proto (d);
1579 if ( d->callbacks && d->callbacks->var_resized )
1580 d->callbacks->var_resized (d, var_get_dict_index (v), old_width,
1585 /* Called from variable.c to notify the dictionary that the variable's display width
1588 dict_var_display_width_changed (const struct variable *v)
1590 if ( var_has_vardict (v))
1592 const struct vardict_info *vardict = var_get_vardict (v);
1593 struct dictionary *d;
1597 if (d->changed) d->changed (d, d->changed_data);
1598 if ( d->callbacks && d->callbacks->var_display_width_changed )
1599 d->callbacks->var_display_width_changed (d, var_get_dict_index (v), d->cb_data);
/* Dictionary that holds the "internal variables".  Created on first
   use by dict_create_internal_var() and destroyed again by
   dict_destroy_internal_var() once it is empty. */
static struct dictionary *internal_dict = NULL;
1606 /* Create a variable of the specified WIDTH to be used for internal
1607 calculations only. The variable is assigned case index CASE_IDX. */
1609 dict_create_internal_var (int case_idx, int width)
1611 if (internal_dict == NULL)
1612 internal_dict = dict_create ();
1616 static int counter = INT_MAX / 2;
1617 struct variable *var;
1620 if (++counter == INT_MAX)
1621 counter = INT_MAX / 2;
1623 sprintf (name, "$internal%d", counter);
1624 var = dict_create_var (internal_dict, name, width);
1627 set_var_case_index (var, case_idx);
1633 /* Destroys VAR, which must have been created with
1634 dict_create_internal_var(). */
1636 dict_destroy_internal_var (struct variable *var)
1640 dict_delete_var (internal_dict, var);
1642 /* Destroy internal_dict if it has no variables left, just so that
1643 valgrind --leak-check --show-reachable won't show internal_dict. */
1644 if (dict_get_var_cnt (internal_dict) == 0)
1646 dict_destroy (internal_dict);
1647 internal_dict = NULL;
1653 vardict_get_dict_index (const struct vardict_info *vardict)
1655 return vardict - vardict->dict->var;