1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013, 2014,
3 2015, 2020 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include "data/dictionary.h"
27 #include "data/attributes.h"
28 #include "data/case.h"
29 #include "data/identifier.h"
30 #include "data/mrset.h"
31 #include "data/settings.h"
32 #include "data/value-labels.h"
33 #include "data/vardict.h"
34 #include "data/variable.h"
35 #include "data/varset.h"
36 #include "data/vector.h"
37 #include "libpspp/array.h"
38 #include "libpspp/assertion.h"
39 #include "libpspp/compiler.h"
40 #include "libpspp/hash-functions.h"
41 #include "libpspp/hmap.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/misc.h"
45 #include "libpspp/pool.h"
46 #include "libpspp/str.h"
47 #include "libpspp/string-array.h"
48 #include "libpspp/ll.h"
50 #include "gl/intprops.h"
51 #include "gl/minmax.h"
52 #include "gl/xalloc.h"
53 #include "gl/xmemdup0.h"
56 #define _(msgid) gettext (msgid)
62 struct vardict_info *vars; /* Variables. */
63 size_t n_vars; /* Number of variables. */
64 size_t allocated_vars; /* Allocated space in 'vars'. */
65 struct caseproto *proto; /* Prototype for dictionary cases
67 struct hmap name_map; /* Variable index by name. */
68 int next_value_idx; /* Index of next `union value' to allocate. */
69 const struct variable **split; /* SPLIT FILE vars. */
70 size_t n_splits; /* SPLIT FILE count. */
71 enum split_type split_type;
72 struct variable *weight; /* WEIGHT variable. */
73 struct variable *filter; /* FILTER variable. */
74 casenumber case_limit; /* Current case limit (N command). */
75 char *label; /* File label. */
76 struct string_array documents; /* Documents. */
77 struct vector **vector; /* Vectors of variables. */
78 size_t n_vectors; /* Number of vectors. */
79 struct attrset attributes; /* Custom attributes. */
80 struct mrset **mrsets; /* Multiple response sets. */
81 size_t n_mrsets; /* Number of multiple response sets. */
82 struct varset **varsets; /* Variable sets. */
83 size_t n_varsets; /* Number of variable sets. */
85 /* Whether variable names must be valid identifiers. Normally, this is
86 true, but sometimes a dictionary is prepared for external use
87 (e.g. output to a CSV file) where names don't have to be valid. */
88 bool names_must_be_ids;
90 char *encoding; /* Character encoding of string data */
92 const struct dict_callbacks *callbacks; /* Callbacks on dictionary
94 void *cb_data ; /* Data passed to callbacks */
96 void (*changed) (struct dictionary *, void *); /* Generic change callback */
100 static void dict_unset_split_var (struct dictionary *, struct variable *, bool);
101 static void dict_unset_mrset_var (struct dictionary *, struct variable *);
102 static void dict_unset_varset_var (struct dictionary *, struct variable *);
104 /* Compares the variable pointers that A_ and B_ point to, by address.
   Returns -1 if *A_ is the lower address, 1 if it is the higher one, and 0
   if both refer to the same variable.  AUX is ignored.  Within this file it
   serves as the comparator passed to remove_equal() on a dictionary's
   split variable array. */
107 compare_var_ptrs (const void *a_, const void *b_, const void *aux UNUSED)
109 struct variable *const *a = a_;
110 struct variable *const *b = b_;
/* `*a > *b' evaluates to 1 or 0, which supplies the remaining two results. */
112 return *a < *b ? -1 : *a > *b;
116 unindex_var (struct dictionary *d, struct vardict_info *vardict)
118 hmap_delete (&d->name_map, &vardict->name_node);
121 /* This function assumes that vardict->name_node.hash is valid, that is, that
122 its name has not changed since it was hashed (rename_var() updates this
123 hash along with the name itself). */
125 reindex_var (struct dictionary *d, struct vardict_info *vardict, bool skip_callbacks)
127 struct variable *old = (d->callbacks && d->callbacks->var_changed
128 ? var_clone (vardict->var)
131 struct variable *var = vardict->var;
132 var_set_vardict (var, vardict);
133 hmap_insert_fast (&d->name_map, &vardict->name_node,
134 vardict->name_node.hash);
136 if (! skip_callbacks)
138 if (d->changed) d->changed (d, d->changed_data);
141 d->callbacks->var_changed (d, var_get_dict_index (var), VAR_TRAIT_POSITION, old, d->cb_data);
147 /* Records CASE_INDEX as the case index stored in V's vardict_info, which
   is reached through var_get_vardict().  V is assumed to already have a
   vardict attached -- TODO confirm against callers. */
149 set_var_case_index (struct variable *v, int case_index)
151 var_get_vardict (v)->case_index = case_index;
154 /* Removes the dictionary variables with indexes from FROM to TO (exclusive)
157 unindex_vars (struct dictionary *d, size_t from, size_t to)
161 for (i = from; i < to; i++)
162 unindex_var (d, &d->vars[i]);
165 /* Calls reindex_var() on every entry of D's variable array whose index
   lies in the range FROM (inclusive) to TO (exclusive), handing
   SKIP_CALLBACKS through unchanged.  For each entry, reindex_var() points
   the variable back at its vardict_info slot and re-inserts the slot into
   D's name map using the hash already stored in the slot; the change
   callbacks are issued only when SKIP_CALLBACKS is false. */
168 reindex_vars (struct dictionary *d, size_t from, size_t to, bool skip_callbacks)
172 for (i = from; i < to; i++)
173 reindex_var (d, &d->vars[i], skip_callbacks);
178 /* Returns the encoding for data in dictionary D. The return value is a
179 nonnull string that contains an IANA character set name. */
181 dict_get_encoding (const struct dictionary *d)
186 /* Checks whether UTF-8 string ID is an acceptable identifier in DICT's
187 encoding. Returns a null pointer if it is, otherwise an error message that the caller
189 char * WARN_UNUSED_RESULT
190 dict_id_is_valid__ (const struct dictionary *dict, const char *id)
192 if (!dict->names_must_be_ids)
194 return id_is_valid__ (id, dict->encoding);
198 error_to_bool (char *error)
209 /* Returns true if UTF-8 string ID is an acceptable identifier in DICT's
   encoding, false otherwise.  This is the boolean form of
   dict_id_is_valid__(): the string result of that function is handed to
   error_to_bool() for conversion, so no error message reaches the caller. */
212 dict_id_is_valid (const struct dictionary *dict, const char *id)
214 return error_to_bool (dict_id_is_valid__ (dict, id));
218 dict_set_change_callback (struct dictionary *d,
219 void (*changed) (struct dictionary *, void*),
222 d->changed = changed;
223 d->changed_data = data;
226 /* Discards dictionary D's caseproto. (It will be regenerated
227 lazily, on demand.) */
229 invalidate_proto (struct dictionary *d)
231 caseproto_unref (d->proto);
235 /* Print a representation of dictionary D to stdout, for
236 debugging purposes. */
238 dict_dump (const struct dictionary *d)
241 for (i = 0 ; i < d->n_vars ; ++i)
243 const struct variable *v = d->vars[i].var;
244 printf ("Name: %s;\tdict_idx: %zu; case_idx: %zu\n",
246 var_get_dict_index (v),
247 var_get_case_index (v));
252 /* Associate CALLBACKS with DICT. Callbacks will be invoked whenever
253 the dictionary or any of the variables it contains are modified.
254 Each callback will get passed CALLBACK_DATA.
255 Any callback may be NULL, in which case it'll be ignored.
258 dict_set_callbacks (struct dictionary *dict,
259 const struct dict_callbacks *callbacks,
262 dict->callbacks = callbacks;
263 dict->cb_data = callback_data;
266 /* Makes DEST use the same callbacks as SRC.  The copy is shallow: DEST
   receives SRC's pointer to the callback table and SRC's callback data
   pointer, so both dictionaries refer to the same objects afterward. */
268 dict_copy_callbacks (struct dictionary *dest,
269 const struct dictionary *src)
271 dest->callbacks = src->callbacks;
272 dest->cb_data = src->cb_data;
275 /* Creates and returns a new dictionary with the specified ENCODING. */
277 dict_create (const char *encoding)
279 struct dictionary *d = xmalloc (sizeof *d);
281 *d = (struct dictionary) {
282 .encoding = xstrdup (encoding),
283 .names_must_be_ids = true,
284 .name_map = HMAP_INITIALIZER (d->name_map),
285 .attributes = ATTRSET_INITIALIZER (d->attributes),
286 .split_type = SPLIT_NONE,
293 /* Creates and returns a (deep) copy of an existing
296 The new dictionary's case indexes are copied from the old
297 dictionary. If the new dictionary won't be used to access
298 cases produced with the old dictionary, then the new
299 dictionary's case indexes should be compacted with
300 dict_compact_values to save space.
302 Callbacks are not cloned. */
304 dict_clone (const struct dictionary *s)
306 struct dictionary *d = dict_create (s->encoding);
307 dict_set_names_must_be_ids (d, dict_get_names_must_be_ids (s));
309 for (size_t i = 0; i < s->n_vars; i++)
311 struct variable *sv = s->vars[i].var;
312 struct variable *dv = dict_clone_var_assert (d, sv);
314 for (size_t j = 0; j < var_get_n_short_names (sv); j++)
315 var_set_short_name (dv, j, var_get_short_name (sv, j));
317 var_get_vardict (dv)->case_index = var_get_vardict (sv)->case_index;
320 d->next_value_idx = s->next_value_idx;
322 d->n_splits = s->n_splits;
325 d->split = xnmalloc (d->n_splits, sizeof *d->split);
326 for (size_t i = 0; i < d->n_splits; i++)
327 d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i]));
329 d->split_type = s->split_type;
331 if (s->weight != NULL)
332 dict_set_weight (d, dict_lookup_var_assert (d, var_get_name (s->weight)));
334 if (s->filter != NULL)
335 dict_set_filter (d, dict_lookup_var_assert (d, var_get_name (s->filter)));
337 d->case_limit = s->case_limit;
338 dict_set_label (d, dict_get_label (s));
339 dict_set_documents (d, dict_get_documents (s));
341 d->n_vectors = s->n_vectors;
342 d->vector = xnmalloc (d->n_vectors, sizeof *d->vector);
343 for (size_t i = 0; i < s->n_vectors; i++)
344 d->vector[i] = vector_clone (s->vector[i], s, d);
346 dict_set_attributes (d, dict_get_attributes (s));
348 for (size_t i = 0; i < s->n_mrsets; i++)
350 const struct mrset *old = s->mrsets[i];
354 /* Clone old mrset, then replace vars from S by the corresponding vars from D. */
355 new = mrset_clone (old);
356 for (j = 0; j < new->n_vars; j++)
357 new->vars[j] = dict_lookup_var_assert (d, var_get_name (new->vars[j]));
359 dict_add_mrset (d, new);
362 for (size_t i = 0; i < s->n_varsets; i++)
364 const struct varset *old = s->varsets[i];
366 /* Clone old varset, then replace vars from S by the corresponding vars from D. */
367 struct varset *new = varset_clone (old);
368 for (size_t j = 0; j < new->n_vars; j++)
369 new->vars[j] = dict_lookup_var_assert (d, var_get_name (new->vars[j]));
371 dict_add_varset (d, new);
377 /* Returns the SPLIT FILE vars (see cmd_split_file()). Call
378 dict_get_n_splits() to determine how many SPLIT FILE vars
379 there are. Returns a null pointer if and only if there are no
381 const struct variable *const *
382 dict_get_split_vars (const struct dictionary *d)
387 /* Returns the number of SPLIT FILE vars. */
389 dict_get_n_splits (const struct dictionary *d)
394 /* Removes variable V, which must be in D, from D's set of split
397 dict_unset_split_var (struct dictionary *d, struct variable *v, bool skip_callbacks)
401 assert (dict_contains_var (d, v));
403 orig_count = d->n_splits;
404 d->n_splits = remove_equal (d->split, d->n_splits, sizeof *d->split,
405 &v, compare_var_ptrs, NULL);
406 if (orig_count != d->n_splits && !skip_callbacks)
408 if (d->changed) d->changed (d, d->changed_data);
409 /* We changed the set of split variables so invoke the
411 if (d->callbacks && d->callbacks->split_changed)
412 d->callbacks->split_changed (d, d->cb_data);
417 /* Sets N split vars SPLIT in dictionary D. N is silently capped to a maximum
420 dict_set_split_vars__ (struct dictionary *d,
421 struct variable *const *split, size_t n,
422 enum split_type type, bool skip_callbacks)
426 assert (n == 0 || split != NULL);
429 d->split_type = (n == 0 ? SPLIT_NONE
430 : type == SPLIT_NONE ? SPLIT_LAYERED
434 d->split = xnrealloc (d->split, n, sizeof *d->split) ;
435 memcpy (d->split, split, n * sizeof *d->split);
445 if (d->changed) d->changed (d, d->changed_data);
446 if (d->callbacks && d->callbacks->split_changed)
447 d->callbacks->split_changed (d, d->cb_data);
/* Returns the split type currently recorded in D (its `split_type'
   member). */
452 dict_get_split_type (const struct dictionary *d)
454 return d->split_type;
457 /* Sets N split vars SPLIT in dictionary D, with split type TYPE.  This is
   dict_set_split_vars__() with callbacks enabled (SKIP_CALLBACKS is passed
   as false). */
459 dict_set_split_vars (struct dictionary *d,
460 struct variable *const *split, size_t n,
461 enum split_type type)
463 dict_set_split_vars__ (d, split, n, type, false);
/* Removes all of D's split variables by setting an empty split variable
   list with type SPLIT_NONE. */
467 dict_clear_split_vars (struct dictionary *d)
469 dict_set_split_vars (d, NULL, 0, SPLIT_NONE);
473 /* Deletes variable V from dictionary D and frees V.
475 This is a very bad idea if there might be any pointers to V
476 from outside D. In general, no variable in the active dataset's
477 dictionary should be deleted when any transformations are
478 active on the dictionary's dataset, because those
479 transformations might reference the deleted variable. The
480 safest time to delete a variable is just after a procedure has
481 been executed, as done by DELETE VARIABLES.
483 Pointers to V within D are not a problem, because
484 dict_delete_var() knows to remove V from split variables,
485 weights, filters, etc. */
487 dict_delete_var__ (struct dictionary *d, struct variable *v, bool skip_callbacks)
489 int dict_index = var_get_dict_index (v);
490 const int case_index = var_get_case_index (v);
492 assert (dict_contains_var (d, v));
494 dict_unset_split_var (d, v, skip_callbacks);
495 dict_unset_mrset_var (d, v);
496 dict_unset_varset_var (d, v);
499 dict_set_weight (d, NULL);
502 dict_set_filter (d, NULL);
504 dict_clear_vectors (d);
506 /* Remove V from var array. */
507 unindex_vars (d, dict_index, d->n_vars);
508 remove_element (d->vars, d->n_vars, sizeof *d->vars, dict_index);
511 /* Update dict_index for each affected variable. */
512 reindex_vars (d, dict_index, d->n_vars, skip_callbacks);
515 var_clear_vardict (v);
517 if (! skip_callbacks)
519 if (d->changed) d->changed (d, d->changed_data);
520 if (d->callbacks && d->callbacks->var_deleted)
521 d->callbacks->var_deleted (d, v, dict_index, case_index, d->cb_data);
524 invalidate_proto (d);
528 /* Deletes variable V from dictionary D and frees V.  This is
   dict_delete_var__() with callbacks enabled (SKIP_CALLBACKS is passed as
   false).

   This is a very bad idea if there might be any pointers to V
   from outside D.  In general, no variable in the active dataset's
   dictionary should be deleted when any transformations are
   active on the dictionary's dataset, because those
   transformations might reference the deleted variable.  The
   safest time to delete a variable is just after a procedure has
   been executed, as done by DELETE VARIABLES.

   Pointers to V within D are not a problem, because the deletion
   removes V from split variables, multiple response sets, variable
   sets, weights, filters, etc. */
542 dict_delete_var (struct dictionary *d, struct variable *v)
544 dict_delete_var__ (d, v, false);
548 /* Deletes the COUNT variables listed in VARS from D. This is
549 unsafe; see the comment on dict_delete_var() for details. */
551 dict_delete_vars (struct dictionary *d,
552 struct variable *const *vars, size_t count)
554 /* FIXME: this can be done in O(count) time, but this algorithm
556 assert (count == 0 || vars != NULL);
559 dict_delete_var (d, *vars++);
562 /* Deletes the COUNT variables in D starting at index IDX. This
563 is unsafe; see the comment on dict_delete_var() for
564 details. Deleting consecutive vars will result in fewer callbacks
565 compared to iterating over dict_delete_var().
566 A simple while loop over dict_delete_var() will
567 produce (d->n_vars - IDX) * COUNT variable changed callbacks
568 plus COUNT variable delete callbacks.
569 This function produces d->n_vars - IDX variable changed callbacks
570 plus COUNT variable delete callbacks. */
572 dict_delete_consecutive_vars (struct dictionary *d, size_t idx, size_t count)
574 assert (idx + count <= d->n_vars);
576 /* We need to store the variable and the corresponding case_index
577 for the delete callbacks later. We store them in a linked list.*/
580 struct variable *var;
583 struct ll_list list = LL_INITIALIZER (list);
585 for (size_t i = idx; i < idx + count; i++)
587 struct delvar *dv = xmalloc (sizeof (struct delvar));
589 struct variable *v = d->vars[i].var;
591 dict_unset_split_var (d, v, false);
592 dict_unset_mrset_var (d, v);
593 dict_unset_varset_var (d, v);
596 dict_set_weight (d, NULL);
599 dict_set_filter (d, NULL);
602 dv->case_index = var_get_case_index (v);
603 ll_push_tail (&list, (struct ll *)dv);
606 dict_clear_vectors (d);
608 /* Remove variables from var array. */
609 unindex_vars (d, idx, d->n_vars);
610 remove_range (d->vars, d->n_vars, sizeof *d->vars, idx, count);
613 /* Reindexing will result in variable-changed callbacks. */
614 reindex_vars (d, idx, d->n_vars, false);
616 invalidate_proto (d);
617 if (d->changed) d->changed (d, d->changed_data);
619 /* Now issue the variable delete callbacks and delete
620 the variables. The vardict is not valid at this point
621 anymore. That is the reason why we stored the
622 case index before reindexing. */
623 for (size_t vi = idx; vi < idx + count; vi++)
625 struct delvar *dv = (struct delvar *) ll_pop_head (&list);
626 var_clear_vardict (dv->var);
627 if (d->callbacks && d->callbacks->var_deleted)
628 d->callbacks->var_deleted (d, dv->var, vi, dv->case_index, d->cb_data);
634 /* Deletes scratch variables from dictionary D. */
636 dict_delete_scratch_vars (struct dictionary *d)
640 /* FIXME: this can be done in O(count) time, but this algorithm
642 for (i = 0; i < d->n_vars;)
643 if (var_get_dict_class (d->vars[i].var) == DC_SCRATCH)
644 dict_delete_var (d, d->vars[i].var);
651 /* Clears the contents from a dictionary without destroying the
652 dictionary itself. */
654 dict_clear__ (struct dictionary *d, bool skip_callbacks)
656 /* FIXME? Should we really clear case_limit, label, documents?
657 Others are necessarily cleared by deleting all the variables.*/
658 while (d->n_vars > 0)
660 dict_delete_var__ (d, d->vars[d->n_vars - 1].var, skip_callbacks);
665 d->n_vars = d->allocated_vars = 0;
666 invalidate_proto (d);
667 hmap_clear (&d->name_map);
668 d->next_value_idx = 0;
669 dict_set_split_vars__ (d, NULL, 0, SPLIT_NONE, skip_callbacks);
678 dict_set_weight (d, NULL);
679 dict_set_filter (d, NULL);
684 string_array_clear (&d->documents);
685 dict_clear_vectors (d);
686 attrset_clear (&d->attributes);
689 /* Clears the contents from a dictionary without destroying the
   dictionary itself.  This is dict_clear__() with callbacks enabled
   (SKIP_CALLBACKS is passed as false). */
692 dict_clear (struct dictionary *d)
694 dict_clear__ (d, false);
697 /* Clears dictionary D and then destroys it: D's contents are removed
   with callbacks suppressed, after which its documents, name map,
   attribute set, multiple response sets, and variable sets are torn
   down. */
699 _dict_destroy (struct dictionary *d)
701 /* Callbacks should not fire while the dictionary is being destroyed, so
   drop the callback table first. */
703 d->callbacks = NULL ;
/* `true' is the SKIP_CALLBACKS argument, so dict_clear__() is also asked
   not to issue notifications of its own. */
705 dict_clear__ (d, true);
706 string_array_destroy (&d->documents);
707 hmap_destroy (&d->name_map);
708 attrset_destroy (&d->attributes);
709 dict_clear_mrsets (d);
710 dict_clear_varsets (d);
716 dict_ref (struct dictionary *d)
723 dict_unref (struct dictionary *d)
728 assert (d->ref_cnt >= 0);
733 /* Returns the number of variables in D. */
735 dict_get_n_vars (const struct dictionary *d)
740 /* Returns the variable in D with dictionary index IDX.  IDX must be
   less than the number of variables in D (the count that
   dict_get_n_vars() reports); this upper bound is asserted against D's
   n_vars member. */
744 dict_get_var (const struct dictionary *d, size_t idx)
746 assert (idx < d->n_vars);
748 return d->vars[idx].var;
751 /* Sets *VARS to an array of pointers to variables in D and *N
   to the number of variables stored in that array.  All variables are
   returned except for those, if any, in the classes indicated by EXCLUDE.
   (There is no point in putting DC_SYSTEM in EXCLUDE as
   dictionaries never include system variables.)

   This is the const-element counterpart of dict_get_vars_mutable(): it
   casts VARS and delegates all of the work to that function. */
757 dict_get_vars (const struct dictionary *d, const struct variable ***vars,
758 size_t *n, enum dict_class exclude)
760 dict_get_vars_mutable (d, (struct variable ***) vars, n, exclude);
763 /* Sets *VARS to an array of pointers to variables in D and *N
764 to the number of variables in *D. All variables are returned
765 except for those, if any, in the classes indicated by EXCLUDE.
766 (There is no point in putting DC_SYSTEM in EXCLUDE as
767 dictionaries never include system variables.) */
769 dict_get_vars_mutable (const struct dictionary *d, struct variable ***vars,
770 size_t *n, enum dict_class exclude)
775 assert (exclude == (exclude & DC_ALL));
778 for (i = 0; i < d->n_vars; i++)
780 enum dict_class class = var_get_dict_class (d->vars[i].var);
781 if (!(class & exclude))
785 *vars = xnmalloc (count, sizeof **vars);
787 for (i = 0; i < d->n_vars; i++)
789 enum dict_class class = var_get_dict_class (d->vars[i].var);
790 if (!(class & exclude))
791 (*vars)[(*n)++] = d->vars[i].var;
793 assert (*n == count);
796 static struct variable *
797 add_var_with_case_index (struct dictionary *d, struct variable *v,
800 struct vardict_info *vardict;
802 assert (case_index >= d->next_value_idx);
804 /* Update dictionary. */
805 if (d->n_vars >= d->allocated_vars)
809 d->vars = x2nrealloc (d->vars, &d->allocated_vars, sizeof *d->vars);
810 hmap_clear (&d->name_map);
811 for (i = 0; i < d->n_vars; i++)
813 var_set_vardict (d->vars[i].var, &d->vars[i]);
814 hmap_insert_fast (&d->name_map, &d->vars[i].name_node,
815 d->vars[i].name_node.hash);
819 vardict = &d->vars[d->n_vars++];
822 hmap_insert (&d->name_map, &vardict->name_node,
823 utf8_hash_case_string (var_get_name (v), 0));
824 vardict->case_index = case_index;
825 var_set_vardict (v, vardict);
827 if (d->changed) d->changed (d, d->changed_data);
828 if (d->callbacks && d->callbacks->var_added)
829 d->callbacks->var_added (d, var_get_dict_index (v), d->cb_data);
831 invalidate_proto (d);
832 d->next_value_idx = case_index + 1;
/* Adds V to D, giving it the next unallocated case index (D's
   next_value_idx), and returns the result of
   add_var_with_case_index(). */
837 static struct variable *
838 add_var (struct dictionary *d, struct variable *v)
840 return add_var_with_case_index (d, v, d->next_value_idx);
843 /* Creates and returns a new variable in D with the given NAME
844 and WIDTH. Returns a null pointer if the given NAME would
845 duplicate that of an existing variable in the dictionary. */
847 dict_create_var (struct dictionary *d, const char *name, int width)
849 return (dict_lookup_var (d, name) == NULL
850 ? dict_create_var_assert (d, name, width)
854 /* Creates a new variable with the given NAME and WIDTH via var_create(),
   adds it to D with add_var(), and returns the result.  Assert-fails if
   NAME would duplicate that of an existing variable in the dictionary. */
858 dict_create_var_assert (struct dictionary *d, const char *name, int width)
860 assert (dict_lookup_var (d, name) == NULL);
861 return add_var (d, var_create (name, width));
864 /* Creates and returns a new variable in D, as a copy of existing variable
865 OLD_VAR, which need not be in D or in any dictionary. Returns a null
866 pointer if OLD_VAR's name would duplicate that of an existing variable in
869 dict_clone_var (struct dictionary *d, const struct variable *old_var)
871 return dict_clone_var_as (d, old_var, var_get_name (old_var));
874 /* Creates and returns a new variable in D, as a copy of existing variable
875 OLD_VAR, which need not be in D or in any dictionary. Assert-fails if
876 OLD_VAR's name would duplicate that of an existing variable in the
879 dict_clone_var_assert (struct dictionary *d, const struct variable *old_var)
881 return dict_clone_var_as_assert (d, old_var, var_get_name (old_var));
884 /* Creates and returns a new variable in D with name NAME, as a copy of
885 existing variable OLD_VAR, which need not be in D or in any dictionary.
886 Returns a null pointer if the given NAME would duplicate that of an existing
887 variable in the dictionary. */
889 dict_clone_var_as (struct dictionary *d, const struct variable *old_var,
892 return (dict_lookup_var (d, name) == NULL
893 ? dict_clone_var_as_assert (d, old_var, name)
897 /* Creates and returns a new variable in D with name NAME, as a copy of
898 existing variable OLD_VAR, which need not be in D or in any dictionary.
899 Assert-fails if the given NAME would duplicate that of an existing variable
900 in the dictionary. */
902 dict_clone_var_as_assert (struct dictionary *d, const struct variable *old_var,
905 struct variable *new_var = var_clone (old_var);
906 assert (dict_lookup_var (d, name) == NULL);
907 var_set_name (new_var, name);
908 return add_var (d, new_var);
/* Adds a clone of OLD_VAR (made with var_clone()) to D and returns the
   result of add_var_with_case_index().  Unlike add_var(), which assigns
   D's next unallocated case index, the clone is given OLD_VAR's case
   index.  Assert-fails if D already has a variable with OLD_VAR's name. */
912 dict_clone_var_in_place_assert (struct dictionary *d,
913 const struct variable *old_var)
915 assert (dict_lookup_var (d, var_get_name (old_var)) == NULL);
916 return add_var_with_case_index (d, var_clone (old_var),
917 var_get_case_index (old_var));
920 /* Returns the variable named NAME in D, or a null pointer if no
921 variable has that name. */
923 dict_lookup_var (const struct dictionary *d, const char *name)
925 struct vardict_info *vardict;
927 HMAP_FOR_EACH_WITH_HASH (vardict, struct vardict_info, name_node,
928 utf8_hash_case_string (name, 0), &d->name_map)
930 struct variable *var = vardict->var;
931 if (!utf8_strcasecmp (var_get_name (var), name))
938 /* Returns the variable named NAME in D. Assert-fails if no
939 variable has that name. */
941 dict_lookup_var_assert (const struct dictionary *d, const char *name)
943 struct variable *v = dict_lookup_var (d, name);
948 /* Returns true if variable V is in dictionary D,
951 dict_contains_var (const struct dictionary *d, const struct variable *v)
953 return (var_has_vardict (v)
954 && vardict_get_dictionary (var_get_vardict (v)) == d);
957 /* Moves V to 0-based position NEW_INDEX in D, which must be less than
   the number of variables in D.  Other variables in D, if any, retain
   their relative positions.  Runs in time linear in the distance moved:
   only the entries between V's old and new positions, inclusive, are
   unindexed and reindexed. */
961 dict_reorder_var (struct dictionary *d, struct variable *v, size_t new_index)
963 size_t old_index = var_get_dict_index (v);
965 assert (new_index < d->n_vars);
/* Take every entry that is about to shift out of the name map before the
   array is rearranged. */
967 unindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1);
968 move_element (d->vars, d->n_vars, sizeof *d->vars, old_index, new_index);
/* Re-link the same range; `false' means callbacks are not skipped. */
969 reindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1, false);
972 /* Reorders the variables in D, placing the COUNT variables
973 listed in ORDER in that order at the beginning of D. The
974 other variables in D, if any, retain their relative
977 dict_reorder_vars (struct dictionary *d,
978 struct variable *const *order, size_t count)
980 struct vardict_info *new_var;
983 assert (count == 0 || order != NULL);
984 assert (count <= d->n_vars);
986 new_var = xnmalloc (d->allocated_vars, sizeof *new_var);
988 /* Add variables in ORDER to new_var. */
989 for (i = 0; i < count; i++)
991 struct vardict_info *old_var;
993 assert (dict_contains_var (d, order[i]));
995 old_var = var_get_vardict (order[i]);
996 new_var[i] = *old_var;
997 old_var->dict = NULL;
1000 /* Add remaining variables to new_var. */
1001 for (i = 0; i < d->n_vars; i++)
1002 if (d->vars[i].dict != NULL)
1003 new_var[count++] = d->vars[i];
1004 assert (count == d->n_vars);
1006 /* Replace old vardicts by new ones. */
1010 hmap_clear (&d->name_map);
1011 reindex_vars (d, 0, d->n_vars, false);
1014 /* Changes the name of variable V that is currently in a dictionary to
1017 rename_var (struct variable *v, const char *new_name)
1019 struct vardict_info *vardict = var_get_vardict (v);
1020 var_clear_vardict (v);
1021 var_set_name (v, new_name);
1022 vardict->name_node.hash = utf8_hash_case_string (new_name, 0);
1023 var_set_vardict (v, vardict);
1026 /* Tries to change the name of V in D to name NEW_NAME. Returns true if
1027 successful, false if a variable (other than V) with the given name already
1030 dict_try_rename_var (struct dictionary *d, struct variable *v,
1031 const char *new_name)
1033 struct variable *conflict = dict_lookup_var (d, new_name);
1034 if (conflict && v != conflict)
1037 struct variable *old = var_clone (v);
1038 unindex_var (d, var_get_vardict (v));
1039 rename_var (v, new_name);
1040 reindex_var (d, var_get_vardict (v), false);
1042 if (settings_get_algorithm () == ENHANCED)
1043 var_clear_short_names (v);
1045 if (d->changed) d->changed (d, d->changed_data);
1046 if (d->callbacks && d->callbacks->var_changed)
1047 d->callbacks->var_changed (d, var_get_dict_index (v), VAR_TRAIT_NAME, old, d->cb_data);
1054 /* Changes the name of V in D to name NEW_NAME. Assert-fails if
1055 a variable named NEW_NAME is already in D, except that
1056 NEW_NAME may be the same as V's existing name. */
1058 dict_rename_var (struct dictionary *d, struct variable *v,
1059 const char *new_name)
1061 bool ok UNUSED = dict_try_rename_var (d, v, new_name);
1065 /* Renames COUNT variables specified in VARS to the names given
1066 in NEW_NAMES within dictionary D. If the renaming would
1067 result in a duplicate variable name, returns false and stores a
1068 name that would be duplicated into *ERR_NAME (if ERR_NAME is
1069 non-null). Otherwise, the renaming is successful, and true
1072 dict_rename_vars (struct dictionary *d,
1073 struct variable **vars, char **new_names, size_t count,
1080 assert (count == 0 || vars != NULL);
1081 assert (count == 0 || new_names != NULL);
1083 /* Save the names of the variables to be renamed. */
1084 pool = pool_create ();
1085 old_names = pool_nalloc (pool, count, sizeof *old_names);
1086 for (i = 0; i < count; i++)
1087 old_names[i] = pool_strdup (pool, var_get_name (vars[i]));
1089 /* Remove the variables to be renamed from the name hash,
1091 for (i = 0; i < count; i++)
1093 unindex_var (d, var_get_vardict (vars[i]));
1094 rename_var (vars[i], new_names[i]);
1097 /* Add the renamed variables back into the name hash,
1098 checking for conflicts. */
1099 for (i = 0; i < count; i++)
1101 if (dict_lookup_var (d, var_get_name (vars[i])) != NULL)
1103 /* There is a name conflict.
1104 Back out all the name changes that have already
1105 taken place, and indicate failure. */
1106 size_t fail_idx = i;
1107 if (err_name != NULL)
1108 *err_name = new_names[i];
1110 for (i = 0; i < fail_idx; i++)
1111 unindex_var (d, var_get_vardict (vars[i]));
1113 for (i = 0; i < count; i++)
1115 rename_var (vars[i], old_names[i]);
1116 reindex_var (d, var_get_vardict (vars[i]), false);
1119 pool_destroy (pool);
1122 reindex_var (d, var_get_vardict (vars[i]), false);
1125 /* Clear short names. */
1126 if (settings_get_algorithm () == ENHANCED)
1127 for (i = 0; i < count; i++)
1128 var_clear_short_names (vars[i]);
1130 pool_destroy (pool);
1134 /* Returns true if a variable named NAME may be inserted in DICT;
1135 that is, if there is not already a variable with that name in
1136 DICT and if NAME is not a reserved word. (The caller's checks
1137 have already verified that NAME is otherwise acceptable as a
1140 var_name_is_insertable (const struct dictionary *dict, const char *name)
1142 return (dict_lookup_var (dict, name) == NULL
1143 && lex_id_to_token (ss_cstr (name)) == T_ID);
1147 make_hinted_name (const struct dictionary *dict, const char *hint)
1149 size_t hint_len = strlen (hint);
1150 bool dropped = false;
1155 if (hint_len > ID_MAX_LEN)
1156 hint_len = ID_MAX_LEN;
1158 /* The allocation size here is OK: characters that are copied directly fit
1159 OK, and characters that are not copied directly are replaced by a single
1160 '_' byte. If u8_mbtouc() replaces bad input by 0xfffd, then that will get
1161 replaced by '_' too. */
1162 root = rp = xmalloc (hint_len + 1);
1163 for (ofs = 0; ofs < hint_len; ofs += mblen)
1167 mblen = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, hint + ofs),
1170 ? lex_uc_is_id1 (uc) && uc != '$'
1171 : lex_uc_is_idn (uc))
1178 rp += u8_uctomb (CHAR_CAST (uint8_t *, rp), uc, 6);
1180 else if (rp != root)
1185 if (root[0] != '\0')
1187 unsigned long int i;
1189 if (var_name_is_insertable (dict, root))
1192 for (i = 0; i < ULONG_MAX; i++)
1194 char suffix[INT_BUFSIZE_BOUND (i) + 1];
1198 if (!str_format_26adic (i + 1, true, &suffix[1], sizeof suffix - 1))
1201 name = utf8_encoding_concat (root, suffix, dict->encoding, 64);
1202 if (var_name_is_insertable (dict, name))
1217 make_numeric_name (const struct dictionary *dict, unsigned long int *num_start)
1219 unsigned long int number;
1221 for (number = num_start != NULL ? MAX (*num_start, 1) : 1;
1225 char name[3 + INT_STRLEN_BOUND (number) + 1];
1227 sprintf (name, "VAR%03lu", number);
1228 if (dict_lookup_var (dict, name) == NULL)
1230 if (num_start != NULL)
1231 *num_start = number + 1;
1232 return xstrdup (name);
/* Devises and returns a variable name unique within DICT.  The variable name
   is owned by the caller, which must free it with free() when it is no longer
   needed.

   HINT, if it is non-null, is used as a suggestion that will be
   modified for suitability as a variable name and for
   uniqueness.

   If HINT is null or entirely unsuitable, a name in the form
   "VAR%03d" will be generated, where the smallest unused integer
   value is used.  If NUM_START is non-null, then its value is
   used as the minimum numeric value to check, and it is updated
   to the next value to be checked. */
char *
dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
                           unsigned long int *num_start)
{
  if (hint != NULL)
    {
      char *hinted_name = make_hinted_name (dict, hint);
      if (hinted_name != NULL)
        return hinted_name;
    }

  return make_numeric_name (dict, num_start);
}
1268 /* Returns whether variable names must be valid identifiers. Normally, this is
1269 true, but sometimes a dictionary is prepared for external use (e.g. output
1270 to a CSV file) where names don't have to be valid. */
1272 dict_get_names_must_be_ids (const struct dictionary *d)
1274 return d->names_must_be_ids;
1277 /* Sets whether variable names must be valid identifiers. Normally, this is
1278 true, but sometimes a dictionary is prepared for external use (e.g. output
1279 to a CSV file) where names don't have to be valid.
1281 Changing this setting from false to true doesn't make the dictionary check
1282 all the existing variable names, so it can cause an invariant violation. */
1284 dict_set_names_must_be_ids (struct dictionary *d, bool names_must_be_ids)
1286 d->names_must_be_ids = names_must_be_ids;
1289 /* Returns the weighting variable in dictionary D, or a null
1290 pointer if the dictionary is unweighted. */
1292 dict_get_weight (const struct dictionary *d)
1294 assert (d->weight == NULL || dict_contains_var (d, d->weight));
1299 /* Returns the value of D's weighting variable in case C, except
1300 that a negative or missing weight is returned as 0. Returns 1 if the
1301 dictionary is unweighted. Will warn about missing, negative,
1302 or zero values if *WARN_ON_INVALID is true. The function will
1303 set *WARN_ON_INVALID to false if an invalid weight is
1306 dict_get_case_weight (const struct dictionary *d, const struct ccase *c,
1307 bool *warn_on_invalid)
1311 if (d->weight == NULL)
1315 double w = case_num (c, d->weight);
1317 return var_force_valid_weight (d->weight, w, warn_on_invalid);
/* Like dict_get_case_weight(), but additionally rounds each weight to the
   nearest integer (halves are rounded up, via floor (w + 0.5)). */
double
dict_get_rounded_case_weight (const struct dictionary *d,
                              const struct ccase *c, bool *warn_on_invalid)
{
  return floor (dict_get_case_weight (d, c, warn_on_invalid) + 0.5);
}
1330 /* Returns the format to use for weights. */
1332 dict_get_weight_format (const struct dictionary *d)
1334 return d->weight ? var_get_print_format (d->weight) : F_8_0;
1337 /* Sets the weighting variable of D to V, or turning off
1338 weighting if V is a null pointer. */
1340 dict_set_weight (struct dictionary *d, struct variable *v)
1342 assert (v == NULL || dict_contains_var (d, v));
1343 assert (v == NULL || var_is_numeric (v));
1347 if (d->changed) d->changed (d, d->changed_data);
1348 if (d->callbacks && d->callbacks->weight_changed)
1349 d->callbacks->weight_changed (d,
1350 v ? var_get_dict_index (v) : -1,
1354 /* Returns the filter variable in dictionary D (see cmd_filter())
1355 or a null pointer if the dictionary is unfiltered. */
1357 dict_get_filter (const struct dictionary *d)
1359 assert (d->filter == NULL || dict_contains_var (d, d->filter));
1364 /* Sets V as the filter variable for dictionary D. Passing a
1365 null pointer for V turn off filtering. */
1367 dict_set_filter (struct dictionary *d, struct variable *v)
1369 assert (v == NULL || dict_contains_var (d, v));
1370 assert (v == NULL || var_is_numeric (v));
1374 if (d->changed) d->changed (d, d->changed_data);
1375 if (d->callbacks && d->callbacks->filter_changed)
1376 d->callbacks->filter_changed (d,
1377 v ? var_get_dict_index (v) : -1,
1381 /* Returns the case limit for dictionary D, or zero if the number
1382 of cases is unlimited. */
1384 dict_get_case_limit (const struct dictionary *d)
1386 return d->case_limit;
1389 /* Sets CASE_LIMIT as the case limit for dictionary D. Use
1390 0 for CASE_LIMIT to indicate no limit. */
1392 dict_set_case_limit (struct dictionary *d, casenumber case_limit)
1394 d->case_limit = case_limit;
1397 /* Returns the prototype used for cases created by dictionary D. */
1398 const struct caseproto *
1399 dict_get_proto (const struct dictionary *d_)
1401 struct dictionary *d = CONST_CAST (struct dictionary *, d_);
1402 if (d->proto == NULL)
1406 d->proto = caseproto_create ();
1407 d->proto = caseproto_reserve (d->proto, d->n_vars);
1408 for (i = 0; i < d->n_vars; i++)
1409 d->proto = caseproto_set_width (d->proto,
1410 var_get_case_index (d->vars[i].var),
1411 var_get_width (d->vars[i].var));
1416 /* Returns the case index of the next value to be added to D.
1417 This value is the number of `union value's that need to be
1418 allocated to store a case for dictionary D. */
1420 dict_get_next_value_idx (const struct dictionary *d)
1422 return d->next_value_idx;
1425 /* Returns the number of bytes needed to store a case for
1428 dict_get_case_size (const struct dictionary *d)
1430 return sizeof (union value) * dict_get_next_value_idx (d);
1433 /* Reassigns values in dictionary D so that fragmentation is
1436 dict_compact_values (struct dictionary *d)
1440 d->next_value_idx = 0;
1441 for (i = 0; i < d->n_vars; i++)
1443 struct variable *v = d->vars[i].var;
1444 set_var_case_index (v, d->next_value_idx++);
1446 invalidate_proto (d);
1449 /* Returns the number of values occupied by the variables in
1450 dictionary D. All variables are considered if EXCLUDE_CLASSES
1451 is 0, or it may contain one or more of DC_ORDINARY, DC_SYSTEM,
1452 or DC_SCRATCH to exclude the corresponding type of variable.
1454 The return value may be less than the number of values in one
1455 of dictionary D's cases (as returned by
1456 dict_get_next_value_idx) even if E is 0, because there may be
1457 gaps in D's cases due to deleted variables. */
1459 dict_count_values (const struct dictionary *d, unsigned int exclude_classes)
1461 assert (!(exclude_classes & ~DC_ALL));
1464 for (size_t i = 0; i < d->n_vars; i++)
1466 enum dict_class class = var_get_dict_class (d->vars[i].var);
1467 if (!(exclude_classes & class))
1473 /* Returns the case prototype that would result after deleting
1474 all variables from D that are not in one of the
1475 EXCLUDE_CLASSES and compacting the dictionary with
1478 The caller must unref the returned caseproto when it is no
1481 dict_get_compacted_proto (const struct dictionary *d,
1482 unsigned int exclude_classes)
1484 struct caseproto *proto;
1487 assert (!(exclude_classes & ~DC_ALL));
1489 proto = caseproto_create ();
1490 for (i = 0; i < d->n_vars; i++)
1492 struct variable *v = d->vars[i].var;
1493 if (!(exclude_classes & var_get_dict_class (v)))
1494 proto = caseproto_add_width (proto, var_get_width (v));
1498 /* Returns the file label for D, or a null pointer if D is
1499 unlabeled (see cmd_file_label()). */
1501 dict_get_label (const struct dictionary *d)
1506 /* Sets D's file label to LABEL, truncating it to at most 60 bytes in D's
1509 Removes D's label if LABEL is null or the empty string. */
1511 dict_set_label (struct dictionary *d, const char *label)
1514 if (label == NULL || label[0] == '\0')
1517 d->label = utf8_encoding_trunc (label, d->encoding, 60);
1520 /* Returns the documents for D, as an UTF-8 encoded string_array. The
1521 return value is always nonnull; if there are no documents then the
1522 string_arary is empty.*/
1523 const struct string_array *
1524 dict_get_documents (const struct dictionary *d)
1526 return &d->documents;
1529 /* Replaces the documents for D by NEW_DOCS, a UTF-8 encoded string_array. */
1531 dict_set_documents (struct dictionary *d, const struct string_array *new_docs)
1533 /* Swap out the old documents, instead of destroying them immediately, to
1534 allow the new documents to include pointers into the old ones. */
1535 struct string_array old_docs = STRING_ARRAY_INITIALIZER;
1536 string_array_swap (&d->documents, &old_docs);
1538 for (size_t i = 0; i < new_docs->n; i++)
1539 dict_add_document_line (d, new_docs->strings[i], false);
1541 string_array_destroy (&old_docs);
/* Replaces the documents for D by UTF-8 encoded string NEW_DOCS, dividing it
   into individual lines at new-line characters.  Each line is truncated to at
   most DOC_LINE_LENGTH bytes in D's encoding. */
void
dict_set_documents_string (struct dictionary *d, const char *new_docs)
{
  const char *s;

  dict_clear_documents (d);
  for (s = new_docs; *s != '\0';)
    {
      size_t len = strcspn (s, "\n");
      char *line = xmemdup0 (s, len);
      dict_add_document_line (d, line, false);
      free (line);

      /* Step past this line and the new-line that ends it, if any. */
      s += len;
      if (*s == '\n')
        s++;
    }
}
1566 /* Drops the documents from dictionary D. */
1568 dict_clear_documents (struct dictionary *d)
1570 string_array_clear (&d->documents);
1573 /* Appends the UTF-8 encoded LINE to the documents in D. LINE will be
1574 truncated so that it is no more than 80 bytes in the dictionary's
1575 encoding. If this causes some text to be lost, and ISSUE_WARNING is true,
1576 then a warning will be issued. */
1578 dict_add_document_line (struct dictionary *d, const char *line,
1584 trunc_len = utf8_encoding_trunc_len (line, d->encoding, DOC_LINE_LENGTH);
1585 truncated = line[trunc_len] != '\0';
1586 if (truncated && issue_warning)
1588 /* TRANSLATORS: "bytes" is correct, not characters due to UTF encoding */
1589 msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH);
1592 string_array_append_nocopy (&d->documents, xmemdup0 (line, trunc_len));
1597 /* Returns the number of document lines in dictionary D. */
1599 dict_get_document_n_lines (const struct dictionary *d)
1601 return d->documents.n;
1604 /* Returns document line number IDX in dictionary D. The caller must not
1605 modify or free the returned string. */
1607 dict_get_document_line (const struct dictionary *d, size_t idx)
1609 assert (idx < d->documents.n);
1610 return d->documents.strings[idx];
1613 /* Creates in D a vector named NAME that contains the N
1614 variables in VAR. Returns true if successful, or false if a
1615 vector named NAME already exists in D. */
1617 dict_create_vector (struct dictionary *d,
1619 struct variable **var, size_t n)
1622 for (size_t i = 0; i < n; i++)
1623 assert (dict_contains_var (d, var[i]));
1625 if (dict_lookup_vector (d, name) == NULL)
1627 d->vector = xnrealloc (d->vector, d->n_vectors + 1, sizeof *d->vector);
1628 d->vector[d->n_vectors++] = vector_create (name, var, n);
/* Creates in D a vector named NAME that contains the N
   variables in VAR.  A vector named NAME must not already exist
   in D. */
void
dict_create_vector_assert (struct dictionary *d,
                           const char *name,
                           struct variable **var, size_t n)
{
  assert (dict_lookup_vector (d, name) == NULL);
  dict_create_vector (d, name, var, n);
}
1647 /* Returns the vector in D with index IDX, which must be less
1648 than dict_get_n_vectors (D). */
1649 const struct vector *
1650 dict_get_vector (const struct dictionary *d, size_t idx)
1652 assert (idx < d->n_vectors);
1654 return d->vector[idx];
1657 /* Returns the number of vectors in D. */
1659 dict_get_n_vectors (const struct dictionary *d)
1661 return d->n_vectors;
1664 /* Looks up and returns the vector within D with the given
1666 const struct vector *
1667 dict_lookup_vector (const struct dictionary *d, const char *name)
1670 for (i = 0; i < d->n_vectors; i++)
1671 if (!utf8_strcasecmp (vector_get_name (d->vector[i]), name))
1672 return d->vector[i];
1676 /* Deletes all vectors from D. */
1678 dict_clear_vectors (struct dictionary *d)
1682 for (i = 0; i < d->n_vectors; i++)
1683 vector_destroy (d->vector[i]);
/* Multiple response sets. */
1692 /* Returns the multiple response set in DICT with index IDX, which must be
1693 between 0 and the count returned by dict_get_n_mrsets(), exclusive. */
1694 const struct mrset *
1695 dict_get_mrset (const struct dictionary *dict, size_t idx)
1697 assert (idx < dict->n_mrsets);
1698 return dict->mrsets[idx];
1701 /* Returns the number of multiple response sets in DICT. */
1703 dict_get_n_mrsets (const struct dictionary *dict)
1705 return dict->n_mrsets;
1708 /* Looks for a multiple response set named NAME in DICT. If it finds one,
1709 returns its index; otherwise, returns SIZE_MAX. */
1711 dict_lookup_mrset_idx (const struct dictionary *dict, const char *name)
1715 for (i = 0; i < dict->n_mrsets; i++)
1716 if (!utf8_strcasecmp (name, dict->mrsets[i]->name))
1722 /* Looks for a multiple response set named NAME in DICT. If it finds one,
1723 returns it; otherwise, returns NULL. */
1724 const struct mrset *
1725 dict_lookup_mrset (const struct dictionary *dict, const char *name)
1727 size_t idx = dict_lookup_mrset_idx (dict, name);
1728 return idx != SIZE_MAX ? dict->mrsets[idx] : NULL;
1731 /* Adds MRSET to DICT, replacing any existing set with the same name. Returns
1732 true if a set was replaced, false if none existed with the specified name.
1734 Ownership of MRSET is transferred to DICT. */
1736 dict_add_mrset (struct dictionary *dict, struct mrset *mrset)
1740 assert (mrset_ok (mrset, dict));
1742 idx = dict_lookup_mrset_idx (dict, mrset->name);
1743 if (idx == SIZE_MAX)
1745 dict->mrsets = xrealloc (dict->mrsets,
1746 (dict->n_mrsets + 1) * sizeof *dict->mrsets);
1747 dict->mrsets[dict->n_mrsets++] = mrset;
1752 mrset_destroy (dict->mrsets[idx]);
1753 dict->mrsets[idx] = mrset;
1758 /* Looks for a multiple response set in DICT named NAME. If found, removes it
1759 from DICT and returns true. If none is found, returns false without
1762 Deleting one multiple response set causes the indexes of other sets within
1765 dict_delete_mrset (struct dictionary *dict, const char *name)
1767 size_t idx = dict_lookup_mrset_idx (dict, name);
1768 if (idx != SIZE_MAX)
1770 mrset_destroy (dict->mrsets[idx]);
1771 dict->mrsets[idx] = dict->mrsets[--dict->n_mrsets];
1778 /* Deletes all multiple response sets from DICT. */
1780 dict_clear_mrsets (struct dictionary *dict)
1784 for (i = 0; i < dict->n_mrsets; i++)
1785 mrset_destroy (dict->mrsets[i]);
1786 free (dict->mrsets);
1787 dict->mrsets = NULL;
1791 /* Removes VAR, which must be in DICT, from DICT's multiple response sets. */
1793 dict_unset_mrset_var (struct dictionary *dict, struct variable *var)
1797 assert (dict_contains_var (dict, var));
1799 for (i = 0; i < dict->n_mrsets;)
1801 struct mrset *mrset = dict->mrsets[i];
1804 for (j = 0; j < mrset->n_vars;)
1805 if (mrset->vars[j] == var)
1806 remove_element (mrset->vars, mrset->n_vars--,
1807 sizeof *mrset->vars, j);
1811 if (mrset->n_vars < 2)
1813 mrset_destroy (mrset);
1814 dict->mrsets[i] = dict->mrsets[--dict->n_mrsets];
1822 /* Returns the variable set in DICT with index IDX, which must be between 0 and
1823 the count returned by dict_get_n_varsets(), exclusive. */
1824 const struct varset *
1825 dict_get_varset (const struct dictionary *dict, size_t idx)
1827 assert (idx < dict->n_varsets);
1828 return dict->varsets[idx];
1831 /* Returns the number of variable sets in DICT. */
1833 dict_get_n_varsets (const struct dictionary *dict)
1835 return dict->n_varsets;
1838 /* Looks for a variable set named NAME in DICT. If it finds one, returns its
1839 index; otherwise, returns SIZE_MAX. */
1841 dict_lookup_varset_idx (const struct dictionary *dict, const char *name)
1843 for (size_t i = 0; i < dict->n_varsets; i++)
1844 if (!utf8_strcasecmp (name, dict->varsets[i]->name))
1850 /* Looks for a multiple response set named NAME in DICT. If it finds one,
1851 returns it; otherwise, returns NULL. */
1852 const struct varset *
1853 dict_lookup_varset (const struct dictionary *dict, const char *name)
1855 size_t idx = dict_lookup_varset_idx (dict, name);
1856 return idx != SIZE_MAX ? dict->varsets[idx] : NULL;
1859 /* Adds VARSET to DICT, replacing any existing set with the same name. Returns
1860 true if a set was replaced, false if none existed with the specified name.
1862 Ownership of VARSET is transferred to DICT. */
1864 dict_add_varset (struct dictionary *dict, struct varset *varset)
1866 size_t idx = dict_lookup_varset_idx (dict, varset->name);
1867 if (idx == SIZE_MAX)
1869 dict->varsets = xrealloc (dict->varsets,
1870 (dict->n_varsets + 1) * sizeof *dict->varsets);
1871 dict->varsets[dict->n_varsets++] = varset;
1876 varset_destroy (dict->varsets[idx]);
1877 dict->varsets[idx] = varset;
1882 /* Deletes all variable sets from DICT. */
1884 dict_clear_varsets (struct dictionary *dict)
1886 for (size_t i = 0; i < dict->n_varsets; i++)
1887 varset_destroy (dict->varsets[i]);
1888 free (dict->varsets);
1889 dict->varsets = NULL;
1890 dict->n_varsets = 0;
1893 /* Removes VAR, which must be in DICT, from DICT's multiple response sets. */
1895 dict_unset_varset_var (struct dictionary *dict, struct variable *var)
1897 assert (dict_contains_var (dict, var));
1899 for (size_t i = 0; i < dict->n_varsets; i++)
1901 struct varset *varset = dict->varsets[i];
1903 for (size_t j = 0; j < varset->n_vars;)
1904 if (varset->vars[j] == var)
1905 remove_element (varset->vars, varset->n_vars--,
1906 sizeof *varset->vars, j);
1912 /* Returns D's attribute set. The caller may examine or modify
1913 the attribute set, but must not destroy it. Destroying D or
1914 calling dict_set_attributes for D will also destroy D's
1917 dict_get_attributes (const struct dictionary *d)
1919 return CONST_CAST (struct attrset *, &d->attributes);
1922 /* Replaces D's attributes set by a copy of ATTRS. */
1924 dict_set_attributes (struct dictionary *d, const struct attrset *attrs)
1926 attrset_destroy (&d->attributes);
1927 attrset_clone (&d->attributes, attrs);
1930 /* Returns true if D has at least one attribute in its attribute
1931 set, false if D's attribute set is empty. */
1933 dict_has_attributes (const struct dictionary *d)
1935 return attrset_count (&d->attributes) > 0;
1938 /* Called from variable.c to notify the dictionary that some property (indicated
1939 by WHAT) of the variable has changed. OLDVAR is a copy of V as it existed
1940 prior to the change. OLDVAR is destroyed by this function.
1943 dict_var_changed (const struct variable *v, unsigned int what, struct variable *oldvar)
1945 if (var_has_vardict (v))
1947 const struct vardict_info *vardict = var_get_vardict (v);
1948 struct dictionary *d = vardict->dict;
1953 if (what & (VAR_TRAIT_WIDTH | VAR_TRAIT_POSITION))
1954 invalidate_proto (d);
1956 if (d->changed) d->changed (d, d->changed_data);
1957 if (d->callbacks && d->callbacks->var_changed)
1958 d->callbacks->var_changed (d, var_get_dict_index (v), what, oldvar, d->cb_data);
/* Dictionary used to contain "internal variables".  Created on demand by
   dict_create_internal_var() and released by dict_destroy_internal_var()
   once it is empty. */
static struct dictionary *internal_dict;
1968 /* Create a variable of the specified WIDTH to be used for internal
1969 calculations only. The variable is assigned case index CASE_IDX. */
1971 dict_create_internal_var (int case_idx, int width)
1973 if (internal_dict == NULL)
1974 internal_dict = dict_create ("UTF-8");
1978 static int counter = INT_MAX / 2;
1979 struct variable *var;
1982 if (++counter == INT_MAX)
1983 counter = INT_MAX / 2;
1985 sprintf (name, "$internal%d", counter);
1986 var = dict_create_var (internal_dict, name, width);
1989 set_var_case_index (var, case_idx);
1995 /* Destroys VAR, which must have been created with
1996 dict_create_internal_var(). */
1998 dict_destroy_internal_var (struct variable *var)
2002 dict_delete_var (internal_dict, var);
2004 /* Destroy internal_dict if it has no variables left, just so that
2005 valgrind --leak-check --show-reachable won't show internal_dict. */
2006 if (dict_get_n_vars (internal_dict) == 0)
2008 dict_unref (internal_dict);
2009 internal_dict = NULL;
2015 vardict_get_dict_index (const struct vardict_info *vardict)
2017 return vardict - vardict->dict->vars;