1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/dictionary.h"
26 #include "data/attributes.h"
27 #include "data/case.h"
28 #include "data/identifier.h"
29 #include "data/mrset.h"
30 #include "data/settings.h"
31 #include "data/value-labels.h"
32 #include "data/vardict.h"
33 #include "data/variable.h"
34 #include "data/vector.h"
35 #include "libpspp/array.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/compiler.h"
38 #include "libpspp/hash-functions.h"
39 #include "libpspp/hmap.h"
40 #include "libpspp/i18n.h"
41 #include "libpspp/message.h"
42 #include "libpspp/misc.h"
43 #include "libpspp/pool.h"
44 #include "libpspp/str.h"
45 #include "libpspp/string-array.h"
47 #include "gl/intprops.h"
48 #include "gl/minmax.h"
49 #include "gl/xalloc.h"
50 #include "gl/xmemdup0.h"
53 #define _(msgid) gettext (msgid)
58 struct vardict_info *var; /* Variables. */
59 size_t var_cnt, var_cap; /* Number of variables, capacity. */
60 struct caseproto *proto; /* Prototype for dictionary cases
62 struct hmap name_map; /* Variable index by name. */
63 int next_value_idx; /* Index of next `union value' to allocate. */
64 const struct variable **split; /* SPLIT FILE vars. */
65 size_t split_cnt; /* SPLIT FILE count. */
66 struct variable *weight; /* WEIGHT variable. */
67 struct variable *filter; /* FILTER variable. */
68 casenumber case_limit; /* Current case limit (N command). */
69 char *label; /* File label. */
70 struct string_array documents; /* Documents. */
71 struct vector **vector; /* Vectors of variables. */
72 size_t vector_cnt; /* Number of vectors. */
73 struct attrset attributes; /* Custom attributes. */
74 struct mrset **mrsets; /* Multiple response sets. */
75 size_t n_mrsets; /* Number of multiple response sets. */
77 char *encoding; /* Character encoding of string data */
79 const struct dict_callbacks *callbacks; /* Callbacks on dictionary
81 void *cb_data ; /* Data passed to callbacks */
83 void (*changed) (struct dictionary *, void *); /* Generic change callback */
87 static void dict_unset_split_var (struct dictionary *, struct variable *);
88 static void dict_unset_mrset_var (struct dictionary *, struct variable *);
/* Sets D's character encoding to a newly allocated copy of ENC, made with
   xstrdup(). */
91 dict_set_encoding (struct dictionary *d, const char *enc)
96 d->encoding = xstrdup (enc);
101 dict_get_encoding (const struct dictionary *d)
106 /* Returns true if UTF-8 string ID is an acceptable identifier in DICT's
107 encoding, false otherwise. If ISSUE_ERROR is true, issues an explanatory
108 error message on failure. */
110 dict_id_is_valid (const struct dictionary *dict, const char *id,
113 return id_is_valid (id, dict->encoding, issue_error);
/* Installs CHANGED as D's generic change callback and stores DATA alongside
   it.  The functions in this file that modify D invoke the callback as
   CHANGED (D, DATA). */
117 dict_set_change_callback (struct dictionary *d,
118 void (*changed) (struct dictionary *, void*),
121 d->changed = changed;
122 d->changed_data = data;
125 /* Discards dictionary D's caseproto. (It will be regenerated
126 lazily, on demand.) */
128 invalidate_proto (struct dictionary *d)
130 caseproto_unref (d->proto);
134 /* Print a representation of dictionary D to stdout, for
135 debugging purposes. */
137 dict_dump (const struct dictionary *d)
140 for (i = 0 ; i < d->var_cnt ; ++i )
142 const struct variable *v = d->var[i].var;
143 printf ("Name: %s;\tdict_idx: %zu; case_idx: %zu\n",
145 var_get_dict_index (v),
146 var_get_case_index (v));
151 /* Associate CALLBACKS with DICT. Callbacks will be invoked whenever
152 the dictionary or any of the variables it contains are modified.
153 Each callback will get passed CALLBACK_DATA.
154 Any callback may be NULL, in which case it'll be ignored. */
157 dict_set_callbacks (struct dictionary *dict,
158 const struct dict_callbacks *callbacks,
161 dict->callbacks = callbacks;
162 dict->cb_data = callback_data;
165 /* Shallow copy the callbacks from SRC to DEST */
167 dict_copy_callbacks (struct dictionary *dest,
168 const struct dictionary *src)
170 dest->callbacks = src->callbacks;
171 dest->cb_data = src->cb_data;
174 /* Creates and returns a new dictionary. */
178 struct dictionary *d = xzalloc (sizeof *d);
180 hmap_init (&d->name_map);
181 attrset_init (&d->attributes);
185 /* Creates and returns a (deep) copy of an existing
188 The new dictionary's case indexes are copied from the old
189 dictionary. If the new dictionary won't be used to access
190 cases produced with the old dictionary, then the new
191 dictionary's case indexes should be compacted with
192 dict_compact_values to save space. */
194 dict_clone (const struct dictionary *s)
196 struct dictionary *d;
201 /* Set the new dictionary's encoding early so that string length limitations
202 are interpreted correctly. */
204 d->encoding = xstrdup (s->encoding);
/* Clone every variable of S into D, preserving its short names and its
   case index. */
206 for (i = 0; i < s->var_cnt; i++)
208 struct variable *sv = s->var[i].var;
209 struct variable *dv = dict_clone_var_assert (d, sv);
/* NOTE(review): this inner loop is also indexed by `i'; presumably an inner
   declaration that is not visible here shadows the outer index -- confirm. */
212 for (i = 0; i < var_get_short_name_cnt (sv); i++)
213 var_set_short_name (dv, i, var_get_short_name (sv, i));
215 var_get_vardict (dv)->case_index = var_get_vardict (sv)->case_index;
218 d->next_value_idx = s->next_value_idx;
/* Split variables are re-resolved by name, so that D refers to its own
   variables rather than to those of S. */
220 d->split_cnt = s->split_cnt;
221 if (d->split_cnt > 0)
223 d->split = xnmalloc (d->split_cnt, sizeof *d->split);
224 for (i = 0; i < d->split_cnt; i++)
225 d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i]));
/* The weight and filter variables are likewise looked up by name in D. */
228 if (s->weight != NULL)
229 dict_set_weight (d, dict_lookup_var_assert (d, var_get_name (s->weight)));
231 if (s->filter != NULL)
232 dict_set_filter (d, dict_lookup_var_assert (d, var_get_name (s->filter)));
234 d->case_limit = s->case_limit;
235 dict_set_label (d, dict_get_label (s));
236 dict_set_documents (d, dict_get_documents (s));
/* Clone each vector; vector_clone() is given both dictionaries. */
238 d->vector_cnt = s->vector_cnt;
239 d->vector = xnmalloc (d->vector_cnt, sizeof *d->vector);
240 for (i = 0; i < s->vector_cnt; i++)
241 d->vector[i] = vector_clone (s->vector[i], s, d);
243 dict_set_attributes (d, dict_get_attributes (s));
245 for (i = 0; i < s->n_mrsets; i++)
247 const struct mrset *old = s->mrsets[i];
251 /* Clone old mrset, then replace vars from S by vars from D. */
252 new = mrset_clone (old);
253 for (j = 0; j < new->n_vars; j++)
254 new->vars[j] = dict_lookup_var_assert (d, var_get_name (new->vars[j]));
256 dict_add_mrset (d, new);
262 /* Clears the contents from a dictionary without destroying the
263 dictionary itself. */
265 dict_clear (struct dictionary *d)
267 /* FIXME? Should we really clear case_limit, label, documents?
268 Others are necessarily cleared by deleting all the variables.*/
269 while (d->var_cnt > 0 )
271 dict_delete_var (d, d->var[d->var_cnt - 1].var);
276 d->var_cnt = d->var_cap = 0;
277 invalidate_proto (d);
278 hmap_clear (&d->name_map);
279 d->next_value_idx = 0;
280 dict_set_split_vars (d, NULL, 0);
281 dict_set_weight (d, NULL);
282 dict_set_filter (d, NULL);
286 string_array_clear (&d->documents);
287 dict_clear_vectors (d);
288 attrset_clear (&d->attributes);
291 /* Destroys the aux data for every variable in D, by calling
292 var_clear_aux() for each variable. */
294 dict_clear_aux (struct dictionary *d)
298 for (i = 0; i < d->var_cnt; i++)
299 var_clear_aux (d->var[i].var);
302 /* Clears a dictionary and destroys it. */
304 dict_destroy (struct dictionary *d)
308 /* In general, we don't want callbacks occurring while the dictionary
309 is being destroyed. */
310 d->callbacks = NULL ;
/* Release the name map, the custom attributes, and the multiple response
   sets. */
313 hmap_destroy (&d->name_map);
314 attrset_destroy (&d->attributes);
315 dict_clear_mrsets (d);
321 /* Returns the number of variables in D. */
323 dict_get_var_cnt (const struct dictionary *d)
328 /* Returns the variable in D with dictionary index IDX, which
329 must be between 0 and the count returned by
330 dict_get_var_cnt(), exclusive. */
332 dict_get_var (const struct dictionary *d, size_t idx)
334 assert (idx < d->var_cnt);
336 return d->var[idx].var;
339 /* Sets *VARS to an array of pointers to variables in D and *CNT
340 to the number of variables in *D. All variables are returned
341 except for those, if any, in the classes indicated by EXCLUDE.
342 (There is no point in putting DC_SYSTEM in EXCLUDE as
343 dictionaries never include system variables.) */
345 dict_get_vars (const struct dictionary *d, const struct variable ***vars,
346 size_t *cnt, enum dict_class exclude)
348 dict_get_vars_mutable (d, (struct variable ***) vars, cnt, exclude);
351 /* Sets *VARS to an array of pointers to variables in D and *CNT
352 to the number of variables in *D. All variables are returned
353 except for those, if any, in the classes indicated by EXCLUDE.
354 (There is no point in putting DC_SYSTEM in EXCLUDE as
355 dictionaries never include system variables.) */
357 dict_get_vars_mutable (const struct dictionary *d, struct variable ***vars,
358 size_t *cnt, enum dict_class exclude)
/* EXCLUDE may contain only bits that are part of DC_ALL. */
363 assert (exclude == (exclude & DC_ALL));
/* First pass over the variables whose class is not in EXCLUDE; `count'
   below sizes the result array. */
366 for (i = 0; i < d->var_cnt; i++)
368 enum dict_class class = var_get_dict_class (d->var[i].var);
369 if (!(class & exclude))
/* Allocate room for `count' variable pointers. */
373 *vars = xnmalloc (count, sizeof **vars);
/* Second pass: store each non-excluded variable, advancing *CNT. */
375 for (i = 0; i < d->var_cnt; i++)
377 enum dict_class class = var_get_dict_class (d->var[i].var);
378 if (!(class & exclude))
379 (*vars)[(*cnt)++] = d->var[i].var;
/* Both passes must have selected the same number of variables. */
381 assert (*cnt == count);
/* Appends V to D's variable array, enters it in the name map, assigns it
   D's next case index, and notifies the change callbacks. */
384 static struct variable *
385 add_var (struct dictionary *d, struct variable *v)
387 struct vardict_info *vardict;
389 /* Update dictionary. */
/* The array is full, so grow it.  The vardict_info structures are the array
   elements themselves, so after reallocation each variable's vardict pointer
   is set again and the name map is rebuilt from the elements' stored
   hashes. */
390 if (d->var_cnt >= d->var_cap)
394 d->var = x2nrealloc (d->var, &d->var_cap, sizeof *d->var);
395 hmap_clear (&d->name_map);
396 for (i = 0; i < d->var_cnt; i++)
398 var_set_vardict (d->var[i].var, &d->var[i]);
399 hmap_insert_fast (&d->name_map, &d->var[i].name_node,
400 d->var[i].name_node.hash);
/* Claim the next free slot for V. */
404 vardict = &d->var[d->var_cnt++];
/* Names are hashed with hash_case_string(), the same hash that
   dict_lookup_var() uses to search the map. */
407 hmap_insert (&d->name_map, &vardict->name_node,
408 hash_case_string (var_get_name (v), 0));
409 vardict->case_index = d->next_value_idx;
410 var_set_vardict (v, vardict);
412 if ( d->changed ) d->changed (d, d->changed_data);
413 if ( d->callbacks && d->callbacks->var_added )
414 d->callbacks->var_added (d, var_get_dict_index (v), d->cb_data);
/* Discard the cached case prototype, which predates the new variable. */
417 invalidate_proto (d);
422 /* Creates and returns a new variable in D with the given NAME
423 and WIDTH. Returns a null pointer if the given NAME would
424 duplicate that of an existing variable in the dictionary. */
426 dict_create_var (struct dictionary *d, const char *name, int width)
428 return (dict_lookup_var (d, name) == NULL
429 ? dict_create_var_assert (d, name, width)
433 /* Creates and returns a new variable in D with the given NAME
434 and WIDTH. Assert-fails if the given NAME would duplicate
435 that of an existing variable in the dictionary. */
437 dict_create_var_assert (struct dictionary *d, const char *name, int width)
439 assert (dict_lookup_var (d, name) == NULL);
440 return add_var (d, var_create (name, width));
443 /* Creates and returns a new variable in D, as a copy of existing variable
444 OLD_VAR, which need not be in D or in any dictionary. Returns a null
445 pointer if OLD_VAR's name would duplicate that of an existing variable in D. */
448 dict_clone_var (struct dictionary *d, const struct variable *old_var)
450 return dict_clone_var_as (d, old_var, var_get_name (old_var));
453 /* Creates and returns a new variable in D, as a copy of existing variable
454 OLD_VAR, which need not be in D or in any dictionary. Assert-fails if
455 OLD_VAR's name would duplicate that of an existing variable in the dictionary. */
458 dict_clone_var_assert (struct dictionary *d, const struct variable *old_var)
460 return dict_clone_var_as_assert (d, old_var, var_get_name (old_var));
463 /* Creates and returns a new variable in D with name NAME, as a copy of
464 existing variable OLD_VAR, which need not be in D or in any dictionary.
465 Returns a null pointer if the given NAME would duplicate that of an existing
466 variable in the dictionary. */
468 dict_clone_var_as (struct dictionary *d, const struct variable *old_var,
471 return (dict_lookup_var (d, name) == NULL
472 ? dict_clone_var_as_assert (d, old_var, name)
476 /* Creates and returns a new variable in D with name NAME, as a copy of
477 existing variable OLD_VAR, which need not be in D or in any dictionary.
478 Assert-fails if the given NAME would duplicate that of an existing variable
479 in the dictionary. */
481 dict_clone_var_as_assert (struct dictionary *d, const struct variable *old_var,
484 struct variable *new_var = var_clone (old_var);
485 assert (dict_lookup_var (d, name) == NULL);
486 var_set_name (new_var, name);
487 return add_var (d, new_var);
490 /* Returns the variable named NAME in D, or a null pointer if no
491 variable has that name. */
493 dict_lookup_var (const struct dictionary *d, const char *name)
495 struct vardict_info *vardict;
/* Visit only the name map entries whose hash equals the
   hash_case_string() hash of NAME. */
497 HMAP_FOR_EACH_WITH_HASH (vardict, struct vardict_info, name_node,
498 hash_case_string (name, 0), &d->name_map)
500 struct variable *var = vardict->var;
/* Entries with equal hashes can still have different names, so compare the
   names themselves, ignoring case. */
501 if (!strcasecmp (var_get_name (var), name))
508 /* Returns the variable named NAME in D. Assert-fails if no
509 variable has that name. */
511 dict_lookup_var_assert (const struct dictionary *d, const char *name)
513 struct variable *v = dict_lookup_var (d, name);
518 /* Returns true if variable V is in dictionary D, false otherwise. */
521 dict_contains_var (const struct dictionary *d, const struct variable *v)
523 return (var_has_vardict (v)
524 && vardict_get_dictionary (var_get_vardict (v)) == d);
527 /* Compares two double pointers to variables, which should point
528 to elements of a struct dictionary's `var' member array. */
530 compare_var_ptrs (const void *a_, const void *b_, const void *aux UNUSED)
532 struct variable *const *a = a_;
533 struct variable *const *b = b_;
535 return *a < *b ? -1 : *a > *b;
/* Removes VARDICT's name node from D's name map. */
539 unindex_var (struct dictionary *d, struct vardict_info *vardict)
541 hmap_delete (&d->name_map, &vardict->name_node);
544 /* This function assumes that vardict->name_node.hash is valid, that is, that
545 its name has not changed since it was hashed (rename_var() updates this
546 hash along with the name itself). */
548 reindex_var (struct dictionary *d, struct vardict_info *vardict)
550 struct variable *var = vardict->var;
552 var_set_vardict (var, vardict);
553 hmap_insert_fast (&d->name_map, &vardict->name_node,
554 vardict->name_node.hash);
556 if ( d->changed ) d->changed (d, d->changed_data);
557 if ( d->callbacks && d->callbacks->var_changed )
558 d->callbacks->var_changed (d, var_get_dict_index (var), d->cb_data);
561 /* Sets the case_index in V's vardict to CASE_INDEX. */
563 set_var_case_index (struct variable *v, int case_index)
565 var_get_vardict (v)->case_index = case_index;
568 /* Removes the dictionary variables with indexes from FROM to TO (exclusive) from D's name map. */
571 unindex_vars (struct dictionary *d, size_t from, size_t to)
575 for (i = from; i < to; i++)
576 unindex_var (d, &d->var[i]);
579 /* Re-sets the dict_index in the dictionary variables with
580 indexes from FROM to TO (exclusive). */
582 reindex_vars (struct dictionary *d, size_t from, size_t to)
586 for (i = from; i < to; i++)
587 reindex_var (d, &d->var[i]);
590 /* Deletes variable V from dictionary D and frees V.
592 This is a very bad idea if there might be any pointers to V
593 from outside D. In general, no variable in the active dataset's
594 dictionary should be deleted when any transformations are
595 active on the dictionary's dataset, because those
596 transformations might reference the deleted variable. The
597 safest time to delete a variable is just after a procedure has
598 been executed, as done by DELETE VARIABLES.
600 Pointers to V within D are not a problem, because
601 dict_delete_var() knows to remove V from split variables,
602 weights, filters, etc. */
604 dict_delete_var (struct dictionary *d, struct variable *v)
/* Record V's dictionary index, case index, and width now; they are passed to
   the var_deleted callback at the end of this function. */
606 int dict_index = var_get_dict_index (v);
607 const int case_index = var_get_case_index (v);
608 const int width = var_get_width (v);
610 assert (dict_contains_var (d, v));
612 /* Delete aux data. */
/* Drop V from the split variables and from the multiple response sets. */
615 dict_unset_split_var (d, v);
616 dict_unset_mrset_var (d, v);
619 dict_set_weight (d, NULL);
622 dict_set_filter (d, NULL);
/* NOTE(review): this call takes only D, so it appears to clear every vector
   rather than just those that contain V -- confirm. */
624 dict_clear_vectors (d);
626 /* Remove V from var array. */
/* V and every variable after it are unindexed first, because their slots in
   the array change when V's element is removed. */
627 unindex_vars (d, dict_index, d->var_cnt);
628 remove_element (d->var, d->var_cnt, sizeof *d->var, dict_index);
631 /* Update dict_index for each affected variable. */
632 reindex_vars (d, dict_index, d->var_cnt);
635 var_clear_vardict (v);
638 if ( d->changed ) d->changed (d, d->changed_data);
640 invalidate_proto (d);
641 if (d->callbacks && d->callbacks->var_deleted )
642 d->callbacks->var_deleted (d, dict_index, case_index, width, d->cb_data);
645 /* Deletes the COUNT variables listed in VARS from D. This is
646 unsafe; see the comment on dict_delete_var() for details. */
648 dict_delete_vars (struct dictionary *d,
649 struct variable *const *vars, size_t count)
651 /* FIXME: this can be done in O(count) time, but this algorithm calls dict_delete_var() once per variable. */
653 assert (count == 0 || vars != NULL);
656 dict_delete_var (d, *vars++);
659 /* Deletes the COUNT variables in D starting at index IDX. This
660 is unsafe; see the comment on dict_delete_var() for details. */
665 /* FIXME: this can be done in O(count) time, but this algorithm calls dict_delete_var() once per variable. */
667 assert (idx + count <= d->var_cnt);
670 dict_delete_var (d, d->var[idx].var);
673 /* Deletes scratch variables from dictionary D. */
675 dict_delete_scratch_vars (struct dictionary *d)
679 /* FIXME: this can be done in O(count) time, but this algorithm calls dict_delete_var() once per variable. */
681 for (i = 0; i < d->var_cnt; )
682 if (var_get_dict_class (d->var[i].var) == DC_SCRATCH)
683 dict_delete_var (d, d->var[i].var);
688 /* Moves V to 0-based position NEW_INDEX in D. Other variables in D,
689 if any, retain their relative positions. Runs in time linear
690 in the distance moved. */
692 dict_reorder_var (struct dictionary *d, struct variable *v, size_t new_index)
694 size_t old_index = var_get_dict_index (v);
696 assert (new_index < d->var_cnt);
698 unindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1);
699 move_element (d->var, d->var_cnt, sizeof *d->var, old_index, new_index);
700 reindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1);
703 /* Reorders the variables in D, placing the COUNT variables
704 listed in ORDER in that order at the beginning of D. The
705 other variables in D, if any, retain their relative positions. */
708 dict_reorder_vars (struct dictionary *d,
709 struct variable *const *order, size_t count)
711 struct vardict_info *new_var;
714 assert (count == 0 || order != NULL);
715 assert (count <= d->var_cnt);
/* Build the reordered array separately, with the same capacity as the
   current one. */
717 new_var = xnmalloc (d->var_cap, sizeof *new_var);
719 /* Add variables in ORDER to new_var. */
720 for (i = 0; i < count; i++)
722 struct vardict_info *old_var;
724 assert (dict_contains_var (d, order[i]));
726 old_var = var_get_vardict (order[i]);
727 new_var[i] = *old_var;
/* Mark the old slot as already copied; the loop below skips slots whose
   dict member is null. */
728 old_var->dict = NULL;
731 /* Add remaining variables to new_var. */
732 for (i = 0; i < d->var_cnt; i++)
733 if (d->var[i].dict != NULL)
734 new_var[count++] = d->var[i];
/* COUNT was advanced once per remaining variable, so it must now equal the
   total number of variables. */
735 assert (count == d->var_cnt);
737 /* Replace old vardicts by new ones. */
/* Empty the name map, then re-enter every variable and reset its vardict
   pointer. */
741 hmap_clear (&d->name_map);
742 reindex_vars (d, 0, d->var_cnt);
745 /* Changes the name of variable V that is currently in a dictionary to NEW_NAME. */
748 rename_var (struct variable *v, const char *new_name)
750 struct vardict_info *vardict = var_get_vardict (v);
751 var_clear_vardict (v);
752 var_set_name (v, new_name);
753 vardict->name_node.hash = hash_case_string (new_name, 0);
754 var_set_vardict (v, vardict);
757 /* Changes the name of V in D to name NEW_NAME. Assert-fails if
758 a variable named NEW_NAME is already in D, except that
759 NEW_NAME may be the same as V's existing name. */
761 dict_rename_var (struct dictionary *d, struct variable *v,
762 const char *new_name)
764 assert (!strcasecmp (var_get_name (v), new_name)
765 || dict_lookup_var (d, new_name) == NULL);
767 unindex_var (d, var_get_vardict (v));
768 rename_var (v, new_name);
769 reindex_var (d, var_get_vardict (v));
771 if (settings_get_algorithm () == ENHANCED)
772 var_clear_short_names (v);
774 if ( d->changed ) d->changed (d, d->changed_data);
775 if ( d->callbacks && d->callbacks->var_changed )
776 d->callbacks->var_changed (d, var_get_dict_index (v), d->cb_data);
779 /* Renames COUNT variables specified in VARS to the names given
780 in NEW_NAMES within dictionary D. If the renaming would
781 result in a duplicate variable name, returns false and stores a
782 name that would be duplicated into *ERR_NAME (if ERR_NAME is
783 non-null). Otherwise, the renaming is successful, and true is returned. */
786 dict_rename_vars (struct dictionary *d,
787 struct variable **vars, char **new_names, size_t count,
794 assert (count == 0 || vars != NULL);
795 assert (count == 0 || new_names != NULL);
797 /* Save the names of the variables to be renamed. */
798 pool = pool_create ();
799 old_names = pool_nalloc (pool, count, sizeof *old_names);
800 for (i = 0; i < count; i++)
801 old_names[i] = pool_strdup (pool, var_get_name (vars[i]));
803 /* Remove the variables to be renamed from the name hash, and rename them. */
805 for (i = 0; i < count; i++)
807 unindex_var (d, var_get_vardict (vars[i]));
808 rename_var (vars[i], new_names[i]);
811 /* Add the renamed variables back into the name hash,
812 checking for conflicts. */
813 for (i = 0; i < count; i++)
815 if (dict_lookup_var (d, var_get_name (vars[i])) != NULL)
817 /* There is a name conflict.
818 Back out all the name changes that have already
819 taken place, and indicate failure. */
821 if (err_name != NULL)
822 *err_name = new_names[i];
824 for (i = 0; i < fail_idx; i++)
825 unindex_var (d, var_get_vardict (vars[i]));
827 for (i = 0; i < count; i++)
829 rename_var (vars[i], old_names[i]);
830 reindex_var (d, var_get_vardict (vars[i]));
836 reindex_var (d, var_get_vardict (vars[i]));
839 /* Clear short names. */
840 if (settings_get_algorithm () == ENHANCED)
841 for (i = 0; i < count; i++)
842 var_clear_short_names (vars[i]);
848 /* Returns true if a variable named NAME may be inserted in DICT;
849 that is, if there is not already a variable with that name in
850 DICT and if NAME is not a reserved word. (The caller's checks
851 have already verified that NAME is otherwise acceptable as a variable name.) */
854 var_name_is_insertable (const struct dictionary *dict, const char *name)
856 return (dict_lookup_var (dict, name) == NULL
857 && lex_id_to_token (ss_cstr (name)) == T_ID);
861 make_hinted_name (const struct dictionary *dict, const char *hint)
863 size_t hint_len = strlen (hint);
864 bool dropped = false;
869 /* The allocation size here is OK: characters that are copied directly fit
870 OK, and characters that are not copied directly are replaced by a single
871 '_' byte. If u8_mbtouc() replaces bad input by 0xfffd, then that will get
872 replaced by '_' too. */
873 root = rp = xmalloc (hint_len + 1);
874 for (ofs = 0; ofs < hint_len; ofs += mblen)
878 mblen = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, hint + ofs),
881 ? lex_uc_is_id1 (uc) && uc != '$'
882 : lex_uc_is_idn (uc))
889 rp += u8_uctomb (CHAR_CAST (uint8_t *, rp), uc, 6);
900 if (var_name_is_insertable (dict, root))
903 for (i = 0; i < ULONG_MAX; i++)
905 char suffix[INT_BUFSIZE_BOUND (i) + 1];
909 if (!str_format_26adic (i + 1, &suffix[1], sizeof suffix - 1))
912 name = utf8_encoding_concat (root, suffix, dict->encoding, 64);
913 if (var_name_is_insertable (dict, name))
/* Searches for a name of the form "VAR%03lu" that is not yet used in DICT.
   The search starts at *NUM_START, but never below 1, or at 1 if NUM_START is
   null.  When a free name is found, the number following the one used is
   stored into *NUM_START, if NUM_START is non-null, and the name is returned
   as a string allocated with xstrdup(). */
928 make_numeric_name (const struct dictionary *dict, unsigned long int *num_start)
930 unsigned long int number;
932 for (number = num_start != NULL ? MAX (*num_start, 1) : 1;
/* Room for "VAR", the digits of NUMBER, and a null terminator. */
936 char name[3 + INT_STRLEN_BOUND (number) + 1];
938 sprintf (name, "VAR%03lu", number);
939 if (dict_lookup_var (dict, name) == NULL)
941 if (num_start != NULL)
942 *num_start = number + 1;
943 return xstrdup (name);
951 /* Devises and returns a variable name unique within DICT. The variable name
952 is owned by the caller, which must free it with free() when it is no longer needed.
955 HINT, if it is non-null, is used as a suggestion that will be
956 modified for suitability as a variable name and for uniqueness.
959 If HINT is null or entirely unsuitable, a name in the form
960 "VAR%03lu" will be generated, where the smallest unused integer
961 value is used. If NUM_START is non-null, then its value is
962 used as the minimum numeric value to check, and it is updated
963 to the next value to be checked. */
966 dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
967 unsigned long int *num_start)
971 char *hinted_name = make_hinted_name (dict, hint);
972 if (hinted_name != NULL)
975 return make_numeric_name (dict, num_start);
978 /* Returns the weighting variable in dictionary D, or a null
979 pointer if the dictionary is unweighted. */
981 dict_get_weight (const struct dictionary *d)
983 assert (d->weight == NULL || dict_contains_var (d, d->weight));
988 /* Returns the value of D's weighting variable in case C, except
989 that a negative weight is returned as 0. Returns 1 if the
990 dictionary is unweighted. Will warn about missing, negative,
991 or zero values if *WARN_ON_INVALID is true. The function will
992 set *WARN_ON_INVALID to false if an invalid weight is found. */
995 dict_get_case_weight (const struct dictionary *d, const struct ccase *c,
996 bool *warn_on_invalid)
1000 if (d->weight == NULL)
1004 double w = case_num (c, d->weight);
/* Detect a negative weight or one that is missing in any sense (MV_ANY). */
1005 if (w < 0.0 || var_is_num_missing (d->weight, w, MV_ANY))
/* Warn at most once: the caller's flag is cleared when the warning is
   issued.  A null WARN_ON_INVALID suppresses the warning entirely. */
1007 if ( w == 0.0 && warn_on_invalid != NULL && *warn_on_invalid ) {
1008 *warn_on_invalid = false;
1009 msg (SW, _("At least one case in the data file had a weight value "
1010 "that was user-missing, system-missing, zero, or "
1011 "negative. These case(s) were ignored."));
1017 /* Sets the weighting variable of D to V, or turns off
1018 weighting if V is a null pointer. */
1020 dict_set_weight (struct dictionary *d, struct variable *v)
1022 assert (v == NULL || dict_contains_var (d, v));
1023 assert (v == NULL || var_is_numeric (v));
1027 if (d->changed) d->changed (d, d->changed_data);
1028 if ( d->callbacks && d->callbacks->weight_changed )
1029 d->callbacks->weight_changed (d,
1030 v ? var_get_dict_index (v) : -1,
1034 /* Returns the filter variable in dictionary D (see cmd_filter())
1035 or a null pointer if the dictionary is unfiltered. */
1037 dict_get_filter (const struct dictionary *d)
1039 assert (d->filter == NULL || dict_contains_var (d, d->filter));
1044 /* Sets V as the filter variable for dictionary D. Passing a
1045 null pointer for V turns off filtering. */
1047 dict_set_filter (struct dictionary *d, struct variable *v)
1049 assert (v == NULL || dict_contains_var (d, v));
1050 assert (v == NULL || var_is_numeric (v));
1054 if (d->changed) d->changed (d, d->changed_data);
1055 if ( d->callbacks && d->callbacks->filter_changed )
1056 d->callbacks->filter_changed (d,
1057 v ? var_get_dict_index (v) : -1,
1061 /* Returns the case limit for dictionary D, or zero if the number
1062 of cases is unlimited. */
1064 dict_get_case_limit (const struct dictionary *d)
1066 return d->case_limit;
1069 /* Sets CASE_LIMIT as the case limit for dictionary D. Use
1070 0 for CASE_LIMIT to indicate no limit. */
1072 dict_set_case_limit (struct dictionary *d, casenumber case_limit)
1074 d->case_limit = case_limit;
1077 /* Returns the prototype used for cases created by dictionary D. */
1078 const struct caseproto *
1079 dict_get_proto (const struct dictionary *d_)
/* The prototype is built on demand and stored in the dictionary, so
   constness is cast away to allow the assignment to D's proto member. */
1081 struct dictionary *d = CONST_CAST (struct dictionary *, d_);
1082 if (d->proto == NULL)
1086 d->proto = caseproto_create ();
1087 d->proto = caseproto_reserve (d->proto, d->var_cnt);
/* Record each variable's width at that variable's case index. */
1088 for (i = 0; i < d->var_cnt; i++)
1089 d->proto = caseproto_set_width (d->proto,
1090 var_get_case_index (d->var[i].var),
1091 var_get_width (d->var[i].var));
1096 /* Returns the case index of the next value to be added to D.
1097 This value is the number of `union value's that need to be
1098 allocated to store a case for dictionary D. */
1100 dict_get_next_value_idx (const struct dictionary *d)
1102 return d->next_value_idx;
1105 /* Returns the number of bytes needed to store a case for dictionary D. */
1108 dict_get_case_size (const struct dictionary *d)
1110 return sizeof (union value) * dict_get_next_value_idx (d);
1113 /* Reassigns values in dictionary D so that fragmentation is eliminated. */
1116 dict_compact_values (struct dictionary *d)
1120 d->next_value_idx = 0;
1121 for (i = 0; i < d->var_cnt; i++)
1123 struct variable *v = d->var[i].var;
1124 set_var_case_index (v, d->next_value_idx++);
1126 invalidate_proto (d);
1129 /* Returns the number of values occupied by the variables in
1130 dictionary D. All variables are considered if EXCLUDE_CLASSES
1131 is 0, or it may contain one or more of (1u << DC_ORDINARY),
1132 (1u << DC_SYSTEM), or (1u << DC_SCRATCH) to exclude the
1133 corresponding type of variable.
1135 The return value may be less than the number of values in one
1136 of dictionary D's cases (as returned by
1137 dict_get_next_value_idx) even if EXCLUDE_CLASSES is 0, because there may be
1138 gaps in D's cases due to deleted variables. */
1140 dict_count_values (const struct dictionary *d, unsigned int exclude_classes)
1145 assert ((exclude_classes & ~((1u << DC_ORDINARY)
1147 | (1u << DC_SCRATCH))) == 0);
1150 for (i = 0; i < d->var_cnt; i++)
1152 enum dict_class class = var_get_dict_class (d->var[i].var);
1153 if (!(exclude_classes & (1u << class)))
1159 /* Returns the case prototype that would result after deleting
1160 all variables from D that are in one of the
1161 EXCLUDE_CLASSES and compacting the dictionary with dict_compact_values().
1164 The caller must unref the returned caseproto when it is no longer needed. */
1167 dict_get_compacted_proto (const struct dictionary *d,
1168 unsigned int exclude_classes)
1170 struct caseproto *proto;
1173 assert ((exclude_classes & ~((1u << DC_ORDINARY)
1175 | (1u << DC_SCRATCH))) == 0);
1177 proto = caseproto_create ();
1178 for (i = 0; i < d->var_cnt; i++)
1180 struct variable *v = d->var[i].var;
1181 if (!(exclude_classes & (1u << var_get_dict_class (v))))
1182 proto = caseproto_add_width (proto, var_get_width (v));
1187 /* Returns the SPLIT FILE vars (see cmd_split_file()). Call
1188 dict_get_split_cnt() to determine how many SPLIT FILE vars
1189 there are. Returns a null pointer if and only if there are no SPLIT FILE vars. */
1191 const struct variable *const *
1192 dict_get_split_vars (const struct dictionary *d)
1197 /* Returns the number of SPLIT FILE vars. */
1199 dict_get_split_cnt (const struct dictionary *d)
1201 return d->split_cnt;
1204 /* Removes variable V, which must be in D, from D's set of split variables. */
1207 dict_unset_split_var (struct dictionary *d, struct variable *v)
1211 assert (dict_contains_var (d, v));
1213 orig_count = d->split_cnt;
1214 d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
1215 &v, compare_var_ptrs, NULL);
1216 if (orig_count != d->split_cnt)
1218 if (d->changed) d->changed (d, d->changed_data);
1219 /* We changed the set of split variables so invoke the callback. */
1221 if (d->callbacks && d->callbacks->split_changed)
1222 d->callbacks->split_changed (d, d->cb_data);
1226 /* Sets CNT split vars SPLIT in dictionary D. */
1228 dict_set_split_vars (struct dictionary *d,
1229 struct variable *const *split, size_t cnt)
1231 assert (cnt == 0 || split != NULL);
1236 d->split = xnrealloc (d->split, cnt, sizeof *d->split) ;
1237 memcpy (d->split, split, cnt * sizeof *d->split);
1245 if (d->changed) d->changed (d, d->changed_data);
1246 if ( d->callbacks && d->callbacks->split_changed )
1247 d->callbacks->split_changed (d, d->cb_data);
1250 /* Returns the file label for D, or a null pointer if D is
1251 unlabeled (see cmd_file_label()). */
1253 dict_get_label (const struct dictionary *d)
1258 /* Sets D's file label to LABEL, truncating it to a maximum of 60
1261 Removes D's label if LABEL is null or the empty string. */
1263 dict_set_label (struct dictionary *d, const char *label)
1266 d->label = label != NULL && label[0] != '\0' ? xstrndup (label, 60) : NULL;
1269 /* Returns the documents for D, as an UTF-8 encoded string_array. The
1270 return value is always nonnull; if there are no documents then the
1271 string_arary is empty.*/
1272 const struct string_array *
1273 dict_get_documents (const struct dictionary *d)
1275 return &d->documents;
1278 /* Replaces the documents for D by NEW_DOCS, a UTF-8 encoded string_array. */
1280 dict_set_documents (struct dictionary *d, const struct string_array *new_docs)
1284 dict_clear_documents (d);
1286 for (i = 0; i < new_docs->n; i++)
1287 dict_add_document_line (d, new_docs->strings[i], false);
/* Discards D's documents and replaces them by the contents of the
   UTF-8 encoded string NEW_DOCS, which is split into lines at each
   new-line character.  Every line goes through
   dict_add_document_line() with warnings disabled, so it is truncated
   silently to at most DOC_LINE_LENGTH bytes in D's encoding. */
void
dict_set_documents_string (struct dictionary *d, const char *new_docs)
{
  const char *pos = new_docs;

  dict_clear_documents (d);
  while (*pos != '\0')
    {
      /* Length of the current line, excluding any new-line. */
      size_t line_len = strcspn (pos, "\n");
      char *copy = xmemdup0 (pos, line_len);

      dict_add_document_line (d, copy, false);
      free (copy);

      /* Step over the line and, if present, its terminating new-line. */
      pos += line_len;
      if (*pos == '\n')
        pos++;
    }
}
1312 /* Drops the documents from dictionary D. */
1314 dict_clear_documents (struct dictionary *d)
1316 string_array_clear (&d->documents);
1319 /* Appends the UTF-8 encoded LINE to the documents in D. LINE will be
1320 truncated so that it is no more than 80 bytes in the dictionary's
1321 encoding. If this causes some text to be lost, and ISSUE_WARNING is true,
1322 then a warning will be issued. */
1324 dict_add_document_line (struct dictionary *d, const char *line,
1330 trunc_len = utf8_encoding_trunc_len (line, d->encoding, DOC_LINE_LENGTH);
1331 truncated = line[trunc_len] != '\0';
1332 if (truncated && issue_warning)
1334 /* Note to translators: "bytes" is correct, not characters */
1335 msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH);
1338 string_array_append_nocopy (&d->documents, xmemdup0 (line, trunc_len));
1343 /* Returns the number of document lines in dictionary D. */
1345 dict_get_document_line_cnt (const struct dictionary *d)
1347 return d->documents.n;
1350 /* Returns document line number IDX in dictionary D. The caller must not
1351 modify or free the returned string. */
1353 dict_get_document_line (const struct dictionary *d, size_t idx)
1355 assert (idx < d->documents.n);
1356 return d->documents.strings[idx];
1359 /* Creates in D a vector named NAME that contains the CNT
1360 variables in VAR. Returns true if successful, or false if a
1361 vector named NAME already exists in D. */
1363 dict_create_vector (struct dictionary *d,
1365 struct variable **var, size_t cnt)
1370 for (i = 0; i < cnt; i++)
1371 assert (dict_contains_var (d, var[i]));
1373 if (dict_lookup_vector (d, name) == NULL)
1375 d->vector = xnrealloc (d->vector, d->vector_cnt + 1, sizeof *d->vector);
1376 d->vector[d->vector_cnt++] = vector_create (name, var, cnt);
/* Like dict_create_vector(), for callers that know NAME is unused:
   adds to D a vector called NAME made of the CNT variables in VAR.
   It is an error (checked by assertion) for D to already have a
   vector named NAME. */
void
dict_create_vector_assert (struct dictionary *d,
                           const char *name,
                           struct variable **var, size_t cnt)
{
  assert (dict_lookup_vector (d, name) == NULL);
  dict_create_vector (d, name, var, cnt);
}
1395 /* Returns the vector in D with index IDX, which must be less
1396 than dict_get_vector_cnt (D). */
1397 const struct vector *
1398 dict_get_vector (const struct dictionary *d, size_t idx)
1400 assert (idx < d->vector_cnt);
1402 return d->vector[idx];
1405 /* Returns the number of vectors in D. */
1407 dict_get_vector_cnt (const struct dictionary *d)
1409 return d->vector_cnt;
1412 /* Looks up and returns the vector within D with the given
1414 const struct vector *
1415 dict_lookup_vector (const struct dictionary *d, const char *name)
1418 for (i = 0; i < d->vector_cnt; i++)
1419 if (!strcasecmp (vector_get_name (d->vector[i]), name))
1420 return d->vector[i];
1424 /* Deletes all vectors from D. */
1426 dict_clear_vectors (struct dictionary *d)
1430 for (i = 0; i < d->vector_cnt; i++)
1431 vector_destroy (d->vector[i]);
1438 /* Multiple response sets. */
1440 /* Returns the multiple response set in DICT with index IDX, which must be
1441 between 0 and the count returned by dict_get_n_mrsets(), exclusive. */
1442 const struct mrset *
1443 dict_get_mrset (const struct dictionary *dict, size_t idx)
1445 assert (idx < dict->n_mrsets);
1446 return dict->mrsets[idx];
1449 /* Returns the number of multiple response sets in DICT. */
1451 dict_get_n_mrsets (const struct dictionary *dict)
1453 return dict->n_mrsets;
1456 /* Looks for a multiple response set named NAME in DICT. If it finds one,
1457 returns its index; otherwise, returns SIZE_MAX. */
1459 dict_lookup_mrset_idx (const struct dictionary *dict, const char *name)
1463 for (i = 0; i < dict->n_mrsets; i++)
1464 if (!strcasecmp (name, dict->mrsets[i]->name))
1470 /* Looks for a multiple response set named NAME in DICT. If it finds one,
1471 returns it; otherwise, returns NULL. */
1472 const struct mrset *
1473 dict_lookup_mrset (const struct dictionary *dict, const char *name)
1475 size_t idx = dict_lookup_mrset_idx (dict, name);
1476 return idx != SIZE_MAX ? dict->mrsets[idx] : NULL;
1479 /* Adds MRSET to DICT, replacing any existing set with the same name. Returns
1480 true if a set was replaced, false if none existed with the specified name.
1482 Ownership of MRSET is transferred to DICT. */
1484 dict_add_mrset (struct dictionary *dict, struct mrset *mrset)
1488 assert (mrset_ok (mrset, dict));
1490 idx = dict_lookup_mrset_idx (dict, mrset->name);
1491 if (idx == SIZE_MAX)
1493 dict->mrsets = xrealloc (dict->mrsets,
1494 (dict->n_mrsets + 1) * sizeof *dict->mrsets);
1495 dict->mrsets[dict->n_mrsets++] = mrset;
1500 mrset_destroy (dict->mrsets[idx]);
1501 dict->mrsets[idx] = mrset;
1506 /* Looks for a multiple response set in DICT named NAME. If found, removes it
1507 from DICT and returns true. If none is found, returns false without
1510 Deleting one multiple response set causes the indexes of other sets within
1513 dict_delete_mrset (struct dictionary *dict, const char *name)
1515 size_t idx = dict_lookup_mrset_idx (dict, name);
1516 if (idx != SIZE_MAX)
1518 mrset_destroy (dict->mrsets[idx]);
1519 dict->mrsets[idx] = dict->mrsets[--dict->n_mrsets];
1526 /* Deletes all multiple response sets from DICT. */
1528 dict_clear_mrsets (struct dictionary *dict)
1532 for (i = 0; i < dict->n_mrsets; i++)
1533 mrset_destroy (dict->mrsets[i]);
1534 free (dict->mrsets);
1535 dict->mrsets = NULL;
1539 /* Removes VAR, which must be in DICT, from DICT's multiple response sets. */
1541 dict_unset_mrset_var (struct dictionary *dict, struct variable *var)
1545 assert (dict_contains_var (dict, var));
1547 for (i = 0; i < dict->n_mrsets; )
1549 struct mrset *mrset = dict->mrsets[i];
1552 for (j = 0; j < mrset->n_vars; )
1553 if (mrset->vars[j] == var)
1554 remove_element (mrset->vars, mrset->n_vars--,
1555 sizeof *mrset->vars, j);
1559 if (mrset->n_vars < 2)
1561 mrset_destroy (mrset);
1562 dict->mrsets[i] = dict->mrsets[--dict->n_mrsets];
1569 /* Returns D's attribute set. The caller may examine or modify
1570 the attribute set, but must not destroy it. Destroying D or
1571 calling dict_set_attributes for D will also destroy D's
1574 dict_get_attributes (const struct dictionary *d)
1576 return CONST_CAST (struct attrset *, &d->attributes);
1579 /* Replaces D's attributes set by a copy of ATTRS. */
1581 dict_set_attributes (struct dictionary *d, const struct attrset *attrs)
1583 attrset_destroy (&d->attributes);
1584 attrset_clone (&d->attributes, attrs);
1587 /* Returns true if D has at least one attribute in its attribute
1588 set, false if D's attribute set is empty. */
1590 dict_has_attributes (const struct dictionary *d)
1592 return attrset_count (&d->attributes) > 0;
1595 /* Called from variable.c to notify the dictionary that some property of
1596 the variable has changed */
1598 dict_var_changed (const struct variable *v)
1600 if ( var_has_vardict (v))
1602 const struct vardict_info *vardict = var_get_vardict (v);
1603 struct dictionary *d = vardict->dict;
1608 if (d->changed ) d->changed (d, d->changed_data);
1609 if ( d->callbacks && d->callbacks->var_changed )
1610 d->callbacks->var_changed (d, var_get_dict_index (v), d->cb_data);
1615 /* Called from variable.c to notify the dictionary that the variable's width
1618 dict_var_resized (const struct variable *v, int old_width)
1620 if ( var_has_vardict (v))
1622 const struct vardict_info *vardict = var_get_vardict (v);
1623 struct dictionary *d;
1627 if (d->changed) d->changed (d, d->changed_data);
1629 invalidate_proto (d);
1630 if ( d->callbacks && d->callbacks->var_resized )
1631 d->callbacks->var_resized (d, var_get_dict_index (v), old_width,
1636 /* Called from variable.c to notify the dictionary that the variable's display width
1639 dict_var_display_width_changed (const struct variable *v)
1641 if ( var_has_vardict (v))
1643 const struct vardict_info *vardict = var_get_vardict (v);
1644 struct dictionary *d;
1648 if (d->changed) d->changed (d, d->changed_data);
1649 if ( d->callbacks && d->callbacks->var_display_width_changed )
1650 d->callbacks->var_display_width_changed (d, var_get_dict_index (v), d->cb_data);
/* Dictionary used to contain "internal variables".  Created on first
   use by dict_create_internal_var() and destroyed again by
   dict_destroy_internal_var() once it is empty. */
static struct dictionary *internal_dict;

/* Creates and returns a variable of the specified WIDTH for use in
   internal calculations only.  The variable lives in a private
   dictionary and is assigned case index CASE_IDX.

   Names have the form "$internal<N>".  Name creation is retried with
   a fresh <N> until dict_create_var() succeeds (presumably it fails
   only when the name is already taken). */
struct variable *
dict_create_internal_var (int case_idx, int width)
{
  if (internal_dict == NULL)
    internal_dict = dict_create ();

  for (;;)
    {
      /* Runs through (INT_MAX / 2, INT_MAX) and wraps around before it
         can reach INT_MAX, so the increment never overflows. */
      static int counter = INT_MAX / 2;
      struct variable *var;
      char name[64];

      if (++counter == INT_MAX)
        counter = INT_MAX / 2;

      /* Bounded formatting: cannot write past the end of NAME. */
      snprintf (name, sizeof name, "$internal%d", counter);
      var = dict_create_var (internal_dict, name, width);
      if (var != NULL)
        {
          set_var_case_index (var, case_idx);
          return var;
        }
    }
}
1684 /* Destroys VAR, which must have been created with
1685 dict_create_internal_var(). */
1687 dict_destroy_internal_var (struct variable *var)
1691 dict_delete_var (internal_dict, var);
1693 /* Destroy internal_dict if it has no variables left, just so that
1694 valgrind --leak-check --show-reachable won't show internal_dict. */
1695 if (dict_get_var_cnt (internal_dict) == 0)
1697 dict_destroy (internal_dict);
1698 internal_dict = NULL;
1704 vardict_get_dict_index (const struct vardict_info *vardict)
1706 return vardict - vardict->dict->var;