1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/variable.h"
23 #include "data/attributes.h"
24 #include "data/data-out.h"
25 #include "data/dictionary.h"
26 #include "data/format.h"
27 #include "data/identifier.h"
28 #include "data/missing-values.h"
29 #include "data/settings.h"
30 #include "data/value-labels.h"
31 #include "data/vardict.h"
32 #include "libpspp/assertion.h"
33 #include "libpspp/compiler.h"
34 #include "libpspp/hash-functions.h"
35 #include "libpspp/i18n.h"
36 #include "libpspp/message.h"
37 #include "libpspp/misc.h"
38 #include "libpspp/str.h"
40 #include "gl/minmax.h"
41 #include "gl/xalloc.h"
44 #define _(msgid) gettext (msgid)
49 /* Dictionary information. */
50 char *name; /* Variable name. Mixed case. */
51 int width; /* 0 for numeric, otherwise string width. */
52 struct missing_values miss; /* Missing values. */
53 struct fmt_spec print; /* Default format for PRINT. */
54 struct fmt_spec write; /* Default format for WRITE. */
55 struct val_labs *val_labs; /* Value labels. */
56 char *label; /* Variable label. */
57 struct string name_and_label; /* The name and label in the same string */
59 /* GUI information. */
60 enum measure measure; /* Nominal, ordinal, or continuous. */
61 int display_width; /* Width of data editor column. */
62 enum alignment alignment; /* Alignment of data in GUI. */
64 /* Case information. */
65 bool leave; /* Leave value from case to case? */
67 /* Data for use by containing dictionary. */
68 struct vardict_info *vardict;
70 /* Used only for system and portable file input and output.
73 size_t short_name_cnt;
75 /* Custom attributes. */
76 struct attrset attributes;
79 /* Creates and returns a new variable with the given NAME and
80 WIDTH and other fields initialized to default values. The
81 variable is not added to a dictionary; for that, use
82 dict_create_var instead. */
84 var_create (const char *name, int width)
89 assert (width >= 0 && width <= MAX_STRING);
91 v = xzalloc (sizeof *v);
92 var_set_name (v, name);
94 mv_init (&v->miss, width);
95 v->leave = var_must_leave (v);
96 type = val_type_from_width (width);
97 v->alignment = var_default_alignment (type);
98 v->measure = var_default_measure (type);
99 v->display_width = var_default_display_width (width);
100 v->print = v->write = var_default_formats (width);
101 attrset_init (&v->attributes);
102 ds_init_empty (&v->name_and_label);
/* Returns a new variable that duplicates OLD_VAR's name, width,
   missing values, print and write formats, value labels, label,
   measurement level, display width, alignment, leave setting, and
   custom attributes.

   Two things are deliberately not carried over:

   - Short names, because there is no reason to give a new variable
     with potentially a new name the same short name.

   - Dictionary membership.  Use dict_clone_var to add the clone to
     a dictionary. */
struct variable *
var_clone (const struct variable *old_var)
{
  struct variable *copy;

  copy = var_create (var_get_name (old_var), var_get_width (old_var));

  var_set_missing_values (copy, var_get_missing_values (old_var));
  var_set_print_format (copy, var_get_print_format (old_var));
  var_set_write_format (copy, var_get_write_format (old_var));
  var_set_value_labels (copy, var_get_value_labels (old_var));
  var_set_label (copy, var_get_label (old_var), false);
  var_set_measure (copy, var_get_measure (old_var));
  var_set_display_width (copy, var_get_display_width (old_var));
  var_set_alignment (copy, var_get_alignment (old_var));
  var_set_leave (copy, var_get_leave (old_var));
  var_set_attributes (copy, var_get_attributes (old_var));

  return copy;
}
137 /* Destroys variable V.
138 V must not belong to a dictionary. If it does, use
139 dict_delete_var instead. */
141 var_destroy (struct variable *v)
145 assert (!var_has_vardict (v));
146 mv_destroy (&v->miss);
147 var_clear_short_names (v);
148 val_labs_destroy (v->val_labs);
150 attrset_destroy (var_get_attributes (v));
152 ds_destroy (&v->name_and_label);
157 /* Variable names. */
159 /* Return variable V's name, as a UTF-8 encoded string. */
161 var_get_name (const struct variable *v)
168 /* Sets V's name to NAME, a UTF-8 encoded string.
169 Do not use this function for a variable in a dictionary. Use
170 dict_rename_var instead. */
172 var_set_name (struct variable *v, const char *name)
174 assert (!var_has_vardict (v));
175 assert (id_is_plausible (name, false));
178 v->name = xstrdup (name);
179 ds_destroy (&v->name_and_label);
180 ds_init_empty (&v->name_and_label);
181 dict_var_changed (v);
184 /* Returns VAR's dictionary class. */
186 var_get_dict_class (const struct variable *var)
188 return dict_class_from_id (var->name);
191 /* A hsh_compare_func that orders variables A and B by their
194 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
196 const struct variable *a = a_;
197 const struct variable *b = b_;
199 return utf8_strcasecmp (a->name, b->name);
202 /* A hsh_hash_func that hashes variable V based on its name. */
204 hash_var_by_name (const void *v_, const void *aux UNUSED)
206 const struct variable *v = v_;
208 return utf8_hash_case_string (v->name, 0);
211 /* A hsh_compare_func that orders pointers to variables A and B
214 compare_var_ptrs_by_name (const void *a_, const void *b_,
215 const void *aux UNUSED)
217 struct variable *const *a = a_;
218 struct variable *const *b = b_;
220 return utf8_strcasecmp (var_get_name (*a), var_get_name (*b));
223 /* A hsh_compare_func that orders pointers to variables A and B
224 by their dictionary indexes. */
226 compare_var_ptrs_by_dict_index (const void *a_, const void *b_,
227 const void *aux UNUSED)
229 struct variable *const *a = a_;
230 struct variable *const *b = b_;
231 size_t a_index = var_get_dict_index (*a);
232 size_t b_index = var_get_dict_index (*b);
234 return a_index < b_index ? -1 : a_index > b_index;
237 /* A hsh_hash_func that hashes pointer to variable V based on its
240 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
242 struct variable *const *v = v_;
244 return utf8_hash_case_string (var_get_name (*v), 0);
247 /* Returns the type of variable V. */
249 var_get_type (const struct variable *v)
251 return val_type_from_width (v->width);
254 /* Returns the width of variable V. */
256 var_get_width (const struct variable *v)
261 /* Changes the width of V to NEW_WIDTH.
262 This function should be used cautiously. */
264 var_set_width (struct variable *v, int new_width)
266 const int old_width = v->width;
268 if (old_width == new_width)
271 if (mv_is_resizable (&v->miss, new_width))
272 mv_resize (&v->miss, new_width);
275 mv_destroy (&v->miss);
276 mv_init (&v->miss, new_width);
279 if (v->val_labs != NULL)
281 if (val_labs_can_set_width (v->val_labs, new_width))
282 val_labs_set_width (v->val_labs, new_width);
285 val_labs_destroy (v->val_labs);
290 fmt_resize (&v->print, new_width);
291 fmt_resize (&v->write, new_width);
293 v->width = new_width;
294 dict_var_resized (v, old_width);
295 dict_var_changed (v);
298 /* Returns true if variable V is numeric, false otherwise. */
300 var_is_numeric (const struct variable *v)
302 return var_get_type (v) == VAL_NUMERIC;
305 /* Returns true if variable V is a string variable, false
308 var_is_alpha (const struct variable *v)
310 return var_get_type (v) == VAL_STRING;
313 /* Returns variable V's missing values. */
314 const struct missing_values *
315 var_get_missing_values (const struct variable *v)
320 /* Sets variable V's missing values to MISS, which must be of V's
321 width or at least resizable to V's width.
322 If MISS is null, then V's missing values, if any, are
325 var_set_missing_values (struct variable *v, const struct missing_values *miss)
329 assert (mv_is_resizable (miss, v->width));
330 mv_destroy (&v->miss);
331 mv_copy (&v->miss, miss);
332 mv_resize (&v->miss, v->width);
337 dict_var_changed (v);
/* Removes all of V's user-missing values.  Shorthand for
   var_set_missing_values with a null set. */
void
var_clear_missing_values (struct variable *v)
{
  const struct missing_values *none = NULL;
  var_set_missing_values (v, none);
}
347 /* Returns true if V has any user-missing values,
350 var_has_missing_values (const struct variable *v)
352 return !mv_is_empty (&v->miss);
355 /* Returns true if VALUE is in the given CLASS of missing values
356 in V, false otherwise. */
358 var_is_value_missing (const struct variable *v, const union value *value,
361 return mv_is_value_missing (&v->miss, value, class);
364 /* Returns true if D is in the given CLASS of missing values in
366 V must be a numeric variable. */
368 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
370 return mv_is_num_missing (&v->miss, d, class);
373 /* Returns true if S[] is a missing value for V, false otherwise.
374 S[] must contain exactly as many characters as V's width.
375 V must be a string variable. */
377 var_is_str_missing (const struct variable *v, const uint8_t s[],
380 return mv_is_str_missing (&v->miss, s, class);
383 /* Returns variable V's value labels,
384 possibly a null pointer if it has none. */
385 const struct val_labs *
386 var_get_value_labels (const struct variable *v)
391 /* Returns true if variable V has at least one value label. */
393 var_has_value_labels (const struct variable *v)
395 return val_labs_count (v->val_labs) > 0;
398 /* Sets variable V's value labels to a copy of VLS,
399 which must have a width equal to V's width or one that can be
400 changed to V's width.
401 If VLS is null, then V's value labels, if any, are removed. */
403 var_set_value_labels (struct variable *v, const struct val_labs *vls)
405 val_labs_destroy (v->val_labs);
410 assert (val_labs_can_set_width (vls, v->width));
411 v->val_labs = val_labs_clone (vls);
412 val_labs_set_width (v->val_labs, v->width);
413 dict_var_changed (v);
417 /* Makes sure that V has a set of value labels,
418 by assigning one to it if necessary. */
420 alloc_value_labels (struct variable *v)
422 if (v->val_labs == NULL)
423 v->val_labs = val_labs_create (v->width);
426 /* Attempts to add a value label with the given VALUE and UTF-8 encoded LABEL
427 to V. Returns true if successful, false otherwise (probably due to an
430 In LABEL, the two-byte sequence "\\n" is interpreted as a new-line. */
432 var_add_value_label (struct variable *v,
433 const union value *value, const char *label)
435 alloc_value_labels (v);
436 return val_labs_add (v->val_labs, value, label);
439 /* Adds or replaces a value label with the given VALUE and UTF-8 encoded LABEL
442 In LABEL, the two-byte sequence "\\n" is interpreted as a new-line. */
444 var_replace_value_label (struct variable *v,
445 const union value *value, const char *label)
447 alloc_value_labels (v);
448 val_labs_replace (v->val_labs, value, label);
/* Removes all of V's value labels, if it has any.  Shorthand for
   var_set_value_labels with a null set. */
void
var_clear_value_labels (struct variable *v)
{
  const struct val_labs *none = NULL;
  var_set_value_labels (v, none);
}
458 /* Returns the label associated with VALUE for variable V, as a UTF-8 string in
459 a format suitable for output, or a null pointer if none. */
461 var_lookup_value_label (const struct variable *v, const union value *value)
463 return val_labs_find (v->val_labs, value);
467 Append to STR the string representation of VALUE for variable V.
468 STR must be a pointer to an initialised struct string.
471 append_value (const struct variable *v, const union value *value,
474 char *s = data_out (value, var_get_encoding (v), &v->print);
475 ds_put_cstr (str, s);
479 /* Append STR with a string representing VALUE for variable V.
480 That is, if VALUE has a label, append that label,
481 otherwise format VALUE and append the formatted string.
482 STR must be a pointer to an initialised struct string.
485 var_append_value_name (const struct variable *v, const union value *value,
488 enum settings_value_style style = settings_get_value_style ();
489 const char *name = var_lookup_value_label (v, value);
493 case SETTINGS_VAL_STYLE_VALUES:
494 append_value (v, value, str);
497 case SETTINGS_VAL_STYLE_LABELS:
499 append_value (v, value, str);
501 ds_put_cstr (str, name);
504 case SETTINGS_VAL_STYLE_BOTH:
506 append_value (v, value, str);
509 ds_put_cstr (str, " (");
510 ds_put_cstr (str, name);
511 ds_put_cstr (str, ")");
517 /* Print and write formats. */
519 /* Returns V's print format specification. */
520 const struct fmt_spec *
521 var_get_print_format (const struct variable *v)
526 /* Sets V's print format specification to PRINT, which must be a
527 valid format specification for a variable of V's width
528 (ordinarily an output format, but input formats are not
531 var_set_print_format (struct variable *v, const struct fmt_spec *print)
533 if (!fmt_equal (&v->print, print))
535 assert (fmt_check_width_compat (print, v->width));
537 dict_var_changed (v);
541 /* Returns V's write format specification. */
542 const struct fmt_spec *
543 var_get_write_format (const struct variable *v)
548 /* Sets V's write format specification to WRITE, which must be a
549 valid format specification for a variable of V's width
550 (ordinarily an output format, but input formats are not
553 var_set_write_format (struct variable *v, const struct fmt_spec *write)
555 if (!fmt_equal (&v->write, write))
557 assert (fmt_check_width_compat (write, v->width));
559 dict_var_changed (v);
/* Sets both V's print format and V's write format to FORMAT, by
   calling var_set_print_format and then var_set_write_format.
   FORMAT must therefore meet the requirements of both. */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  const struct fmt_spec *both = format;

  var_set_print_format (v, both);
  var_set_write_format (v, both);
}
574 /* Returns the default print and write format for a variable of
575 the given TYPE, as set by var_create. The return value can be
576 used to reset a variable's print and write formats to the
579 var_default_formats (int width)
582 ? fmt_for_output (FMT_F, 8, 2)
583 : fmt_for_output (FMT_A, width, 0));
589 /* Update the combined name and label string if necessary */
591 update_vl_string (const struct variable *v)
593 /* Cast away const! */
594 struct string *str = (struct string *) &v->name_and_label;
596 if (ds_is_empty (str))
599 ds_put_format (str, _("%s (%s)"), v->label, v->name);
601 ds_put_cstr (str, v->name);
606 /* Return a string representing this variable, in the form most
607 appropriate from a human factors perspective, that is, its
608 variable label if it has one, otherwise its name. */
610 var_to_string (const struct variable *v)
612 enum settings_var_style style = settings_get_var_style ();
616 case SETTINGS_VAR_STYLE_NAMES:
619 case SETTINGS_VAR_STYLE_LABELS:
620 return v->label != NULL ? v->label : v->name;
622 case SETTINGS_VAR_STYLE_BOTH:
623 update_vl_string (v);
624 return ds_cstr (&v->name_and_label);
632 /* Returns V's variable label, or a null pointer if it has none. */
634 var_get_label (const struct variable *v)
639 /* Sets V's variable label to UTF-8 encoded string LABEL, stripping off leading
640 and trailing white space. If LABEL is a null pointer or if LABEL is an
641 empty string (after stripping white space), then V's variable label (if any)
644 Variable labels are limited to 255 bytes in V's encoding (as returned by
645 var_get_encoding()). If LABEL fits within this limit, this function returns
646 true. Otherwise, the variable label is set to a truncated value, this
647 function returns false and, if ISSUE_WARNING is true, issues a warning. */
649 var_set_label (struct variable *v, const char *label, bool issue_warning)
651 bool truncated = false;
656 if (label != NULL && label[strspn (label, CC_SPACES)])
658 const char *dict_encoding = var_get_encoding (v);
659 struct substring s = ss_cstr (label);
662 if (dict_encoding != NULL)
664 enum { MAX_LABEL_LEN = 255 };
666 trunc_len = utf8_encoding_trunc_len (label, dict_encoding,
668 if (ss_length (s) > trunc_len)
671 msg (SW, _("Truncating variable label for variable `%s' to %d "
672 "bytes."), var_get_name (v), MAX_LABEL_LEN);
673 ss_truncate (&s, trunc_len);
678 v->label = ss_xstrdup (s);
681 ds_destroy (&v->name_and_label);
682 ds_init_empty (&v->name_and_label);
684 dict_var_changed (v);
/* Removes V's variable label, if it has one.  Shorthand for
   var_set_label with a null label and warnings disabled. */
void
var_clear_label (struct variable *v)
{
  const char *no_label = NULL;
  var_set_label (v, no_label, false);
}
696 /* Returns true if V has a variable V,
699 var_has_label (const struct variable *v)
701 return v->label != NULL;
704 /* Returns true if M is a valid variable measurement level,
707 measure_is_valid (enum measure m)
709 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
712 /* Returns a string version of measurement level M, for display to a user. */
714 measure_to_string (enum measure m)
718 case MEASURE_NOMINAL:
721 case MEASURE_ORDINAL:
732 /* Returns V's measurement level. */
734 var_get_measure (const struct variable *v)
739 /* Sets V's measurement level to MEASURE. */
741 var_set_measure (struct variable *v, enum measure measure)
743 assert (measure_is_valid (measure));
744 v->measure = measure;
745 dict_var_changed (v);
748 /* Returns the default measurement level for a variable of the
749 given TYPE, as set by var_create. The return value can be
750 used to reset a variable's measurement level to the
753 var_default_measure (enum val_type type)
755 return type == VAL_NUMERIC ? MEASURE_SCALE : MEASURE_NOMINAL;
758 /* Returns V's display width, which applies only to GUIs. */
760 var_get_display_width (const struct variable *v)
762 return v->display_width;
765 /* Sets V's display width to DISPLAY_WIDTH. */
767 var_set_display_width (struct variable *v, int new_width)
769 if (v->display_width != new_width)
771 v->display_width = new_width;
772 dict_var_display_width_changed (v);
773 dict_var_changed (v);
/* Returns the default display width for a variable of the given
   WIDTH, as set by var_create: 8 for a numeric variable (WIDTH 0),
   otherwise WIDTH capped at 32.  The return value can be used to
   reset a variable's display width to the default. */
int
var_default_display_width (int width)
{
  if (width == 0)
    return 8;

  return width < 32 ? width : 32;
}
786 /* Returns true if A is a valid alignment,
789 alignment_is_valid (enum alignment a)
791 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
794 /* Returns a string version of alignment A, for display to a user. */
796 alignment_to_string (enum alignment a)
814 /* Returns V's display alignment, which applies only to GUIs. */
816 var_get_alignment (const struct variable *v)
821 /* Sets V's display alignment to ALIGNMENT. */
823 var_set_alignment (struct variable *v, enum alignment alignment)
825 assert (alignment_is_valid (alignment));
826 v->alignment = alignment;
827 dict_var_changed (v);
830 /* Returns the default display alignment for a variable of the
831 given TYPE, as set by var_create. The return value can be
832 used to reset a variable's display alignment to the default. */
834 var_default_alignment (enum val_type type)
836 return type == VAL_NUMERIC ? ALIGN_RIGHT : ALIGN_LEFT;
/* Whether variables' values should be preserved from case to case. */
842 /* Returns true if variable V's value should be left from case to
843 case, instead of being reset to system-missing or blanks. */
845 var_get_leave (const struct variable *v)
850 /* Sets V's leave setting to LEAVE. */
852 var_set_leave (struct variable *v, bool leave)
854 assert (leave || !var_must_leave (v));
856 dict_var_changed (v);
859 /* Returns true if V must be left from case to case,
860 false if it can be set either way. */
862 var_must_leave (const struct variable *v)
864 return var_get_dict_class (v) == DC_SCRATCH;
867 /* Returns the number of short names stored in VAR.
869 Short names are used only for system and portable file input
870 and output. They are upper-case only, not necessarily unique,
871 and limited to SHORT_NAME_LEN characters (plus a null
872 terminator). Ordinarily a variable has at most one short
873 name, but very long string variables (longer than 255 bytes)
874 may have more. A variable might not have any short name at
875 all if it hasn't been saved to or read from a system or
878 var_get_short_name_cnt (const struct variable *var)
880 return var->short_name_cnt;
883 /* Returns VAR's short name with the given IDX, if it has one
884 with that index, or a null pointer otherwise. Short names may
885 be sparse: even if IDX is less than the number of short names
886 in VAR, this function may return a null pointer. */
888 var_get_short_name (const struct variable *var, size_t idx)
890 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
893 /* Sets VAR's short name with the given IDX to the UTF-8 string SHORT_NAME.
894 The caller must already have checked that, in the dictionary encoding,
895 SHORT_NAME is no more than SHORT_NAME_LEN bytes long. The new short name
896 will be converted to uppercase.
898 Specifying a null pointer for SHORT_NAME clears the specified short name. */
900 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
902 assert (short_name == NULL || id_is_plausible (short_name, false));
904 /* Clear old short name numbered IDX, if any. */
905 if (idx < var->short_name_cnt)
907 free (var->short_names[idx]);
908 var->short_names[idx] = NULL;
911 /* Install new short name for IDX. */
912 if (short_name != NULL)
914 if (idx >= var->short_name_cnt)
916 size_t old_cnt = var->short_name_cnt;
918 var->short_name_cnt = MAX (idx * 2, 1);
919 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
920 sizeof *var->short_names);
921 for (i = old_cnt; i < var->short_name_cnt; i++)
922 var->short_names[i] = NULL;
924 var->short_names[idx] = utf8_to_upper (short_name);
927 dict_var_changed (var);
930 /* Clears V's short names. */
932 var_clear_short_names (struct variable *v)
936 for (i = 0; i < v->short_name_cnt; i++)
937 free (v->short_names[i]);
938 free (v->short_names);
939 v->short_names = NULL;
940 v->short_name_cnt = 0;
943 /* Relationship with dictionary. */
945 /* Returns V's index within its dictionary, the value
946 for which "dict_get_var (dict, index)" will return V.
947 V must be in a dictionary. */
949 var_get_dict_index (const struct variable *v)
951 assert (var_has_vardict (v));
952 return vardict_get_dict_index (v->vardict);
955 /* Returns V's index within the case represented by its
956 dictionary, that is, the value for which "case_data_idx (case,
957 index)" will return the data for V in that case.
958 V must be in a dictionary. */
960 var_get_case_index (const struct variable *v)
962 assert (var_has_vardict (v));
963 return vardict_get_case_index (v->vardict);
966 /* Returns variable V's attribute set. The caller may examine or
967 modify the attribute set, but must not destroy it. Destroying
968 V, or calling var_set_attributes() on V, will also destroy its
971 var_get_attributes (const struct variable *v)
973 return CONST_CAST (struct attrset *, &v->attributes);
976 /* Replaces variable V's attributes set by a copy of ATTRS. */
978 var_set_attributes (struct variable *v, const struct attrset *attrs)
980 attrset_destroy (&v->attributes);
981 attrset_clone (&v->attributes, attrs);
984 /* Returns true if V has any custom attributes, false if it has none. */
986 var_has_attributes (const struct variable *v)
988 return attrset_count (&v->attributes) > 0;
991 /* Returns the encoding of values of variable VAR. (This is actually a
992 property of the dictionary.) Returns null if no specific encoding has been
995 var_get_encoding (const struct variable *var)
997 return (var_has_vardict (var)
998 ? dict_get_encoding (vardict_get_dictionary (var->vardict))
1002 /* Returns V's vardict structure. */
1003 struct vardict_info *
1004 var_get_vardict (const struct variable *v)
1006 return CONST_CAST (struct vardict_info *, v->vardict);
1009 /* Sets V's vardict data to VARDICT. */
1011 var_set_vardict (struct variable *v, struct vardict_info *vardict)
1013 v->vardict = vardict;
1016 /* Returns true if V has vardict data. */
1018 var_has_vardict (const struct variable *v)
1020 return v->vardict != NULL;
1023 /* Clears V's vardict data. */
1025 var_clear_vardict (struct variable *v)