1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/variable.h"
23 #include "data/attributes.h"
24 #include "data/data-out.h"
25 #include "data/dictionary.h"
26 #include "data/format.h"
27 #include "data/identifier.h"
28 #include "data/missing-values.h"
29 #include "data/value-labels.h"
30 #include "data/vardict.h"
31 #include "libpspp/assertion.h"
32 #include "libpspp/compiler.h"
33 #include "libpspp/hash-functions.h"
34 #include "libpspp/i18n.h"
35 #include "libpspp/message.h"
36 #include "libpspp/misc.h"
37 #include "libpspp/str.h"
39 #include "gl/minmax.h"
40 #include "gl/xalloc.h"
43 #define _(msgid) gettext (msgid)
48 /* Dictionary information. */
49 char *name; /* Variable name. Mixed case. */
50 int width; /* 0 for numeric, otherwise string width. */
51 struct missing_values miss; /* Missing values. */
52 struct fmt_spec print; /* Default format for PRINT. */
53 struct fmt_spec write; /* Default format for WRITE. */
54 struct val_labs *val_labs; /* Value labels. */
55 char *label; /* Variable label. */
57 /* GUI information. */
58 enum measure measure; /* Nominal, ordinal, or continuous. */
59 int display_width; /* Width of data editor column. */
60 enum alignment alignment; /* Alignment of data in GUI. */
62 /* Case information. */
63 bool leave; /* Leave value from case to case? */
65 /* Data for use by containing dictionary. */
66 struct vardict_info *vardict;
68 /* Used only for system and portable file input and output.
71 size_t short_name_cnt;
73 /* Each command may use these fields as needed. */
75 void (*aux_dtor) (struct variable *);
77 /* Custom attributes. */
78 struct attrset attributes;
81 /* Creates and returns a new variable with the given NAME and
82 WIDTH and other fields initialized to default values. The
83 variable is not added to a dictionary; for that, use
84 dict_create_var instead. */
86 var_create (const char *name, int width)
91 assert (width >= 0 && width <= MAX_STRING);
93 v = xmalloc (sizeof *v);
96 var_set_name (v, name);
98 mv_init (&v->miss, width);
99 v->leave = var_must_leave (v);
100 type = val_type_from_width (width);
101 v->alignment = var_default_alignment (type);
102 v->measure = var_default_measure (type);
103 v->display_width = var_default_display_width (width);
104 v->print = v->write = var_default_formats (width);
107 v->short_names = NULL;
108 v->short_name_cnt = 0;
111 attrset_init (&v->attributes);
/* Returns a new variable that duplicates OLD_VAR's name, width, missing
   values, print and write formats, value labels, variable label,
   measurement level, display width, alignment, leave setting, and custom
   attributes.  Two things are deliberately not carried over:

     - OLD_VAR's short names, because there is no reason to give a new
       variable with potentially a new name the same short name.

     - Dictionary membership.  The clone is not in any dictionary; use
       dict_clone_var to clone into a dictionary. */
struct variable *
var_clone (const struct variable *old_var)
{
  struct variable *copy = var_create (var_get_name (old_var),
                                      var_get_width (old_var));

  /* Dictionary information. */
  var_set_missing_values (copy, var_get_missing_values (old_var));
  var_set_print_format (copy, var_get_print_format (old_var));
  var_set_write_format (copy, var_get_write_format (old_var));
  var_set_value_labels (copy, var_get_value_labels (old_var));
  var_set_label (copy, var_get_label (old_var), false);

  /* GUI information. */
  var_set_measure (copy, var_get_measure (old_var));
  var_set_display_width (copy, var_get_display_width (old_var));
  var_set_alignment (copy, var_get_alignment (old_var));

  /* Case information and custom attributes. */
  var_set_leave (copy, var_get_leave (old_var));
  var_set_attributes (copy, var_get_attributes (old_var));

  return copy;
}
146 /* Destroys variable V.
147 V must not belong to a dictionary. If it does, use
148 dict_delete_var instead. */
150 var_destroy (struct variable *v)
154 assert (!var_has_vardict (v));
155 mv_destroy (&v->miss);
156 var_clear_short_names (v);
158 val_labs_destroy (v->val_labs);
165 /* Variable names. */
167 /* Return variable V's name, as a UTF-8 encoded string. */
169 var_get_name (const struct variable *v)
174 /* Sets V's name to NAME, a UTF-8 encoded string.
175 Do not use this function for a variable in a dictionary. Use
176 dict_rename_var instead. */
178 var_set_name (struct variable *v, const char *name)
180 assert (!var_has_vardict (v));
181 assert (id_is_plausible (name, false));
184 v->name = xstrdup (name);
185 dict_var_changed (v);
188 /* Returns VAR's dictionary class. */
190 var_get_dict_class (const struct variable *var)
192 return dict_class_from_id (var->name);
195 /* A hsh_compare_func that orders variables A and B by their
198 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
200 const struct variable *a = a_;
201 const struct variable *b = b_;
203 return strcasecmp (a->name, b->name);
206 /* A hsh_hash_func that hashes variable V based on its name. */
208 hash_var_by_name (const void *v_, const void *aux UNUSED)
210 const struct variable *v = v_;
212 return hash_case_string (v->name, 0);
215 /* A hsh_compare_func that orders pointers to variables A and B
218 compare_var_ptrs_by_name (const void *a_, const void *b_,
219 const void *aux UNUSED)
221 struct variable *const *a = a_;
222 struct variable *const *b = b_;
224 return strcasecmp (var_get_name (*a), var_get_name (*b));
227 /* A hsh_compare_func that orders pointers to variables A and B
228 by their dictionary indexes. */
230 compare_var_ptrs_by_dict_index (const void *a_, const void *b_,
231 const void *aux UNUSED)
233 struct variable *const *a = a_;
234 struct variable *const *b = b_;
235 size_t a_index = var_get_dict_index (*a);
236 size_t b_index = var_get_dict_index (*b);
238 return a_index < b_index ? -1 : a_index > b_index;
241 /* A hsh_hash_func that hashes pointer to variable V based on its
244 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
246 struct variable *const *v = v_;
248 return hash_case_string (var_get_name (*v), 0);
251 /* Returns the type of variable V. */
253 var_get_type (const struct variable *v)
255 return val_type_from_width (v->width);
258 /* Returns the width of variable V. */
260 var_get_width (const struct variable *v)
265 /* Changes the width of V to NEW_WIDTH.
266 This function should be used cautiously. */
268 var_set_width (struct variable *v, int new_width)
270 const int old_width = v->width;
272 if (old_width == new_width)
275 if (mv_is_resizable (&v->miss, new_width))
276 mv_resize (&v->miss, new_width);
279 mv_destroy (&v->miss);
280 mv_init (&v->miss, new_width);
283 if (v->val_labs != NULL)
285 if (val_labs_can_set_width (v->val_labs, new_width))
286 val_labs_set_width (v->val_labs, new_width);
289 val_labs_destroy (v->val_labs);
294 fmt_resize (&v->print, new_width);
295 fmt_resize (&v->write, new_width);
297 v->width = new_width;
298 dict_var_resized (v, old_width);
299 dict_var_changed (v);
302 /* Returns true if variable V is numeric, false otherwise. */
304 var_is_numeric (const struct variable *v)
306 return var_get_type (v) == VAL_NUMERIC;
309 /* Returns true if variable V is a string variable, false
312 var_is_alpha (const struct variable *v)
314 return var_get_type (v) == VAL_STRING;
317 /* Returns variable V's missing values. */
318 const struct missing_values *
319 var_get_missing_values (const struct variable *v)
324 /* Sets variable V's missing values to MISS, which must be of V's
325 width or at least resizable to V's width.
326 If MISS is null, then V's missing values, if any, are
329 var_set_missing_values (struct variable *v, const struct missing_values *miss)
333 assert (mv_is_resizable (miss, v->width));
334 mv_destroy (&v->miss);
335 mv_copy (&v->miss, miss);
336 mv_resize (&v->miss, v->width);
341 dict_var_changed (v);
/* Removes all user-missing values from V, by passing a null set to
   var_set_missing_values(). */
void
var_clear_missing_values (struct variable *v)
{
  const struct missing_values *none = NULL;

  var_set_missing_values (v, none);
}
351 /* Returns true if V has any user-missing values,
354 var_has_missing_values (const struct variable *v)
356 return !mv_is_empty (&v->miss);
359 /* Returns true if VALUE is in the given CLASS of missing values
360 in V, false otherwise. */
362 var_is_value_missing (const struct variable *v, const union value *value,
365 return mv_is_value_missing (&v->miss, value, class);
368 /* Returns true if D is in the given CLASS of missing values in
370 V must be a numeric variable. */
372 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
374 return mv_is_num_missing (&v->miss, d, class);
377 /* Returns true if S[] is a missing value for V, false otherwise.
378 S[] must contain exactly as many characters as V's width.
379 V must be a string variable. */
381 var_is_str_missing (const struct variable *v, const uint8_t s[],
384 return mv_is_str_missing (&v->miss, s, class);
387 /* Returns variable V's value labels,
388 possibly a null pointer if it has none. */
389 const struct val_labs *
390 var_get_value_labels (const struct variable *v)
395 /* Returns true if variable V has at least one value label. */
397 var_has_value_labels (const struct variable *v)
399 return val_labs_count (v->val_labs) > 0;
402 /* Sets variable V's value labels to a copy of VLS,
403 which must have a width equal to V's width or one that can be
404 changed to V's width.
405 If VLS is null, then V's value labels, if any, are removed. */
407 var_set_value_labels (struct variable *v, const struct val_labs *vls)
409 val_labs_destroy (v->val_labs);
414 assert (val_labs_can_set_width (vls, v->width));
415 v->val_labs = val_labs_clone (vls);
416 val_labs_set_width (v->val_labs, v->width);
417 dict_var_changed (v);
421 /* Makes sure that V has a set of value labels,
422 by assigning one to it if necessary. */
424 alloc_value_labels (struct variable *v)
426 if (v->val_labs == NULL)
427 v->val_labs = val_labs_create (v->width);
430 /* Attempts to add a value label with the given VALUE and UTF-8 encoded LABEL
431 to V. Returns true if successful, false otherwise (probably due to an
434 In LABEL, the two-byte sequence "\\n" is interpreted as a new-line. */
436 var_add_value_label (struct variable *v,
437 const union value *value, const char *label)
439 alloc_value_labels (v);
440 return val_labs_add (v->val_labs, value, label);
443 /* Adds or replaces a value label with the given VALUE and UTF-8 encoded LABEL
446 In LABEL, the two-byte sequence "\\n" is interpreted as a new-line. */
448 var_replace_value_label (struct variable *v,
449 const union value *value, const char *label)
451 alloc_value_labels (v);
452 val_labs_replace (v->val_labs, value, label);
/* Removes V's value labels, if any, by passing a null set to
   var_set_value_labels(). */
void
var_clear_value_labels (struct variable *v)
{
  const struct val_labs *none = NULL;

  var_set_value_labels (v, none);
}
462 /* Returns the label associated with VALUE for variable V, as a UTF-8 string in
463 a format suitable for output, or a null pointer if none. */
465 var_lookup_value_label (const struct variable *v, const union value *value)
467 return val_labs_find (v->val_labs, value);
470 /* Append STR with a string representing VALUE for variable V.
471 That is, if VALUE has a label, append that label,
472 otherwise format VALUE and append the formatted string.
473 STR must be a pointer to an initialised struct string.
476 var_append_value_name (const struct variable *v, const union value *value,
479 const char *name = var_lookup_value_label (v, value);
482 char *s = data_out (value, var_get_encoding (v), &v->print);
483 ds_put_cstr (str, s);
487 ds_put_cstr (str, name);
490 /* Print and write formats. */
492 /* Returns V's print format specification. */
493 const struct fmt_spec *
494 var_get_print_format (const struct variable *v)
499 /* Sets V's print format specification to PRINT, which must be a
500 valid format specification for a variable of V's width
501 (ordinarily an output format, but input formats are not
504 var_set_print_format (struct variable *v, const struct fmt_spec *print)
506 assert (fmt_check_width_compat (print, v->width));
508 dict_var_changed (v);
511 /* Returns V's write format specification. */
512 const struct fmt_spec *
513 var_get_write_format (const struct variable *v)
518 /* Sets V's write format specification to WRITE, which must be a
519 valid format specification for a variable of V's width
520 (ordinarily an output format, but input formats are not
523 var_set_write_format (struct variable *v, const struct fmt_spec *write)
525 assert (fmt_check_width_compat (write, v->width));
527 dict_var_changed (v);
/* Makes FORMAT both the print format and the write format of V, by
   calling var_set_print_format() and then var_set_write_format().
   FORMAT must be compatible with V's width (ordinarily it is an output
   format). */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  /* Print first, then write, so notifications arrive in that order. */
  var_set_print_format (v, format);
  var_set_write_format (v, format);
}
541 /* Returns the default print and write format for a variable of
542 the given TYPE, as set by var_create. The return value can be
543 used to reset a variable's print and write formats to the
546 var_default_formats (int width)
549 ? fmt_for_output (FMT_F, 8, 2)
550 : fmt_for_output (FMT_A, width, 0));
553 /* Return a string representing this variable, in the form most
554 appropriate from a human factors perspective, that is, its
555 variable label if it has one, otherwise its name. */
557 var_to_string (const struct variable *v)
559 return v->label != NULL ? v->label : v->name;
562 /* Returns V's variable label, or a null pointer if it has none. */
564 var_get_label (const struct variable *v)
569 /* Sets V's variable label to UTF-8 encoded string LABEL, stripping off leading
570 and trailing white space. If LABEL is a null pointer or if LABEL is an
571 empty string (after stripping white space), then V's variable label (if any)
574 Variable labels are limited to 255 bytes in V's encoding (as returned by
575 var_get_encoding()). If LABEL fits within this limit, this function returns
576 true. Otherwise, the variable label is set to a truncated value, this
577 function returns false and, if ISSUE_WARNING is true, issues a warning. */
579 var_set_label (struct variable *v, const char *label, bool issue_warning)
581 bool truncated = false;
588 const char *dict_encoding = var_get_encoding (v);
589 struct substring s = ss_cstr (label);
592 if (dict_encoding != NULL)
594 enum { MAX_LABEL_LEN = 255 };
596 trunc_len = utf8_encoding_trunc_len (label, dict_encoding,
598 if (ss_length (s) > trunc_len)
601 msg (SW, _("Truncating variable label for variable `%s' to %d "
602 "bytes."), var_get_name (v), MAX_LABEL_LEN);
603 ss_truncate (&s, trunc_len);
608 ss_trim (&s, ss_cstr (CC_SPACES));
609 if (!ss_is_empty (s))
610 v->label = ss_xstrdup (s);
613 dict_var_changed (v);
/* Removes any variable label from V, by passing a null label to
   var_set_label() with warnings disabled. */
void
var_clear_label (struct variable *v)
{
  const char *no_label = NULL;

  var_set_label (v, no_label, false);
}
625 /* Returns true if V has a variable V,
628 var_has_label (const struct variable *v)
630 return v->label != NULL;
633 /* Returns true if M is a valid variable measurement level,
636 measure_is_valid (enum measure m)
638 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
641 /* Returns V's measurement level. */
643 var_get_measure (const struct variable *v)
648 /* Sets V's measurement level to MEASURE. */
650 var_set_measure (struct variable *v, enum measure measure)
652 assert (measure_is_valid (measure));
653 v->measure = measure;
654 dict_var_changed (v);
657 /* Returns the default measurement level for a variable of the
658 given TYPE, as set by var_create. The return value can be
659 used to reset a variable's measurement level to the
662 var_default_measure (enum val_type type)
664 return type == VAL_NUMERIC ? MEASURE_SCALE : MEASURE_NOMINAL;
667 /* Returns V's display width, which applies only to GUIs. */
669 var_get_display_width (const struct variable *v)
671 return v->display_width;
674 /* Sets V's display width to DISPLAY_WIDTH. */
676 var_set_display_width (struct variable *v, int new_width)
678 int old_width = v->display_width;
680 v->display_width = new_width;
682 if ( old_width != new_width)
683 dict_var_display_width_changed (v);
685 dict_var_changed (v);
/* Returns the default display width for a variable of the given WIDTH,
   as set by var_create: 8 if WIDTH is 0, otherwise WIDTH itself, capped
   at 32.  The return value can be used to reset a variable's display
   width to the default. */
int
var_default_display_width (int width)
{
  if (width == 0)
    return 8;

  return MIN (width, 32);
}
697 /* Returns true if A is a valid alignment,
700 alignment_is_valid (enum alignment a)
702 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
705 /* Returns V's display alignment, which applies only to GUIs. */
707 var_get_alignment (const struct variable *v)
712 /* Sets V's display alignment to ALIGNMENT. */
714 var_set_alignment (struct variable *v, enum alignment alignment)
716 assert (alignment_is_valid (alignment));
717 v->alignment = alignment;
718 dict_var_changed (v);
721 /* Returns the default display alignment for a variable of the
722 given TYPE, as set by var_create. The return value can be
723 used to reset a variable's display alignment to the default. */
725 var_default_alignment (enum val_type type)
727 return type == VAL_NUMERIC ? ALIGN_RIGHT : ALIGN_LEFT;
/* Whether variables' values should be preserved from case to case. */
733 /* Returns true if variable V's value should be left from case to
734 case, instead of being reset to system-missing or blanks. */
736 var_get_leave (const struct variable *v)
741 /* Sets V's leave setting to LEAVE. */
743 var_set_leave (struct variable *v, bool leave)
745 assert (leave || !var_must_leave (v));
747 dict_var_changed (v);
750 /* Returns true if V must be left from case to case,
751 false if it can be set either way. */
753 var_must_leave (const struct variable *v)
755 return var_get_dict_class (v) == DC_SCRATCH;
758 /* Returns the number of short names stored in VAR.
760 Short names are used only for system and portable file input
761 and output. They are upper-case only, not necessarily unique,
762 and limited to SHORT_NAME_LEN characters (plus a null
763 terminator). Ordinarily a variable has at most one short
764 name, but very long string variables (longer than 255 bytes)
765 may have more. A variable might not have any short name at
766 all if it hasn't been saved to or read from a system or
769 var_get_short_name_cnt (const struct variable *var)
771 return var->short_name_cnt;
774 /* Returns VAR's short name with the given IDX, if it has one
775 with that index, or a null pointer otherwise. Short names may
776 be sparse: even if IDX is less than the number of short names
777 in VAR, this function may return a null pointer. */
779 var_get_short_name (const struct variable *var, size_t idx)
781 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
784 /* Sets VAR's short name with the given IDX to the UTF-8 string SHORT_NAME.
785 The caller must already have checked that, in the dictionary encoding,
786 SHORT_NAME is no more than SHORT_NAME_LEN bytes long. The new short name
787 will be converted to uppercase.
789 Specifying a null pointer for SHORT_NAME clears the specified short name. */
791 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
793 assert (short_name == NULL || id_is_plausible (short_name, false));
795 /* Clear old short name numbered IDX, if any. */
796 if (idx < var->short_name_cnt)
798 free (var->short_names[idx]);
799 var->short_names[idx] = NULL;
802 /* Install new short name for IDX. */
803 if (short_name != NULL)
805 if (idx >= var->short_name_cnt)
807 size_t old_cnt = var->short_name_cnt;
809 var->short_name_cnt = MAX (idx * 2, 1);
810 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
811 sizeof *var->short_names);
812 for (i = old_cnt; i < var->short_name_cnt; i++)
813 var->short_names[i] = NULL;
815 var->short_names[idx] = xstrdup (short_name);
816 str_uppercase (var->short_names[idx]);
819 dict_var_changed (var);
822 /* Clears V's short names. */
824 var_clear_short_names (struct variable *v)
828 for (i = 0; i < v->short_name_cnt; i++)
829 free (v->short_names[i]);
830 free (v->short_names);
831 v->short_names = NULL;
832 v->short_name_cnt = 0;
835 /* Relationship with dictionary. */
837 /* Returns V's index within its dictionary, the value
838 for which "dict_get_var (dict, index)" will return V.
839 V must be in a dictionary. */
841 var_get_dict_index (const struct variable *v)
843 assert (var_has_vardict (v));
844 return vardict_get_dict_index (v->vardict);
847 /* Returns V's index within the case represented by its
848 dictionary, that is, the value for which "case_data_idx (case,
849 index)" will return the data for V in that case.
850 V must be in a dictionary. */
852 var_get_case_index (const struct variable *v)
854 assert (var_has_vardict (v));
855 return vardict_get_case_index (v->vardict);
858 /* Returns V's auxiliary data, or a null pointer if none has been
861 var_get_aux (const struct variable *v)
866 /* Assign auxiliary data AUX to variable V, which must not
867 already have auxiliary data. Before V's auxiliary data is
868 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
869 may be appropriate for use as AUX_DTOR.) */
871 var_attach_aux (const struct variable *v_,
872 void *aux, void (*aux_dtor) (struct variable *))
874 struct variable *v = CONST_CAST (struct variable *, v_);
875 assert (v->aux == NULL);
876 assert (aux != NULL);
878 v->aux_dtor = aux_dtor;
882 /* Remove auxiliary data, if any, from V, and return it, without
883 calling any associated destructor. */
885 var_detach_aux (struct variable *v)
888 assert (aux != NULL);
893 /* Clears auxiliary data, if any, from V, and calls any
894 associated destructor. */
896 var_clear_aux (struct variable *v)
900 if (v->aux_dtor != NULL)
906 /* This function is appropriate for use an auxiliary data
907 destructor (passed as AUX_DTOR to var_attach_aux()) for the
908 case where the auxiliary data should be passed to free(). */
910 var_dtor_free (struct variable *v)
915 /* Returns variable V's attribute set. The caller may examine or
916 modify the attribute set, but must not destroy it. Destroying
917 V, or calling var_set_attributes() on V, will also destroy its
920 var_get_attributes (const struct variable *v)
922 return CONST_CAST (struct attrset *, &v->attributes);
925 /* Replaces variable V's attributes set by a copy of ATTRS. */
927 var_set_attributes (struct variable *v, const struct attrset *attrs)
929 attrset_destroy (&v->attributes);
930 attrset_clone (&v->attributes, attrs);
933 /* Returns true if V has any custom attributes, false if it has none. */
935 var_has_attributes (const struct variable *v)
937 return attrset_count (&v->attributes) > 0;
940 /* Returns the encoding of values of variable VAR. (This is actually a
941 property of the dictionary.) Returns null if no specific encoding has been
944 var_get_encoding (const struct variable *var)
946 return (var_has_vardict (var)
947 ? dict_get_encoding (vardict_get_dictionary (var->vardict))
951 /* Returns V's vardict structure. */
952 struct vardict_info *
953 var_get_vardict (const struct variable *v)
955 return CONST_CAST (struct vardict_info *, v->vardict);
958 /* Sets V's vardict data to VARDICT. */
960 var_set_vardict (struct variable *v, struct vardict_info *vardict)
962 v->vardict = vardict;
965 /* Returns true if V has vardict data. */
967 var_has_vardict (const struct variable *v)
969 return v->vardict != NULL;
972 /* Clears V's vardict data. */
974 var_clear_vardict (struct variable *v)