1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include <data/attributes.h>
23 #include <data/data-out.h>
24 #include <data/format.h>
25 #include <data/dictionary.h>
26 #include <data/identifier.h>
27 #include <data/missing-values.h>
28 #include <data/value-labels.h>
29 #include <data/vardict.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/assertion.h>
33 #include <libpspp/compiler.h>
34 #include <libpspp/hash-functions.h>
35 #include <libpspp/message.h>
36 #include <libpspp/str.h>
42 #define _(msgid) gettext (msgid)
47 /* Dictionary information. */
48 char *name; /* Variable name. Mixed case. */
49 int width; /* 0 for numeric, otherwise string width. */
50 struct missing_values miss; /* Missing values. */
51 struct fmt_spec print; /* Default format for PRINT. */
52 struct fmt_spec write; /* Default format for WRITE. */
53 struct val_labs *val_labs; /* Value labels. */
54 char *label; /* Variable label. */
56 /* GUI information. */
57 enum measure measure; /* Nominal, ordinal, or continuous. */
58 int display_width; /* Width of data editor column. */
59 enum alignment alignment; /* Alignment of data in GUI. */
61 /* Case information. */
62 bool leave; /* Leave value from case to case? */
64 /* Data for use by containing dictionary. */
65 struct vardict_info *vardict;
67 /* Used only for system and portable file input and output. */
70 size_t short_name_cnt;
72 /* Each command may use these fields as needed. */
74 void (*aux_dtor) (struct variable *);
76 /* Custom attributes. */
77 struct attrset attributes;
80 /* Creates and returns a new variable with the given NAME and
81 WIDTH and other fields initialized to default values. WIDTH is
82 0 for a numeric variable, otherwise the string width, and must not exceed MAX_STRING. The
83 variable is not added to a dictionary; for that, use dict_create_var instead. */
85 var_create (const char *name, int width)
90 assert (width >= 0 && width <= MAX_STRING); /* A WIDTH outside this range is a caller error. */
92 v = xmalloc (sizeof *v);
95 var_set_name (v, name); /* Asserts that NAME is plausible and stores a copy of it. */
97 mv_init (&v->miss, width);
98 v->leave = var_must_leave (v); /* Decided from V's name, so this must follow var_set_name. */
99 type = val_type_from_width (width);
100 v->alignment = var_default_alignment (type);
101 v->measure = var_default_measure (type);
102 v->display_width = var_default_display_width (width);
103 v->print = v->write = var_default_formats (width); /* Print and write formats start out identical. */
106 v->short_names = NULL; /* A new variable has no short names. */
107 v->short_name_cnt = 0;
110 attrset_init (&v->attributes);
115 /* Creates and returns a clone of OLD_VAR. Most properties of
116 the new variable are copied from OLD_VAR, except:
118 - The variable's short name is not copied, because there is
119 no reason to give a new variable with potentially a new
120 name the same short name.
122 - The new variable is not added to OLD_VAR's dictionary by
123 default. Use dict_clone_var, instead, to do that. */
126 var_clone (const struct variable *old_var)
128 struct variable *new_var = var_create (var_get_name (old_var),
129 var_get_width (old_var));
131 var_set_missing_values (new_var, var_get_missing_values (old_var));
132 var_set_print_format (new_var, var_get_print_format (old_var));
133 var_set_write_format (new_var, var_get_write_format (old_var));
134 var_set_value_labels (new_var, var_get_value_labels (old_var));
135 var_set_label (new_var, var_get_label (old_var));
136 var_set_measure (new_var, var_get_measure (old_var));
137 var_set_display_width (new_var, var_get_display_width (old_var));
138 var_set_alignment (new_var, var_get_alignment (old_var));
139 var_set_leave (new_var, var_get_leave (old_var));
140 var_set_attributes (new_var, var_get_attributes (old_var));
145 /* Destroys variable V. The cleanup includes V's missing values, short names, and value labels.
146 V must not belong to a dictionary (this is asserted). If it does, use
147 dict_delete_var instead. */
149 var_destroy (struct variable *v)
153 assert (!var_has_vardict (v));
154 mv_destroy (&v->miss);
155 var_clear_short_names (v);
157 val_labs_destroy (v->val_labs);
164 /* Variable names. */
166 /* Returns variable V's name. */
168 var_get_name (const struct variable *v)
173 /* Sets V's name to a copy of NAME, which must be a plausible variable name (see var_is_plausible_name).
174 Do not use this function for a variable in a dictionary (this is asserted). Use
175 dict_rename_var instead. */
177 var_set_name (struct variable *v, const char *name)
179 assert (!var_has_vardict (v));
180 assert (var_is_plausible_name (name, false)); /* false: do not issue an error message. */
183 v->name = xstrdup (name);
184 dict_var_changed (v);
187 /* Returns true if NAME is an acceptable name for a variable,
188 false otherwise. If ISSUE_ERROR is true, issues an
189 explanatory error message on failure. */
191 var_is_valid_name (const char *name, bool issue_error)
196 /* Note that strlen returns number of BYTES, not the number of CHARACTERS. */
198 length = strlen (name);
200 plausible = var_is_plausible_name(name, issue_error);
206 if (!lex_is_id1 (name[0]))
209 msg (SE, _("Character `%c' (in %s) may not appear "
210 "as the first character in a variable name."),
216 for (i = 0; i < length; i++)
218 if (!lex_is_idn (name[i]))
221 msg (SE, _("Character `%c' (in %s) may not appear in "
231 /* Returns true if NAME is a plausible name for a variable,
232 false otherwise. If ISSUE_ERROR is true, issues an
233 explanatory error message on failure.
234 This function makes no use of LC_CTYPE. */
237 var_is_plausible_name (const char *name, bool issue_error)
241 /* Note that strlen returns number of BYTES, not the number of CHARACTERS. */
243 length = strlen (name);
247 msg (SE, _("Variable name cannot be empty string."));
250 else if (length > VAR_NAME_LEN)
253 msg (SE, _("Variable name %s exceeds %d-character limit."),
254 name, (int) VAR_NAME_LEN);
258 if (lex_id_to_token (ss_cstr (name)) != T_ID)
261 msg (SE, _("`%s' may not be used as a variable name because it "
262 "is a reserved word."), name);
269 /* Returns VAR's dictionary class, which dict_class_from_id derives from VAR's name. */
271 var_get_dict_class (const struct variable *var)
273 return dict_class_from_id (var->name);
276 /* A hsh_compare_func that orders variables A and B by their names, ignoring case. */
279 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
281 const struct variable *a = a_;
282 const struct variable *b = b_;
284 return strcasecmp (a->name, b->name);
287 /* A hsh_hash_func that hashes variable V based on its name, using hash_case_string. */
289 hash_var_by_name (const void *v_, const void *aux UNUSED)
291 const struct variable *v = v_;
293 return hash_case_string (v->name, 0); /* NOTE(review): presumably case-insensitive, to agree with strcasecmp in compare_vars_by_name -- confirm. */
296 /* A hsh_compare_func that orders pointers to variables A and B by their names, ignoring case. */
299 compare_var_ptrs_by_name (const void *a_, const void *b_,
300 const void *aux UNUSED)
302 struct variable *const *a = a_;
303 struct variable *const *b = b_;
305 return strcasecmp (var_get_name (*a), var_get_name (*b));
308 /* A hsh_compare_func that orders pointers to variables A and B
309 by their dictionary indexes. Both variables must be in a dictionary, because var_get_dict_index asserts it. */
311 compare_var_ptrs_by_dict_index (const void *a_, const void *b_,
312 const void *aux UNUSED)
314 struct variable *const *a = a_;
315 struct variable *const *b = b_;
316 size_t a_index = var_get_dict_index (*a);
317 size_t b_index = var_get_dict_index (*b);
319 return a_index < b_index ? -1 : a_index > b_index; /* Yields -1, 0, or 1; avoids subtracting unsigned values. */
322 /* A hsh_hash_func that hashes pointer to variable V based on its name. */
325 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
327 struct variable *const *v = v_;
329 return hash_case_string (var_get_name (*v), 0);
332 /* Returns the type of variable V, which val_type_from_width derives from V's width. */
334 var_get_type (const struct variable *v)
336 return val_type_from_width (v->width);
339 /* Returns the width of variable V. */
341 var_get_width (const struct variable *v)
346 /* Changes the width of V to NEW_WIDTH, adjusting V's missing values, value labels, and print and write formats to match.
347 This function should be used cautiously. */
349 var_set_width (struct variable *v, int new_width)
351 const int old_width = v->width; /* Saved to pass to dict_var_resized below. */
353 if (old_width == new_width)
356 if (mv_is_resizable (&v->miss, new_width))
357 mv_resize (&v->miss, new_width);
360 mv_destroy (&v->miss); /* NOTE(review): presumably the branch for missing values that cannot be resized -- confirm. */
361 mv_init (&v->miss, new_width);
364 if (v->val_labs != NULL)
366 if (val_labs_can_set_width (v->val_labs, new_width))
367 val_labs_set_width (v->val_labs, new_width);
370 val_labs_destroy (v->val_labs); /* NOTE(review): presumably the branch for labels that cannot take the new width -- confirm. */
375 fmt_resize (&v->print, new_width);
376 fmt_resize (&v->write, new_width);
378 v->width = new_width;
379 dict_var_resized (v, old_width);
380 dict_var_changed (v);
383 /* Returns true if variable V is numeric (its type is VAL_NUMERIC), false otherwise. */
385 var_is_numeric (const struct variable *v)
387 return var_get_type (v) == VAL_NUMERIC;
390 /* Returns true if variable V is a string variable, false otherwise. */
393 var_is_alpha (const struct variable *v)
395 return var_get_type (v) == VAL_STRING;
398 /* Returns variable V's missing values. */
399 const struct missing_values *
400 var_get_missing_values (const struct variable *v)
405 /* Sets variable V's missing values to MISS, which must be of V's
406 width or at least resizable to V's width.
407 If MISS is null, then V's missing values, if any, are cleared. */
410 var_set_missing_values (struct variable *v, const struct missing_values *miss)
414 assert (mv_is_resizable (miss, v->width));
415 mv_destroy (&v->miss);
416 mv_copy (&v->miss, miss);
417 mv_resize (&v->miss, v->width);
422 dict_var_changed (v);
425 /* Sets variable V to have no user-missing values, by passing a null MISS to var_set_missing_values. */
427 var_clear_missing_values (struct variable *v)
429 var_set_missing_values (v, NULL);
432 /* Returns true if V has any user-missing values, false otherwise. */
435 var_has_missing_values (const struct variable *v)
437 return !mv_is_empty (&v->miss);
440 /* Returns true if VALUE is in the given CLASS of missing values
441 in V, false otherwise. The test is delegated to mv_is_value_missing on V's missing values. */
443 var_is_value_missing (const struct variable *v, const union value *value,
446 return mv_is_value_missing (&v->miss, value, class);
449 /* Returns true if D is in the given CLASS of missing values in V, false otherwise.
451 V must be a numeric variable. */
453 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
455 return mv_is_num_missing (&v->miss, d, class);
458 /* Returns true if S[] is in the given CLASS of missing values for V, false otherwise.
459 S[] must contain exactly as many characters as V's width.
460 V must be a string variable. */
462 var_is_str_missing (const struct variable *v, const uint8_t s[],
465 return mv_is_str_missing (&v->miss, s, class);
468 /* Returns variable V's value labels,
469 possibly a null pointer if it has none. */
470 const struct val_labs *
471 var_get_value_labels (const struct variable *v)
476 /* Returns true if variable V has at least one value label. */
478 var_has_value_labels (const struct variable *v)
480 return val_labs_count (v->val_labs) > 0; /* NOTE(review): V's value labels may be a null pointer; presumably val_labs_count accepts that -- confirm. */
483 /* Sets variable V's value labels to a copy of VLS,
484 which must have a width equal to V's width or one that can be
485 changed to V's width. V's previous value labels are destroyed.
486 If VLS is null, then V's value labels, if any, are removed. */
488 var_set_value_labels (struct variable *v, const struct val_labs *vls)
490 val_labs_destroy (v->val_labs);
495 assert (val_labs_can_set_width (vls, v->width));
496 v->val_labs = val_labs_clone (vls);
497 val_labs_set_width (v->val_labs, v->width); /* Make the copy's width match V's. */
498 dict_var_changed (v);
502 /* Makes sure that V has a set of value labels,
503 by assigning one created for V's width to it if necessary. */
505 alloc_value_labels (struct variable *v)
507 if (v->val_labs == NULL)
508 v->val_labs = val_labs_create (v->width);
511 /* Attempts to add a value label with the given VALUE and LABEL
512 to V, first giving V a set of value labels if it has none. Returns true if successful, false otherwise (probably
513 due to an existing label). */
515 var_add_value_label (struct variable *v,
516 const union value *value, const char *label)
518 alloc_value_labels (v);
519 return val_labs_add (v->val_labs, value, label);
522 /* Adds or replaces a value label with the given VALUE and LABEL to V. */
526 var_replace_value_label (struct variable *v,
527 const union value *value, const char *label)
529 alloc_value_labels (v);
530 val_labs_replace (v->val_labs, value, label);
533 /* Removes V's value labels, if any, by passing a null VLS to var_set_value_labels. */
535 var_clear_value_labels (struct variable *v)
537 var_set_value_labels (v, NULL);
540 /* Returns the label associated with VALUE for variable V,
541 or a null pointer if none. The lookup is delegated to val_labs_find. */
543 var_lookup_value_label (const struct variable *v, const union value *value)
545 return val_labs_find (v->val_labs, value); /* NOTE(review): V's value labels may be a null pointer; presumably val_labs_find accepts that -- confirm. */
548 /* Appends to STR a string representing VALUE for variable V.
549 That is, if VALUE has a label, append that label,
550 otherwise format VALUE and append the formatted string.
551 STR must be a pointer to an initialised struct string. */
554 var_append_value_name (const struct variable *v, const union value *value,
557 const char *name = var_lookup_value_label (v, value);
560 char *s = data_out (value, var_get_encoding (v), &v->print);
561 ds_put_cstr (str, s);
565 ds_put_cstr (str, name);
568 /* Print and write formats. */
570 /* Returns V's print format specification. */
571 const struct fmt_spec *
572 var_get_print_format (const struct variable *v)
577 /* Sets V's print format specification to PRINT, which must be a
578 valid format specification for a variable of V's width
579 (ordinarily an output format, but input formats are not rejected). */
582 var_set_print_format (struct variable *v, const struct fmt_spec *print)
584 assert (fmt_check_width_compat (print, v->width));
586 dict_var_changed (v);
589 /* Returns V's write format specification. */
590 const struct fmt_spec *
591 var_get_write_format (const struct variable *v)
596 /* Sets V's write format specification to WRITE, which must be a
597 valid format specification for a variable of V's width
598 (ordinarily an output format, but input formats are not rejected). */
601 var_set_write_format (struct variable *v, const struct fmt_spec *write)
603 assert (fmt_check_width_compat (write, v->width));
605 dict_var_changed (v);
608 /* Sets V's print and write format specifications to FORMAT,
609 which must be a valid format specification for a variable of
610 V's width (ordinarily an output format, but input formats are not rejected). */
613 var_set_both_formats (struct variable *v, const struct fmt_spec *format)
615 var_set_print_format (v, format);
616 var_set_write_format (v, format);
619 /* Returns the default print and write format for a variable of
620 the given WIDTH, as set by var_create. The return value can be
621 used to reset a variable's print and write formats to the default. */
624 var_default_formats (int width)
627 ? fmt_for_output (FMT_F, 8, 2)
628 : fmt_for_output (FMT_A, width, 0));
631 /* Returns a string representing this variable, in the form most
632 appropriate from a human factors perspective, that is, its
633 variable label if it has one, otherwise its name. The result points at V's own label or name; it is not a copy. */
635 var_to_string (const struct variable *v)
637 return v->label != NULL ? v->label : v->name;
640 /* Returns V's variable label, or a null pointer if it has none. */
642 var_get_label (const struct variable *v)
647 /* Sets V's variable label to LABEL, stripping off leading and
648 trailing white space and truncating to 255 characters.
649 If LABEL is a null pointer or if LABEL is an empty string
650 (after stripping white space), then V's variable label (if any) is removed. */
653 var_set_label (struct variable *v, const char *label)
660 struct substring s = ss_cstr (label);
661 ss_trim (&s, ss_cstr (CC_SPACES));
662 ss_truncate (&s, 255);
663 if (!ss_is_empty (s))
664 v->label = ss_xstrdup (s);
666 dict_var_changed (v);
669 /* Removes any variable label from V, by passing a null LABEL to var_set_label. */
671 var_clear_label (struct variable *v)
673 var_set_label (v, NULL);
676 /* Returns true if V has a variable label, false otherwise. */
679 var_has_label (const struct variable *v)
681 return v->label != NULL;
684 /* Returns true if M is a valid variable measurement level, false otherwise. */
687 measure_is_valid (enum measure m)
689 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
692 /* Returns V's measurement level. */
694 var_get_measure (const struct variable *v)
699 /* Sets V's measurement level to MEASURE, which must be a valid measurement level (this is asserted). */
701 var_set_measure (struct variable *v, enum measure measure)
703 assert (measure_is_valid (measure));
704 v->measure = measure;
705 dict_var_changed (v);
708 /* Returns the default measurement level for a variable of the
709 given TYPE, as set by var_create. The return value can be
710 used to reset a variable's measurement level to the default. */
713 var_default_measure (enum val_type type)
715 return type == VAL_NUMERIC ? MEASURE_SCALE : MEASURE_NOMINAL;
718 /* Returns V's display width (the width of its data editor column), which applies only to GUIs. */
720 var_get_display_width (const struct variable *v)
722 return v->display_width;
725 /* Sets V's display width to NEW_WIDTH. */
727 var_set_display_width (struct variable *v, int new_width)
729 int old_width = v->display_width;
731 v->display_width = new_width;
733 if ( old_width != new_width) /* Report a display width change only when the value really changed. */
734 dict_var_display_width_changed (v);
736 dict_var_changed (v);
739 /* Returns the default display width for a variable of the given
740 WIDTH, as set by var_create: 8 when WIDTH is 0, otherwise WIDTH capped at 32. The return value can be used to
741 reset a variable's display width to the default. */
743 var_default_display_width (int width)
745 return width == 0 ? 8 : MIN (width, 32);
748 /* Returns true if A is a valid alignment, false otherwise. */
751 alignment_is_valid (enum alignment a)
753 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
756 /* Returns V's display alignment, which applies only to GUIs. */
758 var_get_alignment (const struct variable *v)
763 /* Sets V's display alignment to ALIGNMENT, which must be a valid alignment (this is asserted). */
765 var_set_alignment (struct variable *v, enum alignment alignment)
767 assert (alignment_is_valid (alignment));
768 v->alignment = alignment;
769 dict_var_changed (v);
772 /* Returns the default display alignment for a variable of the
773 given TYPE, as set by var_create: ALIGN_RIGHT for VAL_NUMERIC, ALIGN_LEFT otherwise. The return value can be
774 used to reset a variable's display alignment to the default. */
776 var_default_alignment (enum val_type type)
778 return type == VAL_NUMERIC ? ALIGN_RIGHT : ALIGN_LEFT;
781 /* Whether variables' values should be preserved from case to case. */
784 /* Returns true if variable V's value should be left from case to
785 case, instead of being reset to system-missing or blanks. */
787 var_get_leave (const struct variable *v)
792 /* Sets V's leave setting to LEAVE. LEAVE may be false only if var_must_leave (V) is false; this is asserted. */
794 var_set_leave (struct variable *v, bool leave)
796 assert (leave || !var_must_leave (v));
798 dict_var_changed (v);
801 /* Returns true if V must be left from case to case,
802 false if it can be set either way. V must be left exactly when its dictionary class is DC_SCRATCH. */
804 var_must_leave (const struct variable *v)
806 return var_get_dict_class (v) == DC_SCRATCH;
809 /* Returns the number of short names stored in VAR.
811 Short names are used only for system and portable file input
812 and output. They are upper-case only, not necessarily unique,
813 and limited to SHORT_NAME_LEN characters (plus a null
814 terminator). Ordinarily a variable has at most one short
815 name, but very long string variables (longer than 255 bytes)
816 may have more. A variable might not have any short name at
817 all if it hasn't been saved to or read from a system or portable file. */
820 var_get_short_name_cnt (const struct variable *var)
822 return var->short_name_cnt;
825 /* Returns VAR's short name with the given IDX, if it has one
826 with that index, or a null pointer otherwise. Short names may
827 be sparse: even if IDX is less than the number of short names
828 in VAR, this function may return a null pointer. An IDX at or beyond that number also yields a null pointer. */
830 var_get_short_name (const struct variable *var, size_t idx)
832 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
835 /* Sets VAR's short name with the given IDX to SHORT_NAME,
836 truncating it to SHORT_NAME_LEN characters and converting it
837 to uppercase in the process. Specifying a null pointer for
838 SHORT_NAME clears the specified short name. A non-null SHORT_NAME must be a plausible variable name (this is asserted). NOTE(review): the code below truncates to MAX_SHORT_STRING, not SHORT_NAME_LEN -- confirm the two agree. */
840 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
842 assert (short_name == NULL || var_is_plausible_name (short_name, false));
844 /* Clear old short name numbered IDX, if any. */
845 if (idx < var->short_name_cnt)
847 free (var->short_names[idx]);
848 var->short_names[idx] = NULL;
851 /* Install new short name for IDX. */
852 if (short_name != NULL)
854 if (idx >= var->short_name_cnt) /* The array is too short to hold slot IDX, so grow it. */
856 size_t old_cnt = var->short_name_cnt;
858 var->short_name_cnt = MAX (idx * 2, 1); /* New size always exceeds IDX: 1 for IDX 0, otherwise twice IDX. */
859 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
860 sizeof *var->short_names);
861 for (i = old_cnt; i < var->short_name_cnt; i++) /* Null out the newly added slots so unused ones read as absent. */
862 var->short_names[i] = NULL;
864 var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING);
865 str_uppercase (var->short_names[idx]);
868 dict_var_changed (var);
871 /* Clears V's short names, freeing each name and the array that holds them. */
873 var_clear_short_names (struct variable *v)
877 for (i = 0; i < v->short_name_cnt; i++)
878 free (v->short_names[i]);
879 free (v->short_names);
880 v->short_names = NULL; /* Leave V with no short names rather than dangling pointers. */
881 v->short_name_cnt = 0;
884 /* Relationship with dictionary. */
886 /* Returns V's index within its dictionary, the value
887 for which "dict_get_var (dict, index)" will return V.
888 V must be in a dictionary (this is asserted). */
890 var_get_dict_index (const struct variable *v)
892 assert (var_has_vardict (v));
893 return vardict_get_dict_index (v->vardict);
896 /* Returns V's index within the case represented by its
897 dictionary, that is, the value for which "case_data_idx (case,
898 index)" will return the data for V in that case.
899 V must be in a dictionary (this is asserted). */
901 var_get_case_index (const struct variable *v)
903 assert (var_has_vardict (v));
904 return vardict_get_case_index (v->vardict);
907 /* Returns V's auxiliary data, or a null pointer if none has been attached. */
910 var_get_aux (const struct variable *v)
915 /* Assigns auxiliary data AUX, which must not be null, to variable V, which must not
916 already have auxiliary data (both are asserted). Before V's auxiliary data is
917 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
918 may be appropriate for use as AUX_DTOR.) */
920 var_attach_aux (const struct variable *v_,
921 void *aux, void (*aux_dtor) (struct variable *))
923 struct variable *v = CONST_CAST (struct variable *, v_); /* V_ is declared const, but this function modifies the variable. */
924 assert (v->aux == NULL);
925 assert (aux != NULL);
927 v->aux_dtor = aux_dtor;
931 /* Removes auxiliary data, if any, from V, and returns it, without
932 calling any associated destructor. NOTE(review): despite "if any", the assertion below rejects a null AUX, so V presumably must have auxiliary data attached -- confirm. */
934 var_detach_aux (struct variable *v)
937 assert (aux != NULL);
942 /* Clears auxiliary data, if any, from V, and calls any
943 associated destructor. */
945 var_clear_aux (struct variable *v)
949 if (v->aux_dtor != NULL) /* The destructor is optional. */
955 /* This function is appropriate for use as an auxiliary data
956 destructor (passed as AUX_DTOR to var_attach_aux()) for the
957 case where the auxiliary data should be passed to free(). */
959 var_dtor_free (struct variable *v)
964 /* Returns variable V's attribute set. The caller may examine or
965 modify the attribute set, but must not destroy it. Destroying
966 V, or calling var_set_attributes() on V, will also destroy its attribute set. */
969 var_get_attributes (const struct variable *v)
971 return CONST_CAST (struct attrset *, &v->attributes);
974 /* Replaces variable V's attribute set by a copy of ATTRS. The old attribute set is destroyed first. */
976 var_set_attributes (struct variable *v, const struct attrset *attrs)
978 attrset_destroy (&v->attributes);
979 attrset_clone (&v->attributes, attrs);
982 /* Returns true if V has any custom attributes (attrset_count is above zero), false if it has none. */
984 var_has_attributes (const struct variable *v)
986 return attrset_count (&v->attributes) > 0;
989 /* Returns the encoding of values of variable VAR. (This is actually a
990 property of the dictionary.) Returns null if no specific encoding has been set. */
993 var_get_encoding (const struct variable *var)
995 return (var_has_vardict (var)
996 ? dict_get_encoding (vardict_get_dictionary (var->vardict))
1000 /* Returns V's vardict structure, as a non-const pointer even though V is const. */
1001 struct vardict_info *
1002 var_get_vardict (const struct variable *v)
1004 return CONST_CAST (struct vardict_info *, v->vardict);
1007 /* Sets V's vardict data to VARDICT. Only the pointer is stored; no copy is made. */
1009 var_set_vardict (struct variable *v, struct vardict_info *vardict)
1011 v->vardict = vardict;
1014 /* Returns true if V has vardict data, that is, if its vardict pointer is non-null. */
1016 var_has_vardict (const struct variable *v)
1018 return v->vardict != NULL;
1021 /* Clears V's vardict data. */
1023 var_clear_vardict (struct variable *v)