1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include <data/attributes.h>
23 #include <data/data-out.h>
24 #include <data/format.h>
25 #include <data/dictionary.h>
26 #include <data/identifier.h>
27 #include <data/missing-values.h>
28 #include <data/value-labels.h>
29 #include <data/vardict.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/assertion.h>
33 #include <libpspp/compiler.h>
34 #include <libpspp/hash-functions.h>
35 #include <libpspp/message.h>
36 #include <libpspp/str.h>
42 #define _(msgid) gettext (msgid)
47 /* Dictionary information. */
48 char name[VAR_NAME_LEN + 1]; /* Variable name. Mixed case. */
49 int width; /* 0 for numeric, otherwise string width. */
50 struct missing_values miss; /* Missing values. */
51 struct fmt_spec print; /* Default format for PRINT. */
52 struct fmt_spec write; /* Default format for WRITE. */
53 struct val_labs *val_labs; /* Value labels. */
54 char *label; /* Variable label. */
56 /* GUI information. */
57 enum measure measure; /* Nominal, ordinal, or continuous. */
58 int display_width; /* Width of data editor column. */
59 enum alignment alignment; /* Alignment of data in GUI. */
61 /* Case information. */
62 bool leave; /* Leave value from case to case? */
64 /* Data for use by containing dictionary. */
65 struct vardict_info *vardict;
67 /* Used only for system and portable file input and output.
70 size_t short_name_cnt;
72 /* Each command may use these fields as needed. */
74 void (*aux_dtor) (struct variable *);
76 /* Custom attributes. */
77 struct attrset attributes;
80 /* Creates and returns a new variable with the given NAME and
81 WIDTH and other fields initialized to default values. The
82 variable is not added to a dictionary; for that, use
83 dict_create_var instead.

   WIDTH must be between 0 and MAX_STRING inclusive (asserted);
   0 means a numeric variable.  NAME must satisfy
   var_is_plausible_name, which var_set_name asserts. */
85 var_create (const char *name, int width)
90 assert (width >= 0 && width <= MAX_STRING);
92 v = xmalloc (sizeof *v);
94 var_set_name (v, name);
96 mv_init (&v->miss, width);
/* var_must_leave looks at the variable's name (through its
   dictionary class), so the name has to be set before this. */
97 v->leave = var_must_leave (v);
/* Alignment and measure defaults depend only on the value type;
   display width and formats depend on the exact width. */
98 type = val_type_from_width (width);
99 v->alignment = var_default_alignment (type);
100 v->measure = var_default_measure (type);
101 v->display_width = var_default_display_width (width);
102 v->print = v->write = var_default_formats (width);
/* A new variable starts with no short names and an empty
   attribute set. */
105 v->short_names = NULL;
106 v->short_name_cnt = 0;
109 attrset_init (&v->attributes);
114 /* Creates and returns a clone of OLD_VAR. Most properties of
115 the new variable are copied from OLD_VAR, except:
117 - The variable's short name is not copied, because there is
118 no reason to give a new variable with potentially a new
119 name the same short name.
121 - The new variable is not added to OLD_VAR's dictionary by
122 default. Use dict_clone_var, instead, to do that.
125 var_clone (const struct variable *old_var)
127 struct variable *new_var = var_create (var_get_name (old_var),
128 var_get_width (old_var));
130 var_set_missing_values (new_var, var_get_missing_values (old_var));
131 var_set_print_format (new_var, var_get_print_format (old_var));
132 var_set_write_format (new_var, var_get_write_format (old_var));
133 var_set_value_labels (new_var, var_get_value_labels (old_var));
134 var_set_label (new_var, var_get_label (old_var));
135 var_set_measure (new_var, var_get_measure (old_var));
136 var_set_display_width (new_var, var_get_display_width (old_var));
137 var_set_alignment (new_var, var_get_alignment (old_var));
138 var_set_leave (new_var, var_get_leave (old_var));
139 var_set_attributes (new_var, var_get_attributes (old_var));
144 /* Destroys variable V.
145 V must not belong to a dictionary. If it does, use
146 dict_delete_var instead. */
148 var_destroy (struct variable *v)
152 assert (!var_has_vardict (v));
153 mv_destroy (&v->miss);
154 var_clear_short_names (v);
156 val_labs_destroy (v->val_labs);
162 /* Variable names. */
164 /* Return variable V's name. */
166 var_get_name (const struct variable *v)
171 /* Sets V's name to NAME.
172 Do not use this function for a variable in a dictionary. Use
173 dict_rename_var instead.

   Both preconditions are asserted: V must have no vardict data
   and NAME must pass var_is_plausible_name. */
175 var_set_name (struct variable *v, const char *name)
177 assert (!var_has_vardict (v));
178 assert (var_is_plausible_name (name, false));
/* Bounded copy into V's fixed-size name buffer. */
180 str_copy_trunc (v->name, sizeof v->name, name);
181 dict_var_changed (v);
184 /* Returns true if NAME is an acceptable name for a variable,
185 false otherwise. If ISSUE_ERROR is true, issues an
186 explanatory error message on failure. */
188 var_is_valid_name (const char *name, bool issue_error)
193 /* Note that strlen returns number of BYTES, not the number of
195 length = strlen (name);
197 plausible = var_is_plausible_name(name, issue_error);
203 if (!lex_is_id1 (name[0]))
206 msg (SE, _("Character `%c' (in %s) may not appear "
207 "as the first character in a variable name."),
213 for (i = 0; i < length; i++)
215 if (!lex_is_idn (name[i]))
218 msg (SE, _("Character `%c' (in %s) may not appear in "
228 /* Returns true if NAME is a plausible name for a variable,
229 false otherwise. If ISSUE_ERROR is true, issues an
230 explanatory error message on failure.
231 This function makes no use of LC_CTYPE.
234 var_is_plausible_name (const char *name, bool issue_error)
238 /* Note that strlen returns number of BYTES, not the number of
240 length = strlen (name);
244 msg (SE, _("Variable name cannot be empty string."));
247 else if (length > VAR_NAME_LEN)
250 msg (SE, _("Variable name %s exceeds %d-character limit."),
251 name, (int) VAR_NAME_LEN);
255 if (lex_id_to_token (ss_cstr (name)) != T_ID)
258 msg (SE, _("`%s' may not be used as a variable name because it "
259 "is a reserved word."), name);
266 /* Returns VAR's dictionary class. */
268 var_get_dict_class (const struct variable *var)
270 return dict_class_from_id (var->name);
273 /* A hsh_compare_func that orders variables A and B by their
276 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
278 const struct variable *a = a_;
279 const struct variable *b = b_;
281 return strcasecmp (a->name, b->name);
284 /* A hsh_hash_func that hashes variable V based on its name. */
286 hash_var_by_name (const void *v_, const void *aux UNUSED)
288 const struct variable *v = v_;
290 return hash_case_string (v->name, 0);
293 /* A hsh_compare_func that orders pointers to variables A and B
296 compare_var_ptrs_by_name (const void *a_, const void *b_,
297 const void *aux UNUSED)
299 struct variable *const *a = a_;
300 struct variable *const *b = b_;
302 return strcasecmp (var_get_name (*a), var_get_name (*b));
305 /* A hsh_compare_func that orders pointers to variables A and B
306 by their dictionary indexes.

   Returns -1 if A's index is lower, 0 if the indexes are equal,
   and 1 if A's index is higher.  Both variables must be in a
   dictionary, because var_get_dict_index asserts that.  AUX is
   unused. */
308 compare_var_ptrs_by_dict_index (const void *a_, const void *b_,
309 const void *aux UNUSED)
/* A_ and B_ point at "struct variable *" elements, not at the
   variables themselves. */
311 struct variable *const *a = a_;
312 struct variable *const *b = b_;
313 size_t a_index = var_get_dict_index (*a);
314 size_t b_index = var_get_dict_index (*b);
/* Compare rather than subtract: the indexes are size_t, so a
   difference could never be negative. */
316 return a_index < b_index ? -1 : a_index > b_index;
319 /* A hsh_hash_func that hashes pointer to variable V based on its
322 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
324 struct variable *const *v = v_;
326 return hash_case_string (var_get_name (*v), 0);
329 /* Returns the type of variable V. */
331 var_get_type (const struct variable *v)
333 return val_type_from_width (v->width);
336 /* Returns the width of variable V. */
338 var_get_width (const struct variable *v)
343 /* Changes the width of V to NEW_WIDTH.
344 This function should be used cautiously. */
346 var_set_width (struct variable *v, int new_width)
348 const int old_width = v->width;
350 if (old_width == new_width)
353 if (mv_is_resizable (&v->miss, new_width))
354 mv_resize (&v->miss, new_width);
357 mv_destroy (&v->miss);
358 mv_init (&v->miss, new_width);
361 if (v->val_labs != NULL)
363 if (val_labs_can_set_width (v->val_labs, new_width))
364 val_labs_set_width (v->val_labs, new_width);
367 val_labs_destroy (v->val_labs);
372 fmt_resize (&v->print, new_width);
373 fmt_resize (&v->write, new_width);
375 v->width = new_width;
376 dict_var_resized (v, old_width);
377 dict_var_changed (v);
380 /* Returns true if variable V is numeric, false otherwise. */
382 var_is_numeric (const struct variable *v)
384 return var_get_type (v) == VAL_NUMERIC;
387 /* Returns true if variable V is a string variable, false
390 var_is_alpha (const struct variable *v)
392 return var_get_type (v) == VAL_STRING;
395 /* Returns variable V's missing values. */
396 const struct missing_values *
397 var_get_missing_values (const struct variable *v)
402 /* Sets variable V's missing values to MISS, which must be of V's
403 width or at least resizable to V's width.
404 If MISS is null, then V's missing values, if any, are
407 var_set_missing_values (struct variable *v, const struct missing_values *miss)
411 assert (mv_is_resizable (miss, v->width));
412 mv_destroy (&v->miss);
413 mv_copy (&v->miss, miss);
414 mv_resize (&v->miss, v->width);
419 dict_var_changed (v);
422 /* Sets variable V to have no user-missing values. */
424 var_clear_missing_values (struct variable *v)
426 var_set_missing_values (v, NULL);
429 /* Returns true if V has any user-missing values,
432 var_has_missing_values (const struct variable *v)
434 return !mv_is_empty (&v->miss);
437 /* Returns true if VALUE is in the given CLASS of missing values
438 in V, false otherwise. */
440 var_is_value_missing (const struct variable *v, const union value *value,
443 return mv_is_value_missing (&v->miss, value, class);
446 /* Returns true if D is in the given CLASS of missing values in
448 V must be a numeric variable. */
450 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
452 return mv_is_num_missing (&v->miss, d, class);
455 /* Returns true if S[] is a missing value for V, false otherwise.
456 S[] must contain exactly as many characters as V's width.
457 V must be a string variable. */
459 var_is_str_missing (const struct variable *v, const uint8_t s[],
462 return mv_is_str_missing (&v->miss, s, class);
465 /* Returns variable V's value labels,
466 possibly a null pointer if it has none. */
467 const struct val_labs *
468 var_get_value_labels (const struct variable *v)
473 /* Returns true if variable V has at least one value label.

   NOTE(review): V's label set may be a null pointer (see
   var_get_value_labels), and it is passed to val_labs_count
   unchecked -- presumably val_labs_count treats null as an
   empty set; confirm. */
475 var_has_value_labels (const struct variable *v)
477 return val_labs_count (v->val_labs) > 0;
480 /* Sets variable V's value labels to a copy of VLS,
481 which must have a width equal to V's width or one that can be
482 changed to V's width.
483 If VLS is null, then V's value labels, if any, are removed.

   V's previous labels are destroyed first.  VLS itself is never
   modified: it is cloned, and only the clone is converted to V's
   width. */
485 var_set_value_labels (struct variable *v, const struct val_labs *vls)
487 val_labs_destroy (v->val_labs);
/* The width requirement documented above is enforced here. */
492 assert (val_labs_can_set_width (vls, v->width));
493 v->val_labs = val_labs_clone (vls);
494 val_labs_set_width (v->val_labs, v->width);
495 dict_var_changed (v);
499 /* Makes sure that V has a set of value labels,
500 by assigning one to it if necessary.

   A newly created set has V's current width.  An existing set is
   left untouched. */
502 alloc_value_labels (struct variable *v)
504 if (v->val_labs == NULL)
505 v->val_labs = val_labs_create (v->width);
508 /* Attempts to add a value label with the given VALUE and LABEL
509 to V. Returns true if successful, false otherwise (probably
510 due to an existing label).

   The return value is whatever val_labs_add reports. */
512 var_add_value_label (struct variable *v,
513 const union value *value, const char *label)
/* Create V's label set on demand.  This happens before the add,
   so V keeps the (possibly empty) set even if the add fails. */
515 alloc_value_labels (v);
516 return val_labs_add (v->val_labs, value, label);
519 /* Adds or replaces a value label with the given VALUE and LABEL
523 var_replace_value_label (struct variable *v,
524 const union value *value, const char *label)
526 alloc_value_labels (v);
527 val_labs_replace (v->val_labs, value, label);
530 /* Removes V's value labels, if any. */
532 var_clear_value_labels (struct variable *v)
534 var_set_value_labels (v, NULL);
537 /* Returns the label associated with VALUE for variable V,
538 or a null pointer if none. */
540 var_lookup_value_label (const struct variable *v, const union value *value)
542 return val_labs_find (v->val_labs, value);
545 /* Appends to STR a string representing VALUE for variable V.
546 That is, if VALUE has a label, append that label,
547 otherwise format VALUE and append the formatted string.
548 STR must be a pointer to an initialised struct string.
551 var_append_value_name (const struct variable *v, const union value *value,
554 const char *name = var_lookup_value_label (v, value);
557 char *s = data_out (value, var_get_encoding (v), &v->print);
558 ds_put_cstr (str, s);
562 ds_put_cstr (str, name);
565 /* Print and write formats. */
567 /* Returns V's print format specification. */
568 const struct fmt_spec *
569 var_get_print_format (const struct variable *v)
574 /* Sets V's print format specification to PRINT, which must be a
575 valid format specification for a variable of V's width
576 (ordinarily an output format, but input formats are not
579 var_set_print_format (struct variable *v, const struct fmt_spec *print)
581 assert (fmt_check_width_compat (print, v->width));
583 dict_var_changed (v);
586 /* Returns V's write format specification. */
587 const struct fmt_spec *
588 var_get_write_format (const struct variable *v)
593 /* Sets V's write format specification to WRITE, which must be a
594 valid format specification for a variable of V's width
595 (ordinarily an output format, but input formats are not
598 var_set_write_format (struct variable *v, const struct fmt_spec *write)
600 assert (fmt_check_width_compat (write, v->width));
602 dict_var_changed (v);
605 /* Sets V's print and write format specifications to FORMAT,
606 which must be a valid format specification for a variable of
607 V's width (ordinarily an output format, but input formats are
610 var_set_both_formats (struct variable *v, const struct fmt_spec *format)
612 var_set_print_format (v, format);
613 var_set_write_format (v, format);
616 /* Returns the default print and write format for a variable of
617 the given WIDTH, as set by var_create. The return value can be
618 used to reset a variable's print and write formats to the
621 var_default_formats (int width)
624 ? fmt_for_output (FMT_F, 8, 2)
625 : fmt_for_output (FMT_A, width, 0));
628 /* Returns a string representing this variable, in the form most
629 appropriate from a human factors perspective, that is, its
630 variable label if it has one, otherwise its name.

   The result points into V's own storage; it is not a copy. */
632 var_to_string (const struct variable *v)
634 return v->label != NULL ? v->label : v->name;
637 /* Returns V's variable label, or a null pointer if it has none. */
639 var_get_label (const struct variable *v)
644 /* Sets V's variable label to LABEL, stripping off leading and
645 trailing white space and truncating to 255 characters.
646 If LABEL is a null pointer or if LABEL is an empty string
647 (after stripping white space), then V's variable label (if
650 var_set_label (struct variable *v, const char *label)
657 struct substring s = ss_cstr (label);
658 ss_trim (&s, ss_cstr (CC_SPACES));
659 ss_truncate (&s, 255);
660 if (!ss_is_empty (s))
661 v->label = ss_xstrdup (s);
663 dict_var_changed (v);
666 /* Removes any variable label from V. */
668 var_clear_label (struct variable *v)
670 var_set_label (v, NULL);
673 /* Returns true if V has a variable label,
676 var_has_label (const struct variable *v)
678 return v->label != NULL;
681 /* Returns true if M is a valid variable measurement level,
684 measure_is_valid (enum measure m)
686 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
689 /* Returns V's measurement level. */
691 var_get_measure (const struct variable *v)
696 /* Sets V's measurement level to MEASURE. */
698 var_set_measure (struct variable *v, enum measure measure)
700 assert (measure_is_valid (measure));
701 v->measure = measure;
702 dict_var_changed (v);
705 /* Returns the default measurement level for a variable of the
706 given TYPE, as set by var_create. The return value can be
707 used to reset a variable's measurement level to the
710 var_default_measure (enum val_type type)
712 return type == VAL_NUMERIC ? MEASURE_SCALE : MEASURE_NOMINAL;
715 /* Returns V's display width, which applies only to GUIs. */
717 var_get_display_width (const struct variable *v)
719 return v->display_width;
722 /* Sets V's display width to NEW_WIDTH.

   dict_var_display_width_changed is called only when the width
   actually changes; dict_var_changed is called either way. */
724 var_set_display_width (struct variable *v, int new_width)
726 int old_width = v->display_width;
728 v->display_width = new_width;
730 if ( old_width != new_width)
731 dict_var_display_width_changed (v);
733 dict_var_changed (v);
736 /* Returns the default display width for a variable of the given
737 WIDTH, as set by var_create. The return value can be used to
738 reset a variable's display width to the default.

   A WIDTH of 0 (numeric) yields 8; any other WIDTH yields WIDTH
   itself, capped at 32. */
740 var_default_display_width (int width)
742 return width == 0 ? 8 : MIN (width, 32);
745 /* Returns true if A is a valid alignment,
748 alignment_is_valid (enum alignment a)
750 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
753 /* Returns V's display alignment, which applies only to GUIs. */
755 var_get_alignment (const struct variable *v)
760 /* Sets V's display alignment to ALIGNMENT. */
762 var_set_alignment (struct variable *v, enum alignment alignment)
764 assert (alignment_is_valid (alignment));
765 v->alignment = alignment;
766 dict_var_changed (v);
769 /* Returns the default display alignment for a variable of the
770 given TYPE, as set by var_create. The return value can be
771 used to reset a variable's display alignment to the default. */
773 var_default_alignment (enum val_type type)
775 return type == VAL_NUMERIC ? ALIGN_RIGHT : ALIGN_LEFT;
778 /* Whether variables' values should be preserved from case to
781 /* Returns true if variable V's value should be left from case to
782 case, instead of being reset to system-missing or blanks. */
784 var_get_leave (const struct variable *v)
789 /* Sets V's leave setting to LEAVE.

   LEAVE may be false only if var_must_leave (V) is false; this
   is asserted. */
791 var_set_leave (struct variable *v, bool leave)
793 assert (leave || !var_must_leave (v));
795 dict_var_changed (v);
798 /* Returns true if V must be left from case to case,
799 false if it can be set either way.

   That is the case exactly when V's dictionary class is
   DC_SCRATCH. */
801 var_must_leave (const struct variable *v)
803 return var_get_dict_class (v) == DC_SCRATCH;
806 /* Returns the number of short names stored in VAR.
808 Short names are used only for system and portable file input
809 and output. They are upper-case only, not necessarily unique,
810 and limited to SHORT_NAME_LEN characters (plus a null
811 terminator). Ordinarily a variable has at most one short
812 name, but very long string variables (longer than 255 bytes)
813 may have more. A variable might not have any short name at
814 all if it hasn't been saved to or read from a system or
817 var_get_short_name_cnt (const struct variable *var)
819 return var->short_name_cnt;
822 /* Returns VAR's short name with the given IDX, if it has one
823 with that index, or a null pointer otherwise. Short names may
824 be sparse: even if IDX is less than the number of short names
825 in VAR, this function may return a null pointer. */
827 var_get_short_name (const struct variable *var, size_t idx)
829 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
832 /* Sets VAR's short name with the given IDX to SHORT_NAME,
833 truncating it to SHORT_NAME_LEN characters and converting it
834 to uppercase in the process. Specifying a null pointer for
835 SHORT_NAME clears the specified short name.

   A non-null SHORT_NAME must pass var_is_plausible_name
   (asserted).  VAR stores its own copy of the name, so the caller
   keeps ownership of SHORT_NAME.

   NOTE(review): the copy below is limited by MAX_SHORT_STRING,
   not SHORT_NAME_LEN -- confirm that the two constants agree. */
837 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
839 assert (short_name == NULL || var_is_plausible_name (short_name, false));
841 /* Clear old short name numbered IDX, if any. */
842 if (idx < var->short_name_cnt)
844 free (var->short_names[idx]);
845 var->short_names[idx] = NULL;
848 /* Install new short name for IDX. */
849 if (short_name != NULL)
851 if (idx >= var->short_name_cnt)
853 size_t old_cnt = var->short_name_cnt;
/* Grow the array to twice IDX (at least one element) so that
   IDX becomes a valid slot, then null out every added slot. */
855 var->short_name_cnt = MAX (idx * 2, 1);
856 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
857 sizeof *var->short_names);
858 for (i = old_cnt; i < var->short_name_cnt; i++)
859 var->short_names[i] = NULL;
/* Store a length-limited private copy and upper-case it in
   place. */
861 var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING);
862 str_uppercase (var->short_names[idx]);
865 dict_var_changed (var);
868 /* Clears V's short names.

   Frees every stored name and the array that holds them, then
   resets V to the "no short names" state (null array, count 0). */
870 var_clear_short_names (struct variable *v)
874 for (i = 0; i < v->short_name_cnt; i++)
875 free (v->short_names[i]);
876 free (v->short_names);
877 v->short_names = NULL;
878 v->short_name_cnt = 0;
881 /* Relationship with dictionary. */
883 /* Returns V's index within its dictionary, the value
884 for which "dict_get_var (dict, index)" will return V.
885 V must be in a dictionary. */
887 var_get_dict_index (const struct variable *v)
889 assert (var_has_vardict (v));
890 return vardict_get_dict_index (v->vardict);
893 /* Returns V's index within the case represented by its
894 dictionary, that is, the value for which "case_data_idx (case,
895 index)" will return the data for V in that case.
896 V must be in a dictionary. */
898 var_get_case_index (const struct variable *v)
900 assert (var_has_vardict (v));
901 return vardict_get_case_index (v->vardict);
904 /* Returns V's auxiliary data, or a null pointer if none has been
907 var_get_aux (const struct variable *v)
912 /* Assign auxiliary data AUX to variable V, which must not
913 already have auxiliary data. Before V's auxiliary data is
914 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
915 may be appropriate for use as AUX_DTOR.)

   AUX must not be null.  Both requirements are asserted.
   AUX_DTOR may be a null pointer: var_clear_aux checks for that
   before using it. */
917 var_attach_aux (const struct variable *v_,
918 void *aux, void (*aux_dtor) (struct variable *))
/* V_ is const in the signature, but V's aux fields are written
   through it, hence the cast. */
920 struct variable *v = CONST_CAST (struct variable *, v_);
921 assert (v->aux == NULL);
922 assert (aux != NULL);
924 v->aux_dtor = aux_dtor;
928 /* Remove auxiliary data, if any, from V, and return it, without
929 calling any associated destructor. */
931 var_detach_aux (struct variable *v)
934 assert (aux != NULL);
939 /* Clears auxiliary data, if any, from V, and calls any
940 associated destructor. */
942 var_clear_aux (struct variable *v)
946 if (v->aux_dtor != NULL)
952 /* This function is appropriate for use as an auxiliary data
953 destructor (passed as AUX_DTOR to var_attach_aux()) for the
954 case where the auxiliary data should be passed to free(). */
956 var_dtor_free (struct variable *v)
961 /* Returns variable V's attribute set. The caller may examine or
962 modify the attribute set, but must not destroy it. Destroying
963 V, or calling var_set_attributes() on V, will also destroy its
966 var_get_attributes (const struct variable *v)
968 return CONST_CAST (struct attrset *, &v->attributes);
971 /* Replaces variable V's attribute set by a copy of ATTRS.

   V's existing set is destroyed before ATTRS is cloned, so ATTRS
   must not be V's own attribute set (for example, the pointer
   returned by var_get_attributes (V)).

   NOTE(review): unlike the other setters in this file, no
   dict_var_changed call is visible here -- confirm that this is
   intentional. */
973 var_set_attributes (struct variable *v, const struct attrset *attrs)
975 attrset_destroy (&v->attributes);
976 attrset_clone (&v->attributes, attrs);
979 /* Returns true if V has any custom attributes, false if it has none. */
981 var_has_attributes (const struct variable *v)
983 return attrset_count (&v->attributes) > 0;
986 /* Returns the encoding of values of variable VAR. (This is actually a
987 property of the dictionary.) Returns null if no specific encoding has been
990 var_get_encoding (const struct variable *var)
992 return (var_has_vardict (var)
993 ? dict_get_encoding (vardict_get_dictionary (var->vardict))
997 /* Returns V's vardict structure. */
998 struct vardict_info *
999 var_get_vardict (const struct variable *v)
1001 return CONST_CAST (struct vardict_info *, v->vardict);
1004 /* Sets V's vardict data to VARDICT. */
1006 var_set_vardict (struct variable *v, struct vardict_info *vardict)
1008 v->vardict = vardict;
1011 /* Returns true if V has vardict data. */
1013 var_has_vardict (const struct variable *v)
1015 return v->vardict != NULL;
1018 /* Clears V's vardict data. */
1020 var_clear_vardict (struct variable *v)