1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "dictionary.h"
26 #include "identifier.h"
27 #include "missing-values.h"
28 #include "value-labels.h"
31 #include <libpspp/misc.h>
32 #include <libpspp/assertion.h>
33 #include <libpspp/compiler.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/message.h>
36 #include <libpspp/str.h>
41 #define _(msgid) gettext (msgid)
46 /* Dictionary information. */
47 char name[VAR_NAME_LEN + 1]; /* Variable name. Mixed case. */
48 int width; /* 0 for numeric, otherwise string width. */
49 struct missing_values miss; /* Missing values. */
50 struct fmt_spec print; /* Default format for PRINT. */
51 struct fmt_spec write; /* Default format for WRITE. */
52 struct val_labs *val_labs; /* Value labels. */
53 char *label; /* Variable label. */
55 /* GUI information. */
56 enum measure measure; /* Nominal, ordinal, or continuous. */
57 int display_width; /* Width of data editor column. */
58 enum alignment alignment; /* Alignment of data in GUI. */
60 /* Case information. */
61 bool leave; /* Leave value from case to case? */
63 /* Data for use by containing dictionary. */
64 struct vardict_info vardict;
66 /* Used only for system and portable file input and output.
69 size_t short_name_cnt;
71 /* Each command may use these fields as needed. */
73 void (*aux_dtor) (struct variable *);
75 /* Values of a categorical variable. Procedures need
76 vectors with binary entries, so any variable of type ALPHA will
77 have its values stored here. */
78 struct cat_vals *obs_vals;
81 /* Creates and returns a new variable with the given NAME and
82 WIDTH and other fields initialized to default values. The
83 variable is not added to a dictionary; for that, use
84 dict_create_var instead. */
86 var_create (const char *name, int width)
91 assert (width >= 0 && width <= MAX_STRING);
93 v = xmalloc (sizeof *v);
94 v->vardict.dict_index = v->vardict.case_index = -1;
95 var_set_name (v, name);
97 mv_init (&v->miss, width);
98 v->leave = var_must_leave (v);
99 type = val_type_from_width (width);
100 v->alignment = var_default_alignment (type);
101 v->measure = var_default_measure (type);
102 v->display_width = var_default_display_width (width);
103 v->print = v->write = var_default_formats (width);
106 v->short_names = NULL;
107 v->short_name_cnt = 0;
115 /* Creates and returns a clone of OLD_VAR. Most properties of
116 the new variable are copied from OLD_VAR, except:
118 - The variable's short name is not copied, because there is
119 no reason to give a new variable with potentially a new
120 name the same short name.
122 - The new variable is not added to OLD_VAR's dictionary by
123 default. Use dict_clone_var, instead, to do that.
125 - Auxiliary data and obs_vals are not copied. */
127 var_clone (const struct variable *old_var)
129 struct variable *new_var = var_create (var_get_name (old_var),
130 var_get_width (old_var));
132 var_set_missing_values (new_var, var_get_missing_values (old_var));
133 var_set_print_format (new_var, var_get_print_format (old_var));
134 var_set_write_format (new_var, var_get_write_format (old_var));
135 var_set_value_labels (new_var, var_get_value_labels (old_var));
136 var_set_label (new_var, var_get_label (old_var));
137 var_set_measure (new_var, var_get_measure (old_var));
138 var_set_display_width (new_var, var_get_display_width (old_var));
139 var_set_alignment (new_var, var_get_alignment (old_var));
140 var_set_leave (new_var, var_get_leave (old_var));
145 /* Destroys variable V.
146 V must not belong to a dictionary. If it does, use
147 dict_delete_var instead. */
149 var_destroy (struct variable *v)
153 assert (!var_has_vardict (v));
154 cat_stored_values_destroy (v->obs_vals);
155 var_clear_short_names (v);
157 val_labs_destroy (v->val_labs);
163 /* Variable names. */
165 /* Return variable V's name. */
167 var_get_name (const struct variable *v)
172 /* Sets V's name to NAME.
173 Do not use this function for a variable in a dictionary. Use
174 dict_rename_var instead. */
176 var_set_name (struct variable *v, const char *name)
178 assert (v->vardict.dict_index == -1);
179 assert (var_is_plausible_name (name, false));
181 str_copy_trunc (v->name, sizeof v->name, name);
182 dict_var_changed (v);
185 /* Returns true if NAME is an acceptable name for a variable,
186 false otherwise. If ISSUE_ERROR is true, issues an
187 explanatory error message on failure. */
189 var_is_valid_name (const char *name, bool issue_error)
194 assert (name != NULL);
196 /* Note that strlen returns number of BYTES, not the number of
198 length = strlen (name);
200 plausible = var_is_plausible_name(name, issue_error);
206 if (!lex_is_id1 (name[0]))
209 msg (SE, _("Character `%c' (in %s) may not appear "
210 "as the first character in a variable name."),
216 for (i = 0; i < length; i++)
218 if (!lex_is_idn (name[i]))
221 msg (SE, _("Character `%c' (in %s) may not appear in "
231 /* Returns true if NAME is a plausible name for a variable,
232 false otherwise. If ISSUE_ERROR is true, issues an
233 explanatory error message on failure.
234 This function makes no use of LC_CTYPE. */
237 var_is_plausible_name (const char *name, bool issue_error)
241 assert (name != NULL);
243 /* Note that strlen returns number of BYTES, not the number of
245 length = strlen (name);
249 msg (SE, _("Variable name cannot be empty string."));
252 else if (length > VAR_NAME_LEN)
255 msg (SE, _("Variable name %s exceeds %d-character limit."),
256 name, (int) VAR_NAME_LEN);
260 if (lex_id_to_token (ss_cstr (name)) != T_ID)
263 msg (SE, _("`%s' may not be used as a variable name because it "
264 "is a reserved word."), name);
271 /* Returns VAR's dictionary class. */
273 var_get_dict_class (const struct variable *var)
275 return dict_class_from_id (var->name);
278 /* A hsh_compare_func that orders variables A and B by their names, ignoring case. */
281 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
283 const struct variable *a = a_;
284 const struct variable *b = b_;
286 return strcasecmp (a->name, b->name);
289 /* A hsh_hash_func that hashes variable V based on its name. */
291 hash_var_by_name (const void *v_, const void *aux UNUSED)
293 const struct variable *v = v_;
295 return hsh_hash_case_string (v->name);
298 /* A hsh_compare_func that orders pointers to variables A and B by their names, ignoring case. */
301 compare_var_ptrs_by_name (const void *a_, const void *b_,
302 const void *aux UNUSED)
304 struct variable *const *a = a_;
305 struct variable *const *b = b_;
307 return strcasecmp (var_get_name (*a), var_get_name (*b));
310 /* A hsh_hash_func that hashes a pointer to variable V based on its name. */
313 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
315 struct variable *const *v = v_;
317 return hsh_hash_case_string (var_get_name (*v));
320 /* Returns the type of variable V. */
322 var_get_type (const struct variable *v)
324 return val_type_from_width (v->width);
327 /* Returns the width of variable V. */
329 var_get_width (const struct variable *v)
334 /* Changes the width of V to NEW_WIDTH.
335 This function should be used cautiously. */
337 var_set_width (struct variable *v, int new_width)
339 const int old_width = v->width;
341 if (mv_is_resizable (&v->miss, new_width))
342 mv_resize (&v->miss, new_width);
344 mv_init (&v->miss, new_width);
346 if (v->val_labs != NULL)
348 if (val_labs_can_set_width (v->val_labs, new_width))
349 val_labs_set_width (v->val_labs, new_width);
352 val_labs_destroy (v->val_labs);
357 fmt_resize (&v->print, new_width);
358 fmt_resize (&v->write, new_width);
360 v->width = new_width;
363 const int old_val_count = value_cnt_from_width (old_width);
364 const int new_val_count = value_cnt_from_width (new_width);
366 if ( old_val_count != new_val_count)
367 dict_var_resized (v, new_val_count - old_val_count);
370 dict_var_changed (v);
373 /* Returns true if variable V is numeric, false otherwise. */
375 var_is_numeric (const struct variable *v)
377 return var_get_type (v) == VAL_NUMERIC;
380 /* Returns true if variable V is a string variable, false otherwise. */
383 var_is_alpha (const struct variable *v)
385 return var_get_type (v) == VAL_STRING;
388 /* Returns true if variable V is a short string variable, false otherwise. */
391 var_is_short_string (const struct variable *v)
393 return v->width > 0 && v->width <= MAX_SHORT_STRING;
396 /* Returns true if variable V is a long string variable, false otherwise. */
399 var_is_long_string (const struct variable *v)
401 return v->width > MAX_SHORT_STRING;
404 /* Returns the number of "union value"s needed to store a value of variable V. */
407 var_get_value_cnt (const struct variable *v)
409 return value_cnt_from_width (v->width);
412 /* Returns variable V's missing values. */
413 const struct missing_values *
414 var_get_missing_values (const struct variable *v)
419 /* Sets variable V's missing values to MISS, which must be of V's
420 width or at least resizable to V's width.
421 If MISS is null, then V's missing values, if any, are cleared. */
424 var_set_missing_values (struct variable *v, const struct missing_values *miss)
428 assert (mv_is_resizable (miss, v->width));
429 mv_copy (&v->miss, miss);
430 mv_resize (&v->miss, v->width);
433 mv_init (&v->miss, v->width);
435 dict_var_changed (v);
438 /* Sets variable V to have no user-missing values. */
440 var_clear_missing_values (struct variable *v)
442 var_set_missing_values (v, NULL);
445 /* Returns true if V has any user-missing values, false otherwise. */
448 var_has_missing_values (const struct variable *v)
450 return !mv_is_empty (&v->miss);
453 /* Returns true if VALUE is in the given CLASS of missing values
454 in V, false otherwise. */
456 var_is_value_missing (const struct variable *v, const union value *value,
459 return mv_is_value_missing (&v->miss, value, class);
462 /* Returns true if D is in the given CLASS of missing values in V, false otherwise.
464 V must be a numeric variable. */
466 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
468 return mv_is_num_missing (&v->miss, d, class);
471 /* Returns true if S[] is a missing value for V, false otherwise.
472 S[] must contain exactly as many characters as V's width.
473 V must be a string variable. */
475 var_is_str_missing (const struct variable *v, const char s[],
478 return mv_is_str_missing (&v->miss, s, class);
481 /* Returns variable V's value labels,
482 possibly a null pointer if it has none. */
483 const struct val_labs *
484 var_get_value_labels (const struct variable *v)
489 /* Returns true if variable V has at least one value label. */
491 var_has_value_labels (const struct variable *v)
493 return val_labs_count (v->val_labs) > 0;
496 /* Sets variable V's value labels to a copy of VLS,
497 which must have a width equal to V's width or one that can be
498 changed to V's width.
499 If VLS is null, then V's value labels, if any, are removed. */
501 var_set_value_labels (struct variable *v, const struct val_labs *vls)
503 val_labs_destroy (v->val_labs);
508 assert (val_labs_can_set_width (vls, v->width));
509 v->val_labs = val_labs_clone (vls);
510 val_labs_set_width (v->val_labs, v->width);
511 dict_var_changed (v);
515 /* Makes sure that V has a set of value labels,
516 by assigning one to it if necessary. */
518 alloc_value_labels (struct variable *v)
520 assert (!var_is_long_string (v));
521 if (v->val_labs == NULL)
522 v->val_labs = val_labs_create (v->width);
525 /* Attempts to add a value label with the given VALUE and LABEL
526 to V. Returns true if successful, false if VALUE has an existing
527 label. V must not be a long string variable (this is asserted). */
529 var_add_value_label (struct variable *v,
530 const union value *value, const char *label)
532 alloc_value_labels (v);
533 return val_labs_add (v->val_labs, *value, label);
536 /* Adds or replaces a value label with the given VALUE and LABEL in V.
538 V must not be a long string variable (this is asserted). */
540 var_replace_value_label (struct variable *v,
541 const union value *value, const char *label)
543 alloc_value_labels (v);
544 val_labs_replace (v->val_labs, *value, label);
547 /* Removes V's value labels, if any. */
549 var_clear_value_labels (struct variable *v)
551 var_set_value_labels (v, NULL);
554 /* Returns the label associated with VALUE for variable V,
555 or a null pointer if none. */
557 var_lookup_value_label (const struct variable *v, const union value *value)
559 return val_labs_find (v->val_labs, *value);
562 /* Append STR with a string representing VALUE for variable V.
563 That is, if VALUE has a label, append that label,
564 otherwise format VALUE and append the formatted string.
565 STR must be a pointer to an initialised struct string. */
568 var_append_value_name (const struct variable *v, const union value *value,
571 const char *name = var_lookup_value_label (v, value);
574 char *s = ds_put_uninit (str, v->print.w);
575 data_out (value, &v->print, s);
578 ds_put_cstr (str, name);
582 /* Print and write formats. */
584 /* Returns V's print format specification. */
585 const struct fmt_spec *
586 var_get_print_format (const struct variable *v)
591 /* Sets V's print format specification to PRINT, which must be a
592 valid format specification for a variable of V's width
593 (ordinarily an output format, but input formats are not rejected). */
596 var_set_print_format (struct variable *v, const struct fmt_spec *print)
598 assert (fmt_check_width_compat (print, v->width));
600 dict_var_changed (v);
603 /* Returns V's write format specification. */
604 const struct fmt_spec *
605 var_get_write_format (const struct variable *v)
610 /* Sets V's write format specification to WRITE, which must be a
611 valid format specification for a variable of V's width
612 (ordinarily an output format, but input formats are not rejected). */
615 var_set_write_format (struct variable *v, const struct fmt_spec *write)
617 assert (fmt_check_width_compat (write, v->width));
619 dict_var_changed (v);
622 /* Sets V's print and write format specifications to FORMAT,
623 which must be a valid format specification for a variable of
624 V's width (ordinarily an output format, but input formats are not rejected). */
627 var_set_both_formats (struct variable *v, const struct fmt_spec *format)
629 var_set_print_format (v, format);
630 var_set_write_format (v, format);
633 /* Returns the default print and write format for a variable of
634 the given WIDTH, as set by var_create. The return value can be
635 used to reset a variable's print and write formats to the default. */
638 var_default_formats (int width)
641 ? fmt_for_output (FMT_F, 8, 2)
642 : fmt_for_output (FMT_A, width, 0));
645 /* Return a string representing this variable, in the form most
646 appropriate from a human factors perspective, that is, its
647 variable label if it has one, otherwise its name. */
649 var_to_string (const struct variable *v)
651 return v->label != NULL ? v->label : v->name;
654 /* Returns V's variable label, or a null pointer if it has none. */
656 var_get_label (const struct variable *v)
661 /* Sets V's variable label to LABEL, stripping off leading and
662 trailing white space and truncating to 255 characters.
663 If LABEL is a null pointer or if LABEL is an empty string
664 (after stripping white space), then V's variable label (if any) is removed. */
667 var_set_label (struct variable *v, const char *label)
674 struct substring s = ss_cstr (label);
675 ss_trim (&s, ss_cstr (CC_SPACES));
676 ss_truncate (&s, 255);
677 if (!ss_is_empty (s))
678 v->label = ss_xstrdup (s);
679 dict_var_changed (v);
683 /* Removes any variable label from V. */
685 var_clear_label (struct variable *v)
687 var_set_label (v, NULL);
690 /* Returns true if V has a variable label, false otherwise. */
693 var_has_label (const struct variable *v)
695 return v->label != NULL;
698 /* Returns true if M is a valid variable measurement level, false otherwise. */
701 measure_is_valid (enum measure m)
703 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
706 /* Returns V's measurement level. */
708 var_get_measure (const struct variable *v)
713 /* Sets V's measurement level to MEASURE. */
715 var_set_measure (struct variable *v, enum measure measure)
717 assert (measure_is_valid (measure));
718 v->measure = measure;
719 dict_var_changed (v);
722 /* Returns the default measurement level for a variable of the
723 given TYPE, as set by var_create. The return value can be
724 used to reset a variable's measurement level to the default. */
727 var_default_measure (enum val_type type)
729 return type == VAL_NUMERIC ? MEASURE_SCALE : MEASURE_NOMINAL;
732 /* Returns V's display width, which applies only to GUIs. */
734 var_get_display_width (const struct variable *v)
736 return v->display_width;
739 /* Sets V's display width to DISPLAY_WIDTH. */
741 var_set_display_width (struct variable *v, int display_width)
743 v->display_width = display_width;
744 dict_var_changed (v);
747 /* Returns the default display width for a variable of the given
748 WIDTH, as set by var_create. The return value can be used to
749 reset a variable's display width to the default. */
751 var_default_display_width (int width)
753 return width == 0 ? 8 : MIN (width, 32);
756 /* Returns true if A is a valid alignment, false otherwise. */
759 alignment_is_valid (enum alignment a)
761 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
764 /* Returns V's display alignment, which applies only to GUIs. */
766 var_get_alignment (const struct variable *v)
771 /* Sets V's display alignment to ALIGNMENT. */
773 var_set_alignment (struct variable *v, enum alignment alignment)
775 assert (alignment_is_valid (alignment));
776 v->alignment = alignment;
777 dict_var_changed (v);
780 /* Returns the default display alignment for a variable of the
781 given TYPE, as set by var_create. The return value can be
782 used to reset a variable's display alignment to the default. */
784 var_default_alignment (enum val_type type)
786 return type == VAL_NUMERIC ? ALIGN_RIGHT : ALIGN_LEFT;
789 /* Whether variables' values should be preserved from case to case. */
792 /* Returns true if variable V's value should be left from case to
793 case, instead of being reset to system-missing or blanks. */
795 var_get_leave (const struct variable *v)
800 /* Sets V's leave setting to LEAVE. */
802 var_set_leave (struct variable *v, bool leave)
804 assert (leave || !var_must_leave (v));
806 dict_var_changed (v);
809 /* Returns true if V must be left from case to case,
810 false if it can be set either way. */
812 var_must_leave (const struct variable *v)
814 return var_get_dict_class (v) == DC_SCRATCH;
817 /* Returns the number of short names stored in VAR.
819 Short names are used only for system and portable file input
820 and output. They are upper-case only, not necessarily unique,
821 and limited to SHORT_NAME_LEN characters (plus a null
822 terminator). Ordinarily a variable has at most one short
823 name, but very long string variables (longer than 255 bytes)
824 may have more. A variable might not have any short name at
825 all if it hasn't been saved to or read from a system or portable file. */
828 var_get_short_name_cnt (const struct variable *var)
830 return var->short_name_cnt;
833 /* Returns VAR's short name with the given IDX, if it has one
834 with that index, or a null pointer otherwise. Short names may
835 be sparse: even if IDX is less than the number of short names
836 in VAR, this function may return a null pointer. */
838 var_get_short_name (const struct variable *var, size_t idx)
840 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
843 /* Sets VAR's short name with the given IDX to SHORT_NAME,
844 truncating it to SHORT_NAME_LEN characters and converting it
845 to uppercase in the process. Specifying a null pointer for
846 SHORT_NAME clears the specified short name. */
848 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
850 assert (var != NULL);
851 assert (short_name == NULL || var_is_plausible_name (short_name, false));
853 /* Clear old short name numbered IDX, if any. */
854 if (idx < var->short_name_cnt)
856 free (var->short_names[idx]);
857 var->short_names[idx] = NULL;
860 /* Install new short name for IDX. */
861 if (short_name != NULL)
863 if (idx >= var->short_name_cnt)
865 size_t old_cnt = var->short_name_cnt;
/* Grow the array so that slot IDX exists: the new count is twice IDX
   (at least 1), which always exceeds IDX.  Slots added by the growth
   are set to null so that unused indexes read back as "no short
   name". */
867 var->short_name_cnt = MAX (idx * 2, 1);
868 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
869 sizeof *var->short_names);
870 for (i = old_cnt; i < var->short_name_cnt; i++)
871 var->short_names[i] = NULL;
/* Store an upper-cased private copy, truncated to the limit passed
   below.
   NOTE(review): this function's header comment promises truncation to
   SHORT_NAME_LEN, but the limit used here is MAX_SHORT_STRING --
   confirm the two constants are meant to agree, or switch to
   SHORT_NAME_LEN. */
873 var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING);
874 str_uppercase (var->short_names[idx]);
/* Notify via dict_var_changed once the short-name update is done. */
877 dict_var_changed (var);
880 /* Clears V's short names. */
882 var_clear_short_names (struct variable *v)
886 for (i = 0; i < v->short_name_cnt; i++)
887 free (v->short_names[i]);
888 free (v->short_names);
889 v->short_names = NULL;
890 v->short_name_cnt = 0;
893 /* Relationship with dictionary. */
895 /* Returns V's index within its dictionary, the value
896 for which "dict_get_var (dict, index)" will return V.
897 V must be in a dictionary. */
899 var_get_dict_index (const struct variable *v)
901 assert (v->vardict.dict_index != -1);
902 return v->vardict.dict_index;
905 /* Returns V's index within the case represented by its
906 dictionary, that is, the value for which "case_data_idx (case,
907 index)" will return the data for V in that case.
908 V must be in a dictionary. */
910 var_get_case_index (const struct variable *v)
912 assert (v->vardict.case_index != -1);
913 return v->vardict.case_index;
916 /* Returns V's auxiliary data, or a null pointer if none has been attached. */
919 var_get_aux (const struct variable *v)
924 /* Assign auxiliary data AUX to variable V, which must not
925 already have auxiliary data. Before V's auxiliary data is
926 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
927 may be appropriate for use as AUX_DTOR.) */
929 var_attach_aux (const struct variable *v_,
930 void *aux, void (*aux_dtor) (struct variable *))
932 struct variable *v = (struct variable *) v_ ; /* cast away const */
933 assert (v->aux == NULL);
934 assert (aux != NULL);
936 v->aux_dtor = aux_dtor;
940 /* Remove auxiliary data, if any, from V, and return it, without
941 calling any associated destructor. */
943 var_detach_aux (struct variable *v)
946 assert (aux != NULL);
951 /* Clears auxiliary data, if any, from V, and calls any
952 associated destructor. */
954 var_clear_aux (struct variable *v)
959 if (v->aux_dtor != NULL)
965 /* This function is appropriate for use as an auxiliary data
966 destructor (passed as AUX_DTOR to var_attach_aux()) for the
967 case where the auxiliary data should be passed to free(). */
969 var_dtor_free (struct variable *v)
974 /* Observed categorical values. */
976 /* Returns V's observed categorical values,
977 which V must have. */
979 var_get_obs_vals (const struct variable *v)
981 assert (v->obs_vals != NULL);
985 /* Sets V's observed categorical values to CAT_VALS.
986 V becomes the owner of CAT_VALS. */
988 var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals)
990 struct variable *v = (struct variable *) v_ ; /* cast away const */
991 cat_stored_values_destroy (v->obs_vals);
992 v->obs_vals = cat_vals;
995 /* Returns true if V has observed categorical values, false otherwise. */
998 var_has_obs_vals (const struct variable *v)
1000 return v->obs_vals != NULL;
1003 /* Returns V's vardict structure. */
1004 const struct vardict_info *
1005 var_get_vardict (const struct variable *v)
1007 assert (var_has_vardict (v));
1011 /* Sets V's vardict data to VARDICT. */
1013 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
1015 assert (vardict->dict_index >= 0);
1016 assert (vardict->case_index >= 0);
1017 v->vardict = *vardict;
1020 /* Returns true if V has vardict data. */
1022 var_has_vardict (const struct variable *v)
1024 return v->vardict.dict_index != -1;
1027 /* Clears V's vardict data. */
1029 var_clear_vardict (struct variable *v)
1031 v->vardict.dict_index = v->vardict.case_index = -1;