1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "dictionary.h"
26 #include "identifier.h"
27 #include "missing-values.h"
28 #include "value-labels.h"
31 #include <libpspp/misc.h>
32 #include <libpspp/assertion.h>
33 #include <libpspp/compiler.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/message.h>
36 #include <libpspp/str.h>
41 #define _(msgid) gettext (msgid)
46 /* Dictionary information. */
47 char name[VAR_NAME_LEN + 1]; /* Variable name. Mixed case. */
48 int width; /* 0 for numeric, otherwise string width. */
49 struct missing_values miss; /* Missing values. */
50 struct fmt_spec print; /* Default format for PRINT. */
51 struct fmt_spec write; /* Default format for WRITE. */
52 struct val_labs *val_labs; /* Value labels. */
53 char *label; /* Variable label. */
55 /* GUI information. */
56 enum measure measure; /* Nominal, ordinal, or continuous. */
57 int display_width; /* Width of data editor column. */
58 enum alignment alignment; /* Alignment of data in GUI. */
60 /* Case information. */
61 bool leave; /* Leave value from case to case? */
63 /* Data for use by containing dictionary. */
64 struct vardict_info vardict;
66 /* Used only for system and portable file input and output. */
69 size_t short_name_cnt;
71 /* Each command may use these fields as needed. */
73 void (*aux_dtor) (struct variable *);
75 /* Values of a categorical variable. Procedures need
76 vectors with binary entries, so any variable of type ALPHA will
77 have its values stored here. */
78 struct cat_vals *obs_vals;
81 /* Creates and returns a new variable with the given NAME and
82 WIDTH and other fields initialized to default values. The
83 variable is not added to a dictionary; for that, use
84 dict_create_var instead.
WIDTH must be in the range 0...MAX_STRING (asserted); per the
description of the `width' member, 0 means numeric and any other
value is a string width. */
86 var_create (const char *name, int width)
91 assert (width >= 0 && width <= MAX_STRING);
93 v = xmalloc (sizeof *v);
/* -1 in both indexes marks V as not being in a dictionary;
var_set_name, called next, asserts that dict_index is -1. */
94 v->vardict.dict_index = v->vardict.case_index = -1;
95 var_set_name (v, name);
97 mv_init (&v->miss, width);
/* var_must_leave inspects V's name (via var_get_dict_class), so
this must come after var_set_name. */
98 v->leave = var_must_leave (v);
99 type = val_type_from_width (width);
100 v->alignment = var_default_alignment (type);
101 v->measure = var_default_measure (type);
102 v->display_width = var_default_display_width (width);
103 v->print = v->write = var_default_formats (width);
106 v->short_names = NULL;
107 v->short_name_cnt = 0;
115 /* Creates and returns a clone of OLD_VAR. Most properties of
116 the new variable are copied from OLD_VAR, except:
118 - The variable's short name is not copied, because there is
119 no reason to give a new variable with potentially a new
120 name the same short name.
122 - The new variable is not added to OLD_VAR's dictionary by
123 default. Use dict_clone_var, instead, to do that.
125 - Auxiliary data and obs_vals are not copied. */
127 var_clone (const struct variable *old_var)
129 struct variable *new_var = var_create (var_get_name (old_var),
130 var_get_width (old_var));
132 var_set_missing_values (new_var, var_get_missing_values (old_var));
133 var_set_print_format (new_var, var_get_print_format (old_var));
134 var_set_write_format (new_var, var_get_write_format (old_var));
135 var_set_value_labels (new_var, var_get_value_labels (old_var));
136 var_set_label (new_var, var_get_label (old_var));
137 var_set_measure (new_var, var_get_measure (old_var));
138 var_set_display_width (new_var, var_get_display_width (old_var));
139 var_set_alignment (new_var, var_get_alignment (old_var));
140 var_set_leave (new_var, var_get_leave (old_var));
145 /* Creates a variable to be used for internal calculations only.
The variable is created by var_create with the name "$internal"
and width 0, and is then given vardict data whose case index is
CASE_IDX. CASE_IDX must be nonnegative, because var_set_vardict
asserts that. */
147 var_create_internal (int case_idx)
149 struct variable *v = var_create ("$internal", 0);
151 struct vardict_info vdi;
155 vdi.case_index = case_idx;
157 var_set_vardict (v, &vdi);
162 /* Destroys variable V.
163 V must not belong to a dictionary. If it does, use
164 dict_delete_var instead. */
166 var_destroy (struct variable *v)
170 if (var_has_vardict (v))
172 const struct vardict_info *vdi = var_get_vardict (v);
173 assert (vdi->dict == NULL);
175 cat_stored_values_destroy (v->obs_vals);
176 var_clear_short_names (v);
178 val_labs_destroy (v->val_labs);
184 /* Variable names. */
186 /* Return variable V's name. */
188 var_get_name (const struct variable *v)
193 /* Sets V's name to NAME.
194 Do not use this function for a variable in a dictionary. Use
195 dict_rename_var instead.
Both conditions are asserted: V's dict_index must be -1, and NAME
must satisfy var_is_plausible_name. NAME is copied into V's
fixed-size name buffer with str_copy_trunc. */
197 var_set_name (struct variable *v, const char *name)
199 assert (v->vardict.dict_index == -1);
200 assert (var_is_plausible_name (name, false));
202 str_copy_trunc (v->name, sizeof v->name, name);
203 dict_var_changed (v);
206 /* Returns true if NAME is an acceptable name for a variable,
207 false otherwise. If ISSUE_ERROR is true, issues an
208 explanatory error message on failure. */
210 var_is_valid_name (const char *name, bool issue_error)
215 assert (name != NULL);
217 /* Note that strlen returns the number of BYTES, not the number of
characters. */
219 length = strlen (name);
221 plausible = var_is_plausible_name(name, issue_error);
227 if (!lex_is_id1 (name[0]))
230 msg (SE, _("Character `%c' (in %s) may not appear "
231 "as the first character in a variable name."),
237 for (i = 0; i < length; i++)
239 if (!lex_is_idn (name[i]))
242 msg (SE, _("Character `%c' (in %s) may not appear in "
252 /* Returns true if NAME is a plausible name for a variable,
253 false otherwise. If ISSUE_ERROR is true, issues an
254 explanatory error message on failure.
255 This function makes no use of LC_CTYPE. */
258 var_is_plausible_name (const char *name, bool issue_error)
262 assert (name != NULL);
264 /* Note that strlen returns the number of BYTES, not the number of
characters. */
266 length = strlen (name);
270 msg (SE, _("Variable name cannot be empty string."));
273 else if (length > VAR_NAME_LEN)
276 msg (SE, _("Variable name %s exceeds %d-character limit."),
277 name, (int) VAR_NAME_LEN);
281 if (lex_id_to_token (ss_cstr (name)) != T_ID)
284 msg (SE, _("`%s' may not be used as a variable name because it "
285 "is a reserved word."), name);
292 /* Returns VAR's dictionary class. */
294 var_get_dict_class (const struct variable *var)
296 return dict_class_from_id (var->name);
299 /* A hsh_compare_func that orders variables A and B by their
names, ignoring case. */
302 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
304 const struct variable *a = a_;
305 const struct variable *b = b_;
307 return strcasecmp (a->name, b->name);
310 /* A hsh_hash_func that hashes variable V based on its name. */
312 hash_var_by_name (const void *v_, const void *aux UNUSED)
314 const struct variable *v = v_;
316 return hsh_hash_case_string (v->name);
319 /* A hsh_compare_func that orders pointers to variables A and B
by their names, ignoring case. */
322 compare_var_ptrs_by_name (const void *a_, const void *b_,
323 const void *aux UNUSED)
325 struct variable *const *a = a_;
326 struct variable *const *b = b_;
328 return strcasecmp (var_get_name (*a), var_get_name (*b));
331 /* A hsh_hash_func that hashes pointer to variable V based on its
name. */
334 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
336 struct variable *const *v = v_;
338 return hsh_hash_case_string (var_get_name (*v));
341 /* Returns the type of variable V. */
343 var_get_type (const struct variable *v)
345 return val_type_from_width (v->width);
348 /* Returns the width of variable V. */
350 var_get_width (const struct variable *v)
355 /* Changes the width of V to NEW_WIDTH.
356 This function should be used cautiously.
Side effects visible in the body: V's missing values are resized
with mv_resize when mv_is_resizable permits it (the mv_init call
is presumably the alternative branch -- the `else' is not visible
here); existing value labels are either given the new width or
destroyed; both formats are passed through fmt_resize; and the
dictionary hooks dict_var_resized (only when the number of
`union value's changes) and dict_var_changed are called. */
358 var_set_width (struct variable *v, int new_width)
360 const int old_width = v->width;
362 if (mv_is_resizable (&v->miss, new_width))
363 mv_resize (&v->miss, new_width);
365 mv_init (&v->miss, new_width);
367 if (v->val_labs != NULL)
369 if (val_labs_can_set_width (v->val_labs, new_width))
370 val_labs_set_width (v->val_labs, new_width);
373 val_labs_destroy (v->val_labs);
378 fmt_resize (&v->print, new_width);
379 fmt_resize (&v->write, new_width);
381 v->width = new_width;
384 const int old_val_count = value_cnt_from_width (old_width);
385 const int new_val_count = value_cnt_from_width (new_width);
387 if ( old_val_count != new_val_count)
388 dict_var_resized (v, new_val_count - old_val_count);
391 dict_var_changed (v);
394 /* Returns true if variable V is numeric, false otherwise. */
396 var_is_numeric (const struct variable *v)
398 return var_get_type (v) == VAL_NUMERIC;
401 /* Returns true if variable V is a string variable, false
otherwise. */
404 var_is_alpha (const struct variable *v)
406 return var_get_type (v) == VAL_STRING;
409 /* Returns true if variable V is a short string variable, false
otherwise. */
412 var_is_short_string (const struct variable *v)
414 return v->width > 0 && v->width <= MAX_SHORT_STRING;
417 /* Returns true if variable V is a long string variable, false
otherwise. */
420 var_is_long_string (const struct variable *v)
422 return v->width > MAX_SHORT_STRING;
425 /* Returns the number of "union value"s needed to store a value of
variable V. */
428 var_get_value_cnt (const struct variable *v)
430 return value_cnt_from_width (v->width);
433 /* Returns variable V's missing values. */
434 const struct missing_values *
435 var_get_missing_values (const struct variable *v)
440 /* Sets variable V's missing values to MISS, which must be of V's
441 width or at least resizable to V's width.
442 If MISS is null, then V's missing values, if any, are
cleared. */
445 var_set_missing_values (struct variable *v, const struct missing_values *miss)
449 assert (mv_is_resizable (miss, v->width));
450 mv_copy (&v->miss, miss);
451 mv_resize (&v->miss, v->width);
454 mv_init (&v->miss, v->width);
456 dict_var_changed (v);
459 /* Sets variable V to have no user-missing values. */
461 var_clear_missing_values (struct variable *v)
463 var_set_missing_values (v, NULL);
466 /* Returns true if V has any user-missing values,
false otherwise. */
469 var_has_missing_values (const struct variable *v)
471 return !mv_is_empty (&v->miss);
474 /* Returns true if VALUE is in the given CLASS of missing values
475 in V, false otherwise. */
477 var_is_value_missing (const struct variable *v, const union value *value,
480 return mv_is_value_missing (&v->miss, value, class);
483 /* Returns true if D is in the given CLASS of missing values in
V, false otherwise.
485 V must be a numeric variable. */
487 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
489 return mv_is_num_missing (&v->miss, d, class);
492 /* Returns true if S[] is a missing value for V, false otherwise.
493 S[] must contain exactly as many characters as V's width.
494 V must be a string variable. */
496 var_is_str_missing (const struct variable *v, const char s[],
499 return mv_is_str_missing (&v->miss, s, class);
502 /* Returns variable V's value labels,
503 possibly a null pointer if it has none. */
504 const struct val_labs *
505 var_get_value_labels (const struct variable *v)
510 /* Returns true if variable V has at least one value label. */
512 var_has_value_labels (const struct variable *v)
514 return val_labs_count (v->val_labs) > 0;
517 /* Sets variable V's value labels to a copy of VLS,
518 which must have a width equal to V's width or one that can be
519 changed to V's width.
520 If VLS is null, then V's value labels, if any, are removed. */
522 var_set_value_labels (struct variable *v, const struct val_labs *vls)
524 val_labs_destroy (v->val_labs);
529 assert (val_labs_can_set_width (vls, v->width));
530 v->val_labs = val_labs_clone (vls);
531 val_labs_set_width (v->val_labs, v->width);
532 dict_var_changed (v);
536 /* Makes sure that V has a set of value labels,
537 by assigning one to it if necessary.
V must not be a long string variable (asserted). */
539 alloc_value_labels (struct variable *v)
541 assert (!var_is_long_string (v));
542 if (v->val_labs == NULL)
543 v->val_labs = val_labs_create (v->width);
546 /* Attempts to add a value label with the given VALUE and LABEL
547 to V. Returns true if successful, false if VALUE has an
548 existing label.
NOTE(review): this comment used to say that false is also
returned when V is a long string variable, but alloc_value_labels
asserts that V is not a long string, so such a call trips the
assertion instead of returning false. */
550 var_add_value_label (struct variable *v,
551 const union value *value, const char *label)
553 alloc_value_labels (v);
554 return val_labs_add (v->val_labs, *value, label);
557 /* Adds or replaces a value label with the given VALUE and LABEL
to V.
559 NOTE(review): this comment used to say that the call has no
effect if V is a long string variable, but alloc_value_labels
asserts that V is not a long string, so such a call trips the
assertion instead of being ignored. */
561 var_replace_value_label (struct variable *v,
562 const union value *value, const char *label)
564 alloc_value_labels (v);
565 val_labs_replace (v->val_labs, *value, label);
568 /* Removes V's value labels, if any. */
570 var_clear_value_labels (struct variable *v)
572 var_set_value_labels (v, NULL);
575 /* Returns the label associated with VALUE for variable V,
576 or a null pointer if none. */
578 var_lookup_value_label (const struct variable *v, const union value *value)
580 return val_labs_find (v->val_labs, *value);
583 /* Appends to STR a string representing VALUE for variable V.
584 That is, if VALUE has a label, append that label,
585 otherwise format VALUE and append the formatted string.
586 STR must be a pointer to an initialised struct string. */
589 var_append_value_name (const struct variable *v, const union value *value,
592 const char *name = var_lookup_value_label (v, value);
595 char *s = ds_put_uninit (str, v->print.w);
596 data_out (value, &v->print, s);
599 ds_put_cstr (str, name);
603 /* Print and write formats. */
605 /* Returns V's print format specification. */
606 const struct fmt_spec *
607 var_get_print_format (const struct variable *v)
612 /* Sets V's print format specification to PRINT, which must be a
613 valid format specification for a variable of V's width
614 (ordinarily an output format, but input formats are not
rejected). */
617 var_set_print_format (struct variable *v, const struct fmt_spec *print)
619 assert (fmt_check_width_compat (print, v->width));
621 dict_var_changed (v);
624 /* Returns V's write format specification. */
625 const struct fmt_spec *
626 var_get_write_format (const struct variable *v)
631 /* Sets V's write format specification to WRITE, which must be a
632 valid format specification for a variable of V's width
633 (ordinarily an output format, but input formats are not
rejected). */
636 var_set_write_format (struct variable *v, const struct fmt_spec *write)
638 assert (fmt_check_width_compat (write, v->width));
640 dict_var_changed (v);
643 /* Sets V's print and write format specifications to FORMAT,
644 which must be a valid format specification for a variable of
645 V's width (ordinarily an output format, but input formats are
not rejected). */
648 var_set_both_formats (struct variable *v, const struct fmt_spec *format)
650 var_set_print_format (v, format);
651 var_set_write_format (v, format);
654 /* Returns the default print and write format for a variable of
655 the given WIDTH, as set by var_create. The return value can be
656 used to reset a variable's print and write formats to the
default. */
659 var_default_formats (int width)
662 ? fmt_for_output (FMT_F, 8, 2)
663 : fmt_for_output (FMT_A, width, 0));
666 /* Return a string representing this variable, in the form most
667 appropriate from a human factors perspective, that is, its
668 variable label if it has one, otherwise its name. */
670 var_to_string (const struct variable *v)
672 return v->label != NULL ? v->label : v->name;
675 /* Returns V's variable label, or a null pointer if it has none. */
677 var_get_label (const struct variable *v)
682 /* Sets V's variable label to LABEL, stripping off leading and
683 trailing white space and truncating to 255 characters.
684 If LABEL is a null pointer or if LABEL is an empty string
685 (after stripping white space), then V's variable label (if
any) is removed. */
688 var_set_label (struct variable *v, const char *label)
695 struct substring s = ss_cstr (label);
696 ss_trim (&s, ss_cstr (CC_SPACES));
697 ss_truncate (&s, 255);
698 if (!ss_is_empty (s))
699 v->label = ss_xstrdup (s);
700 dict_var_changed (v);
704 /* Removes any variable label from V. */
706 var_clear_label (struct variable *v)
708 var_set_label (v, NULL);
711 /* Returns true if V has a variable label,
false otherwise. */
714 var_has_label (const struct variable *v)
716 return v->label != NULL;
719 /* Returns true if M is a valid variable measurement level,
false otherwise. */
722 measure_is_valid (enum measure m)
724 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
727 /* Returns V's measurement level. */
729 var_get_measure (const struct variable *v)
734 /* Sets V's measurement level to MEASURE. */
736 var_set_measure (struct variable *v, enum measure measure)
738 assert (measure_is_valid (measure));
739 v->measure = measure;
740 dict_var_changed (v);
743 /* Returns the default measurement level for a variable of the
744 given TYPE, as set by var_create. The return value can be
745 used to reset a variable's measurement level to the
default. */
748 var_default_measure (enum val_type type)
750 return type == VAL_NUMERIC ? MEASURE_SCALE : MEASURE_NOMINAL;
753 /* Returns V's display width, which applies only to GUIs. */
755 var_get_display_width (const struct variable *v)
757 return v->display_width;
760 /* Sets V's display width to DISPLAY_WIDTH. */
762 var_set_display_width (struct variable *v, int display_width)
764 v->display_width = display_width;
765 dict_var_changed (v);
768 /* Returns the default display width for a variable of the given
769 WIDTH, as set by var_create. The return value can be used to
770 reset a variable's display width to the default. */
772 var_default_display_width (int width)
774 return width == 0 ? 8 : MIN (width, 32);
777 /* Returns true if A is a valid alignment,
false otherwise. */
780 alignment_is_valid (enum alignment a)
782 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
785 /* Returns V's display alignment, which applies only to GUIs. */
787 var_get_alignment (const struct variable *v)
792 /* Sets V's display alignment to ALIGNMENT. */
794 var_set_alignment (struct variable *v, enum alignment alignment)
796 assert (alignment_is_valid (alignment));
797 v->alignment = alignment;
798 dict_var_changed (v);
801 /* Returns the default display alignment for a variable of the
802 given TYPE, as set by var_create. The return value can be
803 used to reset a variable's display alignment to the default. */
805 var_default_alignment (enum val_type type)
807 return type == VAL_NUMERIC ? ALIGN_RIGHT : ALIGN_LEFT;
810 /* Whether variables' values should be preserved from case to
case. */
813 /* Returns true if variable V's value should be left from case to
814 case, instead of being reset to system-missing or blanks. */
816 var_get_leave (const struct variable *v)
821 /* Sets V's leave setting to LEAVE.
LEAVE may be false only if var_must_leave (V) is false; this is
asserted. */
823 var_set_leave (struct variable *v, bool leave)
825 assert (leave || !var_must_leave (v));
827 dict_var_changed (v);
830 /* Returns true if V must be left from case to case,
831 false if it can be set either way. */
833 var_must_leave (const struct variable *v)
835 return var_get_dict_class (v) == DC_SCRATCH;
838 /* Returns the number of short names stored in VAR.
840 Short names are used only for system and portable file input
841 and output. They are upper-case only, not necessarily unique,
842 and limited to SHORT_NAME_LEN characters (plus a null
843 terminator). Ordinarily a variable has at most one short
844 name, but very long string variables (longer than 255 bytes)
845 may have more. A variable might not have any short name at
846 all if it hasn't been saved to or read from a system or
portable file. */
849 var_get_short_name_cnt (const struct variable *var)
851 return var->short_name_cnt;
854 /* Returns VAR's short name with the given IDX, if it has one
855 with that index, or a null pointer otherwise. Short names may
856 be sparse: even if IDX is less than the number of short names
857 in VAR, this function may return a null pointer. */
859 var_get_short_name (const struct variable *var, size_t idx)
861 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
864 /* Sets VAR's short name with the given IDX to SHORT_NAME,
865 truncating it to SHORT_NAME_LEN characters and converting it
866 to uppercase in the process. Specifying a null pointer for
867 SHORT_NAME clears the specified short name.
VAR must be non-null and SHORT_NAME, when non-null, must satisfy
var_is_plausible_name (both asserted).
NOTE(review): the copy below is bounded by MAX_SHORT_STRING, not
by SHORT_NAME_LEN as this comment states -- confirm that the two
limits are meant to be the same. */
869 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
871 assert (var != NULL);
872 assert (short_name == NULL || var_is_plausible_name (short_name, false));
874 /* Clear old short name numbered IDX, if any. */
875 if (idx < var->short_name_cnt)
877 free (var->short_names[idx]);
878 var->short_names[idx] = NULL;
881 /* Install new short name for IDX. */
882 if (short_name != NULL)
/* IDX is past the end of the array: grow it to MAX (idx * 2, 1)
entries, which always exceeds IDX, and null-fill the new slots so
that unused indexes read back as "no short name". */
884 if (idx >= var->short_name_cnt)
886 size_t old_cnt = var->short_name_cnt;
888 var->short_name_cnt = MAX (idx * 2, 1);
889 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
890 sizeof *var->short_names);
891 for (i = old_cnt; i < var->short_name_cnt; i++)
892 var->short_names[i] = NULL;
894 var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING);
895 str_uppercase (var->short_names[idx]);
898 dict_var_changed (var);
901 /* Clears V's short names. */
903 var_clear_short_names (struct variable *v)
907 for (i = 0; i < v->short_name_cnt; i++)
908 free (v->short_names[i]);
909 free (v->short_names);
910 v->short_names = NULL;
911 v->short_name_cnt = 0;
914 /* Relationship with dictionary. */
916 /* Returns V's index within its dictionary, the value
917 for which "dict_get_var (dict, index)" will return V.
918 V must be in a dictionary. */
920 var_get_dict_index (const struct variable *v)
922 assert (v->vardict.dict_index != -1);
923 return v->vardict.dict_index;
926 /* Returns V's index within the case represented by its
927 dictionary, that is, the value for which "case_data_idx (case,
928 index)" will return the data for V in that case.
929 V must be in a dictionary. */
931 var_get_case_index (const struct variable *v)
933 assert (v->vardict.case_index != -1);
934 return v->vardict.case_index;
937 /* Returns V's auxiliary data, or a null pointer if none has been
attached. */
940 var_get_aux (const struct variable *v)
945 /* Assign auxiliary data AUX to variable V, which must not
946 already have auxiliary data. Before V's auxiliary data is
947 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
948 may be appropriate for use as AUX_DTOR.)
AUX must be non-null (asserted). AUX_DTOR may be null, in which
case var_clear_aux calls no destructor. V_ is declared const,
but it is modified through a cast. */
950 var_attach_aux (const struct variable *v_,
951 void *aux, void (*aux_dtor) (struct variable *))
953 struct variable *v = (struct variable *) v_ ; /* cast away const */
954 assert (v->aux == NULL);
955 assert (aux != NULL);
957 v->aux_dtor = aux_dtor;
961 /* Removes auxiliary data from V and returns it, without
962 calling any associated destructor.
NOTE(review): this comment used to say "if any", but the assertion
below requires the auxiliary data to be non-null, so calling this
on a variable with no auxiliary data trips the assertion. */
964 var_detach_aux (struct variable *v)
967 assert (aux != NULL);
972 /* Clears auxiliary data, if any, from V, and calls any
973 associated destructor. */
975 var_clear_aux (struct variable *v)
980 if (v->aux_dtor != NULL)
986 /* This function is appropriate for use as an auxiliary data
987 destructor (passed as AUX_DTOR to var_attach_aux()) for the
988 case where the auxiliary data should be passed to free(). */
990 var_dtor_free (struct variable *v)
995 /* Observed categorical values. */
997 /* Returns V's observed categorical values,
998 which V must have. */
1000 var_get_obs_vals (const struct variable *v)
1002 assert (v->obs_vals != NULL);
1006 /* Sets V's observed categorical values to CAT_VALS.
1007 V becomes the owner of CAT_VALS.
Any observed values V previously held are first passed to
cat_stored_values_destroy. V_ is declared const, but it is
modified through a cast. */
1009 var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals)
1011 struct variable *v = (struct variable *) v_ ; /* cast away const */
1012 cat_stored_values_destroy (v->obs_vals);
1013 v->obs_vals = cat_vals;
1016 /* Returns true if V has observed categorical values,
false otherwise. */
1019 var_has_obs_vals (const struct variable *v)
1021 return v->obs_vals != NULL;
1024 /* Returns V's vardict structure. */
1025 const struct vardict_info *
1026 var_get_vardict (const struct variable *v)
1028 assert (var_has_vardict (v));
1032 /* Sets V's vardict data to a copy of *VARDICT.
VARDICT's dict_index and case_index must both be nonnegative
(asserted); -1 in dict_index is what var_has_vardict treats as
"no vardict data". */
1034 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
1036 assert (vardict->dict_index >= 0);
1037 assert (vardict->case_index >= 0);
1038 v->vardict = *vardict;
1041 /* Returns true if V has vardict data. */
1043 var_has_vardict (const struct variable *v)
1045 return v->vardict.dict_index != -1;
1048 /* Clears V's vardict data. */
1050 var_clear_vardict (struct variable *v)
1052 v->vardict.dict_index = v->vardict.case_index = -1;