1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "dictionary.h"
26 #include "identifier.h"
27 #include "missing-values.h"
28 #include "value-labels.h"
31 #include <libpspp/misc.h>
32 #include <libpspp/assertion.h>
33 #include <libpspp/compiler.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/message.h>
36 #include <libpspp/str.h>
41 #define _(msgid) gettext (msgid)
46 /* Dictionary information. */
47 char name[VAR_NAME_LEN + 1]; /* Variable name. Mixed case. */
48 int width; /* 0 for numeric, otherwise string width. */
49 struct missing_values miss; /* Missing values. */
50 struct fmt_spec print; /* Default format for PRINT. */
51 struct fmt_spec write; /* Default format for WRITE. */
52 struct val_labs *val_labs; /* Value labels. */
53 char *label; /* Variable label. */
55 /* GUI information. */
56 enum measure measure; /* Nominal, ordinal, or continuous. */
57 int display_width; /* Width of data editor column. */
58 enum alignment alignment; /* Alignment of data in GUI. */
60 /* Case information. */
61 bool leave; /* Leave value from case to case? */
63 /* Data for use by containing dictionary. */
64 struct vardict_info vardict;
66 /* Used only for system and portable file input and output.
69 size_t short_name_cnt;
71 /* Each command may use these fields as needed. */
73 void (*aux_dtor) (struct variable *);
75 /* Values of a categorical variable. Procedures need
76 vectors with binary entries, so any variable of type ALPHA will
77 have its values stored here. */
78 struct cat_vals *obs_vals;
81 /* Creates and returns a new variable with the given NAME and
82 WIDTH and other fields initialized to default values. The
83 variable is not added to a dictionary; for that, use
84 dict_create_var instead. */
86 var_create (const char *name, int width)
90 assert (width >= 0 && width <= MAX_STRING);
92 v = xmalloc (sizeof *v);
93 v->vardict.dict_index = v->vardict.case_index = -1;
94 var_set_name (v, name);
96 mv_init (&v->miss, width);
97 v->leave = var_must_leave (v);
98 if (var_is_numeric (v))
100 v->print = fmt_for_output (FMT_F, 8, 2);
101 v->alignment = ALIGN_RIGHT;
102 v->measure = MEASURE_SCALE;
106 v->print = fmt_for_output (FMT_A, var_get_width (v), 0);
107 v->alignment = ALIGN_LEFT;
108 v->measure = MEASURE_NOMINAL;
110 v->display_width = var_default_display_width (width);
114 v->short_names = NULL;
115 v->short_name_cnt = 0;
123 /* Creates and returns a clone of OLD_VAR. Most properties of
124 the new variable are copied from OLD_VAR, except:
126 - The variable's short name is not copied, because there is
127 no reason to give a new variable with potentially a new
128 name the same short name.
130 - The new variable is not added to OLD_VAR's dictionary by
131 default. Use dict_clone_var, instead, to do that.
133 - Auxiliary data and obs_vals are not copied. */
135 var_clone (const struct variable *old_var)
137 struct variable *new_var = var_create (var_get_name (old_var),
138 var_get_width (old_var));
140 var_set_missing_values (new_var, var_get_missing_values (old_var));
141 var_set_print_format (new_var, var_get_print_format (old_var));
142 var_set_write_format (new_var, var_get_write_format (old_var));
143 var_set_value_labels (new_var, var_get_value_labels (old_var));
144 var_set_label (new_var, var_get_label (old_var));
145 var_set_measure (new_var, var_get_measure (old_var));
146 var_set_display_width (new_var, var_get_display_width (old_var));
147 var_set_alignment (new_var, var_get_alignment (old_var));
148 var_set_leave (new_var, var_get_leave (old_var));
153 /* Destroys variable V.
154 V must not belong to a dictionary. If it does, use
155 dict_delete_var instead. */
157 var_destroy (struct variable *v)
161 assert (!var_has_vardict (v));
162 cat_stored_values_destroy (v->obs_vals);
163 var_clear_short_names (v);
165 val_labs_destroy (v->val_labs);
171 /* Variable names. */
173 /* Return variable V's name. */
175 var_get_name (const struct variable *v)
180 /* Sets V's name to NAME.
181 Do not use this function for a variable in a dictionary. Use
182 dict_rename_var instead. */
184 var_set_name (struct variable *v, const char *name)
186 assert (v->vardict.dict_index == -1);
187 assert (var_is_plausible_name (name, false));
189 str_copy_trunc (v->name, sizeof v->name, name);
190 dict_var_changed (v);
193 /* Returns true if NAME is an acceptable name for a variable,
194 false otherwise. If ISSUE_ERROR is true, issues an
195 explanatory error message on failure. */
197 var_is_valid_name (const char *name, bool issue_error)
202 assert (name != NULL);
204 /* Note that strlen returns number of BYTES, not the number of
206 length = strlen (name);
208 plausible = var_is_plausible_name(name, issue_error);
214 if (!lex_is_id1 (name[0]))
217 msg (SE, _("Character `%c' (in %s) may not appear "
218 "as the first character in a variable name."),
224 for (i = 0; i < length; i++)
226 if (!lex_is_idn (name[i]))
229 msg (SE, _("Character `%c' (in %s) may not appear in "
/* Returns true if NAME is a plausible name for a variable,
240 false otherwise. If ISSUE_ERROR is true, issues an
241 explanatory error message on failure.
242 This function makes no use of LC_CTYPE.
245 var_is_plausible_name (const char *name, bool issue_error)
249 assert (name != NULL);
251 /* Note that strlen returns number of BYTES, not the number of
253 length = strlen (name);
257 msg (SE, _("Variable name cannot be empty string."));
260 else if (length > VAR_NAME_LEN)
263 msg (SE, _("Variable name %s exceeds %d-character limit."),
264 name, (int) VAR_NAME_LEN);
268 if (lex_id_to_token (ss_cstr (name)) != T_ID)
271 msg (SE, _("`%s' may not be used as a variable name because it "
272 "is a reserved word."), name);
279 /* Returns VAR's dictionary class. */
281 var_get_dict_class (const struct variable *var)
283 return dict_class_from_id (var->name);
286 /* A hsh_compare_func that orders variables A and B by their
289 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
291 const struct variable *a = a_;
292 const struct variable *b = b_;
294 return strcasecmp (a->name, b->name);
297 /* A hsh_hash_func that hashes variable V based on its name. */
299 hash_var_by_name (const void *v_, const void *aux UNUSED)
301 const struct variable *v = v_;
303 return hsh_hash_case_string (v->name);
306 /* A hsh_compare_func that orders pointers to variables A and B
309 compare_var_ptrs_by_name (const void *a_, const void *b_,
310 const void *aux UNUSED)
312 struct variable *const *a = a_;
313 struct variable *const *b = b_;
315 return strcasecmp (var_get_name (*a), var_get_name (*b));
318 /* A hsh_hash_func that hashes pointer to variable V based on its
321 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
323 struct variable *const *v = v_;
325 return hsh_hash_case_string (var_get_name (*v));
328 /* Returns the type of variable V. */
330 var_get_type (const struct variable *v)
332 return val_type_from_width (v->width);
335 /* Returns the width of variable V. */
337 var_get_width (const struct variable *v)
342 /* Changes the width of V to NEW_WIDTH.
343 This function should be used cautiously. */
345 var_set_width (struct variable *v, int new_width)
347 const int old_width = v->width;
349 if (mv_is_resizable (&v->miss, new_width))
350 mv_resize (&v->miss, new_width);
352 mv_init (&v->miss, new_width);
354 if (v->val_labs != NULL)
356 if (val_labs_can_set_width (v->val_labs, new_width))
357 val_labs_set_width (v->val_labs, new_width);
360 val_labs_destroy (v->val_labs);
365 fmt_resize (&v->print, new_width);
366 fmt_resize (&v->write, new_width);
368 v->width = new_width;
371 const int old_val_count = value_cnt_from_width (old_width);
372 const int new_val_count = value_cnt_from_width (new_width);
374 if ( old_val_count != new_val_count)
375 dict_var_resized (v, new_val_count - old_val_count);
378 dict_var_changed (v);
381 /* Returns true if variable V is numeric, false otherwise. */
383 var_is_numeric (const struct variable *v)
385 return var_get_type (v) == VAL_NUMERIC;
388 /* Returns true if variable V is a string variable, false
391 var_is_alpha (const struct variable *v)
393 return var_get_type (v) == VAL_STRING;
396 /* Returns true if variable V is a short string variable, false
399 var_is_short_string (const struct variable *v)
401 return v->width > 0 && v->width <= MAX_SHORT_STRING;
404 /* Returns true if variable V is a long string variable, false
407 var_is_long_string (const struct variable *v)
409 return v->width > MAX_SHORT_STRING;
/* Returns the number of "union value"s needed to store a value of
415 var_get_value_cnt (const struct variable *v)
417 return value_cnt_from_width (v->width);
420 /* Returns variable V's missing values. */
421 const struct missing_values *
422 var_get_missing_values (const struct variable *v)
427 /* Sets variable V's missing values to MISS, which must be of V's
428 width or at least resizable to V's width.
429 If MISS is null, then V's missing values, if any, are
432 var_set_missing_values (struct variable *v, const struct missing_values *miss)
436 assert (mv_is_resizable (miss, v->width));
437 mv_copy (&v->miss, miss);
438 mv_resize (&v->miss, v->width);
441 mv_init (&v->miss, v->width);
443 dict_var_changed (v);
/* Removes all user-missing values from V.  Equivalent to calling
   var_set_missing_values with a null MISS argument. */
void
var_clear_missing_values (struct variable *v)
{
  var_set_missing_values (v, NULL);
}
453 /* Returns true if V has any user-missing values,
456 var_has_missing_values (const struct variable *v)
458 return !mv_is_empty (&v->miss);
461 /* Returns true if VALUE is in the given CLASS of missing values
462 in V, false otherwise. */
464 var_is_value_missing (const struct variable *v, const union value *value,
467 return mv_is_value_missing (&v->miss, value, class);
470 /* Returns true if D is in the given CLASS of missing values in
472 V must be a numeric variable. */
474 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
476 return mv_is_num_missing (&v->miss, d, class);
479 /* Returns true if S[] is a missing value for V, false otherwise.
480 S[] must contain exactly as many characters as V's width.
481 V must be a string variable. */
483 var_is_str_missing (const struct variable *v, const char s[],
486 return mv_is_str_missing (&v->miss, s, class);
489 /* Returns variable V's value labels,
490 possibly a null pointer if it has none. */
491 const struct val_labs *
492 var_get_value_labels (const struct variable *v)
497 /* Returns true if variable V has at least one value label. */
499 var_has_value_labels (const struct variable *v)
501 return val_labs_count (v->val_labs) > 0;
504 /* Sets variable V's value labels to a copy of VLS,
505 which must have a width equal to V's width or one that can be
506 changed to V's width.
507 If VLS is null, then V's value labels, if any, are removed. */
509 var_set_value_labels (struct variable *v, const struct val_labs *vls)
511 val_labs_destroy (v->val_labs);
516 assert (val_labs_can_set_width (vls, v->width));
517 v->val_labs = val_labs_clone (vls);
518 val_labs_set_width (v->val_labs, v->width);
519 dict_var_changed (v);
523 /* Makes sure that V has a set of value labels,
524 by assigning one to it if necessary. */
526 alloc_value_labels (struct variable *v)
528 assert (!var_is_long_string (v));
529 if (v->val_labs == NULL)
530 v->val_labs = val_labs_create (v->width);
533 /* Attempts to add a value label with the given VALUE and LABEL
534 to V. Returns true if successful, false if VALUE has an
535 existing label or if V is a long string variable. */
537 var_add_value_label (struct variable *v,
538 const union value *value, const char *label)
540 alloc_value_labels (v);
541 return val_labs_add (v->val_labs, *value, label);
544 /* Adds or replaces a value label with the given VALUE and LABEL
546 Has no effect if V is a long string variable. */
548 var_replace_value_label (struct variable *v,
549 const union value *value, const char *label)
551 alloc_value_labels (v);
552 val_labs_replace (v->val_labs, *value, label);
/* Removes all of V's value labels, if it has any.  Equivalent to
   calling var_set_value_labels with a null VLS argument. */
void
var_clear_value_labels (struct variable *v)
{
  var_set_value_labels (v, NULL);
}
562 /* Returns the label associated with VALUE for variable V,
563 or a null pointer if none. */
565 var_lookup_value_label (const struct variable *v, const union value *value)
567 return val_labs_find (v->val_labs, *value);
570 /* Append STR with a string representing VALUE for variable V.
571 That is, if VALUE has a label, append that label,
572 otherwise format VALUE and append the formatted string.
573 STR must be a pointer to an initialised struct string.
576 var_append_value_name (const struct variable *v, const union value *value,
579 const char *name = var_lookup_value_label (v, value);
582 char *s = ds_put_uninit (str, v->print.w);
583 data_out (value, &v->print, s);
586 ds_put_cstr (str, name);
590 /* Print and write formats. */
592 /* Returns V's print format specification. */
593 const struct fmt_spec *
594 var_get_print_format (const struct variable *v)
599 /* Sets V's print format specification to PRINT, which must be a
600 valid format specification for outputting a variable of V's
603 var_set_print_format (struct variable *v, const struct fmt_spec *print)
605 assert (fmt_check_width_compat (print, v->width));
607 dict_var_changed (v);
610 /* Returns V's write format specification. */
611 const struct fmt_spec *
612 var_get_write_format (const struct variable *v)
617 /* Sets V's write format specification to WRITE, which must be a
618 valid format specification for outputting a variable of V's
621 var_set_write_format (struct variable *v, const struct fmt_spec *write)
623 assert (fmt_check_width_compat (write, v->width));
625 dict_var_changed (v);
/* Installs FORMAT as both the print format and the write format
   of V, in that order.  FORMAT must be a valid format
   specification for outputting a variable of V's width. */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  /* PRINT first, then WRITE. */
  var_set_print_format (v, format);
  var_set_write_format (v, format);
}
638 /* Return a string representing this variable, in the form most
639 appropriate from a human factors perspective, that is, its
640 variable label if it has one, otherwise its name. */
642 var_to_string (const struct variable *v)
644 return v->label != NULL ? v->label : v->name;
647 /* Returns V's variable label, or a null pointer if it has none. */
649 var_get_label (const struct variable *v)
654 /* Sets V's variable label to LABEL, stripping off leading and
655 trailing white space and truncating to 255 characters.
656 If LABEL is a null pointer or if LABEL is an empty string
657 (after stripping white space), then V's variable label (if
660 var_set_label (struct variable *v, const char *label)
667 struct substring s = ss_cstr (label);
668 ss_trim (&s, ss_cstr (CC_SPACES));
669 ss_truncate (&s, 255);
670 if (!ss_is_empty (s))
671 v->label = ss_xstrdup (s);
672 dict_var_changed (v);
/* Removes V's variable label, if it has one.  Equivalent to
   calling var_set_label with a null LABEL argument. */
void
var_clear_label (struct variable *v)
{
  var_set_label (v, NULL);
}
683 /* Returns true if V has a variable V,
686 var_has_label (const struct variable *v)
688 return v->label != NULL;
691 /* Returns true if M is a valid variable measurement level,
694 measure_is_valid (enum measure m)
696 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
699 /* Returns V's measurement level. */
701 var_get_measure (const struct variable *v)
706 /* Sets V's measurement level to MEASURE. */
708 var_set_measure (struct variable *v, enum measure measure)
710 assert (measure_is_valid (measure));
711 v->measure = measure;
712 dict_var_changed (v);
715 /* Returns V's display width, which applies only to GUIs. */
717 var_get_display_width (const struct variable *v)
719 return v->display_width;
722 /* Sets V's display width to DISPLAY_WIDTH. */
724 var_set_display_width (struct variable *v, int display_width)
726 v->display_width = display_width;
727 dict_var_changed (v);
/* Returns the default display width for a variable of the given
   WIDTH, which is the value that var_create assigns: 8 when
   WIDTH is 0, otherwise WIDTH capped at 32.  The result can be
   used to reset a variable's display width to the default. */
int
var_default_display_width (int width)
{
  if (width == 0)
    return 8;

  /* Nonzero widths are used as-is, up to a ceiling of 32. */
  return width < 32 ? width : 32;
}
739 /* Returns true if A is a valid alignment,
742 alignment_is_valid (enum alignment a)
744 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
747 /* Returns V's display alignment, which applies only to GUIs. */
749 var_get_alignment (const struct variable *v)
754 /* Sets V's display alignment to ALIGNMENT. */
756 var_set_alignment (struct variable *v, enum alignment alignment)
758 assert (alignment_is_valid (alignment));
759 v->alignment = alignment;
760 dict_var_changed (v);
763 /* Whether variables' values should be preserved from case to
766 /* Returns true if variable V's value should be left from case to
767 case, instead of being reset to system-missing or blanks. */
769 var_get_leave (const struct variable *v)
774 /* Sets V's leave setting to LEAVE. */
776 var_set_leave (struct variable *v, bool leave)
778 assert (leave || !var_must_leave (v));
780 dict_var_changed (v);
783 /* Returns true if V must be left from case to case,
784 false if it can be set either way. */
786 var_must_leave (const struct variable *v)
788 return var_get_dict_class (v) == DC_SCRATCH;
791 /* Returns the number of short names stored in VAR.
793 Short names are used only for system and portable file input
794 and output. They are upper-case only, not necessarily unique,
795 and limited to SHORT_NAME_LEN characters (plus a null
796 terminator). Ordinarily a variable has at most one short
797 name, but very long string variables (longer than 255 bytes)
798 may have more. A variable might not have any short name at
799 all if it hasn't been saved to or read from a system or
802 var_get_short_name_cnt (const struct variable *var)
804 return var->short_name_cnt;
807 /* Returns VAR's short name with the given IDX, if it has one
808 with that index, or a null pointer otherwise. Short names may
809 be sparse: even if IDX is less than the number of short names
810 in VAR, this function may return a null pointer. */
812 var_get_short_name (const struct variable *var, size_t idx)
814 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
817 /* Sets VAR's short name with the given IDX to SHORT_NAME,
818 truncating it to SHORT_NAME_LEN characters and converting it
819 to uppercase in the process. Specifying a null pointer for
820 SHORT_NAME clears the specified short name. */
822 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
824 assert (var != NULL);
825 assert (short_name == NULL || var_is_plausible_name (short_name, false));
827 /* Clear old short name numbered IDX, if any. */
828 if (idx < var->short_name_cnt)
830 free (var->short_names[idx]);
831 var->short_names[idx] = NULL;
834 /* Install new short name for IDX. */
835 if (short_name != NULL)
837 if (idx >= var->short_name_cnt)
839 size_t old_cnt = var->short_name_cnt;
841 var->short_name_cnt = MAX (idx * 2, 1);
842 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
843 sizeof *var->short_names);
844 for (i = old_cnt; i < var->short_name_cnt; i++)
845 var->short_names[i] = NULL;
847 var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING);
848 str_uppercase (var->short_names[idx]);
851 dict_var_changed (var);
854 /* Clears V's short names. */
856 var_clear_short_names (struct variable *v)
860 for (i = 0; i < v->short_name_cnt; i++)
861 free (v->short_names[i]);
862 free (v->short_names);
863 v->short_names = NULL;
864 v->short_name_cnt = 0;
867 /* Relationship with dictionary. */
869 /* Returns V's index within its dictionary, the value
870 for which "dict_get_var (dict, index)" will return V.
871 V must be in a dictionary. */
873 var_get_dict_index (const struct variable *v)
875 assert (v->vardict.dict_index != -1);
876 return v->vardict.dict_index;
879 /* Returns V's index within the case represented by its
880 dictionary, that is, the value for which "case_data_idx (case,
881 index)" will return the data for V in that case.
882 V must be in a dictionary. */
884 var_get_case_index (const struct variable *v)
886 assert (v->vardict.case_index != -1);
887 return v->vardict.case_index;
890 /* Returns V's auxiliary data, or a null pointer if none has been
893 var_get_aux (const struct variable *v)
898 /* Assign auxiliary data AUX to variable V, which must not
899 already have auxiliary data. Before V's auxiliary data is
900 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
901 may be appropriate for use as AUX_DTOR.) */
903 var_attach_aux (const struct variable *v_,
904 void *aux, void (*aux_dtor) (struct variable *))
906 struct variable *v = (struct variable *) v_ ; /* cast away const */
907 assert (v->aux == NULL);
908 assert (aux != NULL);
910 v->aux_dtor = aux_dtor;
/* Removes V's auxiliary data, which must be present (this is
   asserted), and returns it, without calling any associated
   destructor. */
917 var_detach_aux (struct variable *v)
920 assert (aux != NULL);
925 /* Clears auxiliary data, if any, from V, and calls any
926 associated destructor. */
928 var_clear_aux (struct variable *v)
933 if (v->aux_dtor != NULL)
/* This function is appropriate for use as an auxiliary data
940 destructor (passed as AUX_DTOR to var_attach_aux()) for the
941 case where the auxiliary data should be passed to free(). */
943 var_dtor_free (struct variable *v)
948 /* Observed categorical values. */
950 /* Returns V's observed categorical values,
951 which V must have. */
953 var_get_obs_vals (const struct variable *v)
955 assert (v->obs_vals != NULL);
959 /* Sets V's observed categorical values to CAT_VALS.
960 V becomes the owner of CAT_VALS. */
962 var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals)
964 struct variable *v = (struct variable *) v_ ; /* cast away const */
965 cat_stored_values_destroy (v->obs_vals);
966 v->obs_vals = cat_vals;
969 /* Returns true if V has observed categorical values,
972 var_has_obs_vals (const struct variable *v)
974 return v->obs_vals != NULL;
977 /* Returns V's vardict structure. */
978 const struct vardict_info *
979 var_get_vardict (const struct variable *v)
981 assert (var_has_vardict (v));
985 /* Sets V's vardict data to VARDICT. */
987 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
989 assert (vardict->dict_index >= 0);
990 assert (vardict->case_index >= 0);
991 v->vardict = *vardict;
994 /* Returns true if V has vardict data. */
996 var_has_vardict (const struct variable *v)
998 return v->vardict.dict_index != -1;
1001 /* Clears V's vardict data. */
1003 var_clear_vardict (struct variable *v)
1005 v->vardict.dict_index = v->vardict.case_index = -1;