1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "dictionary.h"
26 #include "identifier.h"
27 #include "missing-values.h"
28 #include "value-labels.h"
31 #include <libpspp/misc.h>
32 #include <libpspp/assertion.h>
33 #include <libpspp/compiler.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/message.h>
36 #include <libpspp/str.h>
41 #define _(msgid) gettext (msgid)
46 /* Dictionary information. */
47 char name[LONG_NAME_LEN + 1]; /* Variable name. Mixed case. */
48 int width; /* 0 for numeric, otherwise string width. */
49 struct missing_values miss; /* Missing values. */
50 struct fmt_spec print; /* Default format for PRINT. */
51 struct fmt_spec write; /* Default format for WRITE. */
52 struct val_labs *val_labs; /* Value labels. */
53 char *label; /* Variable label. */
55 /* GUI information. */
56 enum measure measure; /* Nominal, ordinal, or continuous. */
57 int display_width; /* Width of data editor column. */
58 enum alignment alignment; /* Alignment of data in GUI. */
60 /* Case information. */
61 bool leave; /* Leave value from case to case? */
63 /* Data for use by containing dictionary. */
64 struct vardict_info vardict;
66 /* Used only for system and portable file input and output.
69 size_t short_name_cnt;
71 /* Each command may use these fields as needed. */
73 void (*aux_dtor) (struct variable *);
75 /* Values of a categorical variable. Procedures need
76 vectors with binary entries, so any variable of type ALPHA will
77 have its values stored here. */
78 struct cat_vals *obs_vals;
81 /* Returns true if VAR_TYPE is a valid variable type. */
83 var_type_is_valid (enum var_type var_type)
85 return var_type == VAR_NUMERIC || var_type == VAR_STRING;
88 /* Returns the variable type for the given width. */
90 var_type_from_width (int width)
92 return width != 0 ? VAR_STRING : VAR_NUMERIC;
95 /* Creates and returns a new variable with the given NAME and
96 WIDTH and other fields initialized to default values. The
97 variable is not added to a dictionary; for that, use
98 dict_create_var instead. */
100 var_create (const char *name, int width)
104 assert (width >= 0 && width <= MAX_STRING);
106 v = xmalloc (sizeof *v);
107 v->vardict.dict_index = v->vardict.case_index = -1;
108 var_set_name (v, name);
110 mv_init (&v->miss, width);
111 v->leave = var_must_leave (v);
112 if (var_is_numeric (v))
114 v->print = fmt_for_output (FMT_F, 8, 2);
115 v->alignment = ALIGN_RIGHT;
116 v->measure = MEASURE_SCALE;
120 v->print = fmt_for_output (FMT_A, var_get_width (v), 0);
121 v->alignment = ALIGN_LEFT;
122 v->measure = MEASURE_NOMINAL;
124 v->display_width = var_default_display_width (width);
128 v->short_names = NULL;
129 v->short_name_cnt = 0;
/* Creates and returns a clone of OLD_VAR.  Most properties of
   the new variable are copied from OLD_VAR, except:

    - The variable's short name is not copied, because there is
      no reason to give a new variable with potentially a new
      name the same short name.

    - The new variable is not added to OLD_VAR's dictionary by
      default.  Use dict_clone_var, instead, to do that.

    - Auxiliary data and obs_vals are not copied. */
struct variable *
var_clone (const struct variable *old_var)
{
  struct variable *new_var = var_create (var_get_name (old_var),
                                         var_get_width (old_var));

  var_set_missing_values (new_var, var_get_missing_values (old_var));
  var_set_print_format (new_var, var_get_print_format (old_var));
  var_set_write_format (new_var, var_get_write_format (old_var));
  var_set_value_labels (new_var, var_get_value_labels (old_var));
  var_set_label (new_var, var_get_label (old_var));
  var_set_measure (new_var, var_get_measure (old_var));
  var_set_display_width (new_var, var_get_display_width (old_var));
  var_set_alignment (new_var, var_get_alignment (old_var));
  var_set_leave (new_var, var_get_leave (old_var));

  return new_var;
}
167 /* Destroys variable V.
168 V must not belong to a dictionary. If it does, use
169 dict_delete_var instead. */
171 var_destroy (struct variable *v)
175 assert (!var_has_vardict (v));
176 cat_stored_values_destroy (v->obs_vals);
177 var_clear_short_names (v);
179 val_labs_destroy (v->val_labs);
185 /* Variable names. */
187 /* Return variable V's name. */
189 var_get_name (const struct variable *v)
194 /* Sets V's name to NAME.
195 Do not use this function for a variable in a dictionary. Use
196 dict_rename_var instead. */
198 var_set_name (struct variable *v, const char *name)
200 assert (v->vardict.dict_index == -1);
201 assert (var_is_plausible_name (name, false));
203 str_copy_trunc (v->name, sizeof v->name, name);
204 dict_var_changed (v);
207 /* Returns true if NAME is an acceptable name for a variable,
208 false otherwise. If ISSUE_ERROR is true, issues an
209 explanatory error message on failure. */
211 var_is_valid_name (const char *name, bool issue_error)
216 assert (name != NULL);
218 /* Note that strlen returns number of BYTES, not the number of
220 length = strlen (name);
222 plausible = var_is_plausible_name(name, issue_error);
228 if (!lex_is_id1 (name[0]))
231 msg (SE, _("Character `%c' (in %s) may not appear "
232 "as the first character in a variable name."),
238 for (i = 0; i < length; i++)
240 if (!lex_is_idn (name[i]))
243 msg (SE, _("Character `%c' (in %s) may not appear in "
253 /* Returns true if NAME is an plausible name for a variable,
254 false otherwise. If ISSUE_ERROR is true, issues an
255 explanatory error message on failure.
256 This function makes no use of LC_CTYPE.
259 var_is_plausible_name (const char *name, bool issue_error)
263 assert (name != NULL);
265 /* Note that strlen returns number of BYTES, not the number of
267 length = strlen (name);
271 msg (SE, _("Variable name cannot be empty string."));
274 else if (length > LONG_NAME_LEN)
277 msg (SE, _("Variable name %s exceeds %d-character limit."),
278 name, (int) LONG_NAME_LEN);
282 if (lex_id_to_token (ss_cstr (name)) != T_ID)
285 msg (SE, _("`%s' may not be used as a variable name because it "
286 "is a reserved word."), name);
293 /* A hsh_compare_func that orders variables A and B by their
296 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
298 const struct variable *a = a_;
299 const struct variable *b = b_;
301 return strcasecmp (a->name, b->name);
304 /* A hsh_hash_func that hashes variable V based on its name. */
306 hash_var_by_name (const void *v_, const void *aux UNUSED)
308 const struct variable *v = v_;
310 return hsh_hash_case_string (v->name);
313 /* A hsh_compare_func that orders pointers to variables A and B
316 compare_var_ptrs_by_name (const void *a_, const void *b_,
317 const void *aux UNUSED)
319 struct variable *const *a = a_;
320 struct variable *const *b = b_;
322 return strcasecmp (var_get_name (*a), var_get_name (*b));
325 /* A hsh_hash_func that hashes pointer to variable V based on its
328 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
330 struct variable *const *v = v_;
332 return hsh_hash_case_string (var_get_name (*v));
335 /* Returns the type of variable V. */
337 var_get_type (const struct variable *v)
339 return var_type_from_width (v->width);
342 /* Returns the width of variable V. */
344 var_get_width (const struct variable *v)
349 /* Sets the width of V to WIDTH. */
351 var_set_width (struct variable *v, int new_width)
353 const int old_width = v->width;
355 if (mv_is_resizable (&v->miss, new_width))
356 mv_resize (&v->miss, new_width);
358 mv_init (&v->miss, new_width);
360 if (v->val_labs != NULL)
362 if (val_labs_can_set_width (v->val_labs, new_width))
363 val_labs_set_width (v->val_labs, new_width);
366 val_labs_destroy (v->val_labs);
371 fmt_resize (&v->print, new_width);
372 fmt_resize (&v->write, new_width);
374 v->width = new_width;
377 const int old_val_count = value_cnt_from_width (old_width);
378 const int new_val_count = value_cnt_from_width (new_width);
380 if ( old_val_count != new_val_count)
381 dict_var_resized (v, new_val_count - old_val_count);
384 dict_var_changed (v);
387 /* Returns true if variable V is numeric, false otherwise. */
389 var_is_numeric (const struct variable *v)
391 return var_get_type (v) == VAR_NUMERIC;
394 /* Returns true if variable V is a string variable, false
397 var_is_alpha (const struct variable *v)
399 return var_get_type (v) == VAR_STRING;
402 /* Returns true if variable V is a short string variable, false
405 var_is_short_string (const struct variable *v)
407 return v->width > 0 && v->width <= MAX_SHORT_STRING;
410 /* Returns true if variable V is a long string variable, false
413 var_is_long_string (const struct variable *v)
415 return v->width > MAX_SHORT_STRING;
418 /* Returns the number of "union value"s need to store a value of
421 var_get_value_cnt (const struct variable *v)
423 return value_cnt_from_width (v->width);
426 /* Returns variable V's missing values. */
427 const struct missing_values *
428 var_get_missing_values (const struct variable *v)
433 /* Sets variable V's missing values to MISS, which must be of V's
434 width or at least resizable to V's width.
435 If MISS is null, then V's missing values, if any, are
438 var_set_missing_values (struct variable *v, const struct missing_values *miss)
442 assert (mv_is_resizable (miss, v->width));
443 mv_copy (&v->miss, miss);
444 mv_resize (&v->miss, v->width);
447 mv_init (&v->miss, v->width);
449 dict_var_changed (v);
/* Removes all user-missing values from variable V, by passing a
   null set to var_set_missing_values. */
void
var_clear_missing_values (struct variable *v)
{
  const struct missing_values *none = NULL;

  var_set_missing_values (v, none);
}
459 /* Returns true if V has any user-missing values,
462 var_has_missing_values (const struct variable *v)
464 return !mv_is_empty (&v->miss);
467 /* Returns true if VALUE is in the given CLASS of missing values
468 in V, false otherwise. */
470 var_is_value_missing (const struct variable *v, const union value *value,
473 return mv_is_value_missing (&v->miss, value, class);
476 /* Returns true if D is in the given CLASS of missing values in
478 V must be a numeric variable. */
480 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
482 return mv_is_num_missing (&v->miss, d, class);
485 /* Returns true if S[] is a missing value for V, false otherwise.
486 S[] must contain exactly as many characters as V's width.
487 V must be a string variable. */
489 var_is_str_missing (const struct variable *v, const char s[],
492 return mv_is_str_missing (&v->miss, s, class);
495 /* Returns variable V's value labels,
496 possibly a null pointer if it has none. */
497 const struct val_labs *
498 var_get_value_labels (const struct variable *v)
503 /* Returns true if variable V has at least one value label. */
505 var_has_value_labels (const struct variable *v)
507 return val_labs_count (v->val_labs) > 0;
510 /* Sets variable V's value labels to a copy of VLS,
511 which must have a width equal to V's width or one that can be
512 changed to V's width.
513 If VLS is null, then V's value labels, if any, are removed. */
515 var_set_value_labels (struct variable *v, const struct val_labs *vls)
517 val_labs_destroy (v->val_labs);
522 assert (val_labs_can_set_width (vls, v->width));
523 v->val_labs = val_labs_copy (vls);
524 val_labs_set_width (v->val_labs, v->width);
525 dict_var_changed (v);
529 /* Makes sure that V has a set of value labels,
530 by assigning one to it if necessary. */
532 alloc_value_labels (struct variable *v)
534 assert (!var_is_long_string (v));
535 if (v->val_labs == NULL)
536 v->val_labs = val_labs_create (v->width);
539 /* Attempts to add a value label with the given VALUE and LABEL
540 to V. Returns true if successful, false if VALUE has an
542 V must not be a long string variable. */
544 var_add_value_label (struct variable *v,
545 const union value *value, const char *label)
547 alloc_value_labels (v);
548 return val_labs_add (v->val_labs, *value, label);
551 /* Adds or replaces a value label with the given VALUE and LABEL
553 V must not be a long string variable. */
555 var_replace_value_label (struct variable *v,
556 const union value *value, const char *label)
558 alloc_value_labels (v);
559 val_labs_replace (v->val_labs, *value, label);
/* Removes V's value labels, if any, by passing a null set to
   var_set_value_labels. */
void
var_clear_value_labels (struct variable *v)
{
  const struct val_labs *none = NULL;

  var_set_value_labels (v, none);
}
569 /* Returns the label associated with VALUE for variable V,
570 or a null pointer if none. */
572 var_lookup_value_label (const struct variable *v, const union value *value)
574 return val_labs_find (v->val_labs, *value);
577 /* Get a string representing VALUE for variable V.
578 That is, if VALUE has a label, return that label,
579 otherwise format VALUE and return the formatted string. */
581 var_get_value_name (const struct variable *v, const union value *value)
583 const char *name = var_lookup_value_label (v, value);
586 static char buf[MAX_STRING + 1];
587 data_out (value, &v->print, buf);
588 buf[v->print.w] = '\0';
594 /* Print and write formats. */
596 /* Returns V's print format specification. */
597 const struct fmt_spec *
598 var_get_print_format (const struct variable *v)
603 /* Sets V's print format specification to PRINT, which must be a
604 valid format specification for outputting a variable of V's
607 var_set_print_format (struct variable *v, const struct fmt_spec *print)
609 assert (fmt_check_width_compat (print, v->width));
611 dict_var_changed (v);
614 /* Returns V's write format specification. */
615 const struct fmt_spec *
616 var_get_write_format (const struct variable *v)
621 /* Sets V's write format specification to WRITE, which must be a
622 valid format specification for outputting a variable of V's
625 var_set_write_format (struct variable *v, const struct fmt_spec *write)
627 assert (fmt_check_width_compat (write, v->width));
629 dict_var_changed (v);
/* Applies FORMAT as both the print and the write format
   specification of V, in that order.  FORMAT must be a valid
   format specification for outputting a variable of V's
   width. */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  var_set_print_format (v, format);

  var_set_write_format (v, format);
}
642 /* Return a string representing this variable, in the form most
643 appropriate from a human factors perspective, that is, its
644 variable label if it has one, otherwise its name. */
646 var_to_string (const struct variable *v)
648 return v->label != NULL ? v->label : v->name;
651 /* Returns V's variable label, or a null pointer if it has none. */
653 var_get_label (const struct variable *v)
658 /* Sets V's variable label to LABEL, stripping off leading and
659 trailing white space and truncating to 255 characters.
660 If LABEL is a null pointer or if LABEL is an empty string
661 (after stripping white space), then V's variable label (if
664 var_set_label (struct variable *v, const char *label)
671 struct substring s = ss_cstr (label);
672 ss_trim (&s, ss_cstr (CC_SPACES));
673 ss_truncate (&s, 255);
674 if (!ss_is_empty (s))
675 v->label = ss_xstrdup (s);
676 dict_var_changed (v);
/* Removes any variable label from V, by passing a null label to
   var_set_label. */
void
var_clear_label (struct variable *v)
{
  const char *none = NULL;

  var_set_label (v, none);
}
687 /* Returns true if V has a variable V,
690 var_has_label (const struct variable *v)
692 return v->label != NULL;
695 /* Returns true if M is a valid variable measurement level,
698 measure_is_valid (enum measure m)
700 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
703 /* Returns V's measurement level. */
705 var_get_measure (const struct variable *v)
710 /* Sets V's measurement level to MEASURE. */
712 var_set_measure (struct variable *v, enum measure measure)
714 assert (measure_is_valid (measure));
715 v->measure = measure;
716 dict_var_changed (v);
719 /* Returns V's display width, which applies only to GUIs. */
721 var_get_display_width (const struct variable *v)
723 return v->display_width;
726 /* Sets V's display width to DISPLAY_WIDTH. */
728 var_set_display_width (struct variable *v, int display_width)
730 v->display_width = display_width;
731 dict_var_changed (v);
/* Returns the default display width for a variable of the given
   WIDTH, as set by var_create.  The return value can be used to
   reset a variable's display width to the default.

   The default is 8 for a numeric variable (WIDTH 0); for a
   string variable it is WIDTH itself, capped at 32. */
int
var_default_display_width (int width)
{
  if (width == 0)
    return 8;
  return width < 32 ? width : 32;
}
743 /* Returns true if A is a valid alignment,
746 alignment_is_valid (enum alignment a)
748 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
751 /* Returns V's display alignment, which applies only to GUIs. */
753 var_get_alignment (const struct variable *v)
758 /* Sets V's display alignment to ALIGNMENT. */
760 var_set_alignment (struct variable *v, enum alignment alignment)
762 assert (alignment_is_valid (alignment));
763 v->alignment = alignment;
764 dict_var_changed (v);
/* Whether variables' values should be preserved from case to
   case. */
770 /* Returns true if variable V's value should be left from case to
771 case, instead of being reset to 0, system-missing, or blanks. */
773 var_get_leave (const struct variable *v)
778 /* Sets V's leave setting to LEAVE. */
780 var_set_leave (struct variable *v, bool leave)
782 assert (leave || !var_must_leave (v));
784 dict_var_changed (v);
787 /* Returns true if V must be left from case to case,
788 false if it can be set either way. */
790 var_must_leave (const struct variable *v)
792 return dict_class_from_id (v->name) == DC_SCRATCH;
795 /* Returns the number of short names stored in VAR.
797 Short names are used only for system and portable file input
798 and output. They are upper-case only, not necessarily unique,
799 and limited to SHORT_NAME_LEN characters (plus a null
800 terminator). Ordinarily a variable has at most one short
801 name, but very long string variables (longer than 255 bytes)
802 may have more. A variable might not have any short name at
803 all if it hasn't been saved to or read from a system or
806 var_get_short_name_cnt (const struct variable *var)
808 return var->short_name_cnt;
811 /* Returns VAR's short name with the given IDX, if it has one
812 with that index, or a null pointer otherwise. Short names may
813 be sparse: even if IDX is less than the number of short names
814 in VAR, this function may return a null pointer. */
816 var_get_short_name (const struct variable *var, size_t idx)
818 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
821 /* Sets VAR's short name with the given IDX to SHORT_NAME,
822 truncating it to SHORT_NAME_LEN characters and converting it
823 to uppercase in the process. Specifying a null pointer for
824 SHORT_NAME clears the specified short name. */
826 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
828 assert (var != NULL);
829 assert (short_name == NULL || var_is_plausible_name (short_name, false));
831 /* Clear old short name numbered IDX, if any. */
832 if (idx < var->short_name_cnt)
834 free (var->short_names[idx]);
835 var->short_names[idx] = NULL;
838 /* Install new short name for IDX. */
839 if (short_name != NULL)
841 if (idx >= var->short_name_cnt)
843 size_t old_cnt = var->short_name_cnt;
845 var->short_name_cnt = MAX (idx * 2, 1);
846 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
847 sizeof *var->short_names);
848 for (i = old_cnt; i < var->short_name_cnt; i++)
849 var->short_names[i] = NULL;
851 var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING);
852 str_uppercase (var->short_names[idx]);
855 dict_var_changed (var);
858 /* Clears V's short names. */
860 var_clear_short_names (struct variable *v)
864 for (i = 0; i < v->short_name_cnt; i++)
865 free (v->short_names[i]);
866 free (v->short_names);
867 v->short_names = NULL;
868 v->short_name_cnt = 0;
871 /* Relationship with dictionary. */
873 /* Returns V's index within its dictionary, the value
874 for which "dict_get_var (dict, index)" will return V.
875 V must be in a dictionary. */
877 var_get_dict_index (const struct variable *v)
879 assert (v->vardict.dict_index != -1);
880 return v->vardict.dict_index;
883 /* Returns V's index within the case represented by its
884 dictionary, that is, the value for which "case_data_idx (case,
885 index)" will return the data for V in that case.
886 V must be in a dictionary. */
888 var_get_case_index (const struct variable *v)
890 assert (v->vardict.case_index != -1);
891 return v->vardict.case_index;
894 /* Returns V's auxiliary data, or a null pointer if none has been
897 var_get_aux (const struct variable *v)
902 /* Assign auxiliary data AUX to variable V, which must not
903 already have auxiliary data. Before V's auxiliary data is
904 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
905 may be appropriate for use as AUX_DTOR.) */
907 var_attach_aux (const struct variable *v_,
908 void *aux, void (*aux_dtor) (struct variable *))
910 struct variable *v = (struct variable *) v_ ; /* cast away const */
911 assert (v->aux == NULL);
912 assert (aux != NULL);
914 v->aux_dtor = aux_dtor;
918 /* Remove auxiliary data, if any, from V, and return it, without
919 calling any associated destructor. */
921 var_detach_aux (struct variable *v)
924 assert (aux != NULL);
929 /* Clears auxiliary data, if any, from V, and calls any
930 associated destructor. */
932 var_clear_aux (struct variable *v)
937 if (v->aux_dtor != NULL)
943 /* This function is appropriate for use an auxiliary data
944 destructor (passed as AUX_DTOR to var_attach_aux()) for the
945 case where the auxiliary data should be passed to free(). */
947 var_dtor_free (struct variable *v)
952 /* Observed categorical values. */
954 /* Returns V's observed categorical values,
955 which V must have. */
957 var_get_obs_vals (const struct variable *v)
959 assert (v->obs_vals != NULL);
963 /* Sets V's observed categorical values to CAT_VALS. */
965 var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals)
967 struct variable *v = (struct variable *) v_ ; /* cast away const */
968 cat_stored_values_destroy (v->obs_vals);
969 v->obs_vals = cat_vals;
972 /* Returns true if V has observed categorical values,
975 var_has_obs_vals (const struct variable *v)
977 return v->obs_vals != NULL;
980 /* Returns the dictionary class corresponding to a variable named
983 dict_class_from_id (const char *name)
996 /* Returns the name of dictionary class DICT_CLASS. */
998 dict_class_to_name (enum dict_class dict_class)
1003 return _("ordinary");
1007 return _("scratch");
1013 /* Returns V's vardict structure. */
1014 const struct vardict_info *
1015 var_get_vardict (const struct variable *v)
1017 assert (var_has_vardict (v));
1021 /* Sets V's vardict data to VARDICT. */
1023 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
1025 assert (vardict->dict_index >= 0);
1026 assert (vardict->case_index >= 0);
1027 v->vardict = *vardict;
1030 /* Returns true if V has vardict data. */
1032 var_has_vardict (const struct variable *v)
1034 return v->vardict.dict_index != -1;
1037 /* Clears V's vardict data. */
1039 var_clear_vardict (struct variable *v)
1041 v->vardict.dict_index = v->vardict.case_index = -1;