1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include "dictionary.h"
27 #include "identifier.h"
28 #include "missing-values.h"
29 #include "value-labels.h"
32 #include <libpspp/misc.h>
33 #include <libpspp/alloc.h>
34 #include <libpspp/assertion.h>
35 #include <libpspp/compiler.h>
36 #include <libpspp/hash.h>
37 #include <libpspp/message.h>
38 #include <libpspp/str.h>
41 #define _(msgid) gettext (msgid)
46 /* Dictionary information. */
47 char name[LONG_NAME_LEN + 1]; /* Variable name. Mixed case. */
48 int width; /* 0 for numeric, otherwise string width. */
49 struct missing_values miss; /* Missing values. */
50 struct fmt_spec print; /* Default format for PRINT. */
51 struct fmt_spec write; /* Default format for WRITE. */
52 struct val_labs *val_labs; /* Value labels. */
53 char *label; /* Variable label. */
55 /* GUI information. */
56 enum measure measure; /* Nominal, ordinal, or continuous. */
57 int display_width; /* Width of data editor column. */
58 enum alignment alignment; /* Alignment of data in GUI. */
60 /* Case information. */
61 bool leave; /* Leave value from case to case? */
63 /* Data for use by containing dictionary. */
64 struct vardict_info vardict;
66 /* Short name, used only for system and portable file input
67 and output. Upper case only. Short names are not necessarily
68 unique. Any variable may have no short name, indicated by an
70 char short_name[SHORT_NAME_LEN + 1];
72 /* Each command may use these fields as needed. */
74 void (*aux_dtor) (struct variable *);
76 /* Values of a categorical variable. Procedures need
77 vectors with binary entries, so any variable of type ALPHA will
78 have its values stored here. */
79 struct cat_vals *obs_vals;
82 /* Returns true if VAR_TYPE is a valid variable type. */
84 var_type_is_valid (enum var_type var_type)
86 return var_type == VAR_NUMERIC || var_type == VAR_STRING;
89 /* Returns the variable type for the given width. */
91 var_type_from_width (int width)
93 return width != 0 ? VAR_STRING : VAR_NUMERIC;
96 /* Creates and returns a new variable with the given NAME and
97 WIDTH and other fields initialized to default values. The
98 variable is not added to a dictionary; for that, use
99 dict_create_var instead. */
101 var_create (const char *name, int width)
105 assert (width >= 0 && width <= MAX_STRING);
107 v = xmalloc (sizeof *v);
108 v->vardict.dict_index = v->vardict.case_index = -1;
109 var_set_name (v, name);
111 mv_init (&v->miss, width);
112 v->leave = var_must_leave (v);
113 if (var_is_numeric (v))
115 v->print = fmt_for_output (FMT_F, 8, 2);
116 v->alignment = ALIGN_RIGHT;
117 v->display_width = 8;
118 v->measure = MEASURE_SCALE;
122 v->print = fmt_for_output (FMT_A, var_get_width (v), 0);
123 v->alignment = ALIGN_LEFT;
124 v->display_width = 8;
125 v->measure = MEASURE_NOMINAL;
130 var_clear_short_name (v);
/* Creates and returns a clone of OLD_VAR.  Most properties of
   the new variable are copied from OLD_VAR, except:

    - The variable's short name is not copied, because there is
      no reason to give a new variable with potentially a new
      name the same short name.

    - The new variable is not added to OLD_VAR's dictionary by
      default.  Use dict_clone_var, instead, to do that.

    - Auxiliary data and obs_vals are not copied.

   The caller owns the returned variable. */
struct variable *
var_clone (const struct variable *old_var)
{
  struct variable *new_var = var_create (var_get_name (old_var),
                                         var_get_width (old_var));

  var_set_missing_values (new_var, var_get_missing_values (old_var));
  var_set_print_format (new_var, var_get_print_format (old_var));
  var_set_write_format (new_var, var_get_write_format (old_var));
  var_set_value_labels (new_var, var_get_value_labels (old_var));
  var_set_label (new_var, var_get_label (old_var));
  var_set_measure (new_var, var_get_measure (old_var));
  var_set_display_width (new_var, var_get_display_width (old_var));
  var_set_alignment (new_var, var_get_alignment (old_var));
  var_set_leave (new_var, var_get_leave (old_var));

  return new_var;
}
168 /* Destroys variable V.
169 V must not belong to a dictionary. If it does, use
170 dict_delete_var instead. */
172 var_destroy (struct variable *v)
176 assert (!var_has_vardict (v));
177 cat_stored_values_destroy (v->obs_vals);
179 val_labs_destroy (v->val_labs);
185 /* Variable names. */
187 /* Return variable V's name. */
189 var_get_name (const struct variable *v)
194 /* Sets V's name to NAME.
195 Do not use this function for a variable in a dictionary. Use
196 dict_rename_var instead. */
198 var_set_name (struct variable *v, const char *name)
200 assert (v->vardict.dict_index == -1);
201 assert (var_is_plausible_name (name, false));
203 str_copy_trunc (v->name, sizeof v->name, name);
204 dict_var_changed (v);
207 /* Returns true if NAME is an acceptable name for a variable,
208 false otherwise. If ISSUE_ERROR is true, issues an
209 explanatory error message on failure. */
211 var_is_valid_name (const char *name, bool issue_error)
216 assert (name != NULL);
218 /* Note that strlen returns number of BYTES, not the number of
220 length = strlen (name);
222 plausible = var_is_plausible_name(name, issue_error);
228 if (!lex_is_id1 (name[0]))
231 msg (SE, _("Character `%c' (in %s) may not appear "
232 "as the first character in a variable name."),
238 for (i = 0; i < length; i++)
240 if (!lex_is_idn (name[i]))
243 msg (SE, _("Character `%c' (in %s) may not appear in "
253 /* Returns true if NAME is an plausible name for a variable,
254 false otherwise. If ISSUE_ERROR is true, issues an
255 explanatory error message on failure.
256 This function makes no use of LC_CTYPE.
259 var_is_plausible_name (const char *name, bool issue_error)
263 assert (name != NULL);
265 /* Note that strlen returns number of BYTES, not the number of
267 length = strlen (name);
271 msg (SE, _("Variable name cannot be empty string."));
274 else if (length > LONG_NAME_LEN)
277 msg (SE, _("Variable name %s exceeds %d-character limit."),
278 name, (int) LONG_NAME_LEN);
282 if (lex_id_to_token (ss_cstr (name)) != T_ID)
285 msg (SE, _("`%s' may not be used as a variable name because it "
286 "is a reserved word."), name);
293 /* A hsh_compare_func that orders variables A and B by their
296 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
298 const struct variable *a = a_;
299 const struct variable *b = b_;
301 return strcasecmp (a->name, b->name);
304 /* A hsh_hash_func that hashes variable V based on its name. */
306 hash_var_by_name (const void *v_, const void *aux UNUSED)
308 const struct variable *v = v_;
310 return hsh_hash_case_string (v->name);
313 /* A hsh_compare_func that orders pointers to variables A and B
316 compare_var_ptrs_by_name (const void *a_, const void *b_,
317 const void *aux UNUSED)
319 struct variable *const *a = a_;
320 struct variable *const *b = b_;
322 return strcasecmp (var_get_name (*a), var_get_name (*b));
325 /* A hsh_hash_func that hashes pointer to variable V based on its
328 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
330 struct variable *const *v = v_;
332 return hsh_hash_case_string (var_get_name (*v));
335 /* Returns the type of variable V. */
337 var_get_type (const struct variable *v)
339 return var_type_from_width (v->width);
342 /* Returns the width of variable V. */
344 var_get_width (const struct variable *v)
349 /* Sets the width of V to WIDTH. */
351 var_set_width (struct variable *v, int new_width)
353 const int old_width = v->width;
354 enum var_type new_type = var_type_from_width (new_width);
356 if (mv_is_resizable (&v->miss, new_width))
357 mv_resize (&v->miss, new_width);
359 mv_init (&v->miss, new_width);
361 if (v->val_labs != NULL)
363 if (val_labs_can_set_width (v->val_labs, new_width))
364 val_labs_set_width (v->val_labs, new_width);
367 val_labs_destroy (v->val_labs);
372 if (var_get_type (v) != new_type)
374 v->print = (new_type == VAR_NUMERIC
375 ? fmt_for_output (FMT_F, 8, 2)
376 : fmt_for_output (FMT_A, new_width, 0));
379 else if (new_type == VAR_STRING)
381 v->print.w = v->print.type == FMT_AHEX ? new_width * 2 : new_width;
382 v->write.w = v->write.type == FMT_AHEX ? new_width * 2 : new_width;
385 v->width = new_width;
388 const int old_val_count = value_cnt_from_width (old_width);
389 const int new_val_count = value_cnt_from_width (new_width);
391 if ( old_val_count != new_val_count)
392 dict_var_resized (v, new_val_count - old_val_count);
395 dict_var_changed (v);
398 /* Returns true if variable V is numeric, false otherwise. */
400 var_is_numeric (const struct variable *v)
402 return var_get_type (v) == VAR_NUMERIC;
405 /* Returns true if variable V is a string variable, false
408 var_is_alpha (const struct variable *v)
410 return var_get_type (v) == VAR_STRING;
413 /* Returns true if variable V is a short string variable, false
416 var_is_short_string (const struct variable *v)
418 return v->width > 0 && v->width <= MAX_SHORT_STRING;
421 /* Returns true if variable V is a long string variable, false
424 var_is_long_string (const struct variable *v)
426 return v->width > MAX_SHORT_STRING;
/* Returns the number of "union value"s needed to store a value of
432 var_get_value_cnt (const struct variable *v)
434 return value_cnt_from_width (v->width);
437 /* Returns variable V's missing values. */
438 const struct missing_values *
439 var_get_missing_values (const struct variable *v)
444 /* Sets variable V's missing values to MISS, which must be of V's
445 width or at least resizable to V's width.
446 If MISS is null, then V's missing values, if any, are
449 var_set_missing_values (struct variable *v, const struct missing_values *miss)
453 assert (mv_is_resizable (miss, v->width));
454 mv_copy (&v->miss, miss);
455 mv_resize (&v->miss, v->width);
458 mv_init (&v->miss, v->width);
460 dict_var_changed (v);
/* Sets variable V to have no user-missing values. */
void
var_clear_missing_values (struct variable *v)
{
  var_set_missing_values (v, NULL);
}
470 /* Returns true if V has any user-missing values,
473 var_has_missing_values (const struct variable *v)
475 return !mv_is_empty (&v->miss);
478 /* Returns true if VALUE is in the given CLASS of missing values
479 in V, false otherwise. */
481 var_is_value_missing (const struct variable *v, const union value *value,
484 return mv_is_value_missing (&v->miss, value, class);
487 /* Returns true if D is in the given CLASS of missing values in
489 V must be a numeric variable. */
491 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
493 return mv_is_num_missing (&v->miss, d, class);
496 /* Returns true if S[] is a missing value for V, false otherwise.
497 S[] must contain exactly as many characters as V's width.
498 V must be a string variable. */
500 var_is_str_missing (const struct variable *v, const char s[],
503 return mv_is_str_missing (&v->miss, s, class);
506 /* Returns variable V's value labels,
507 possibly a null pointer if it has none. */
508 const struct val_labs *
509 var_get_value_labels (const struct variable *v)
514 /* Returns true if variable V has at least one value label. */
516 var_has_value_labels (const struct variable *v)
518 return val_labs_count (v->val_labs) > 0;
521 /* Sets variable V's value labels to a copy of VLS,
522 which must have a width equal to V's width or one that can be
523 changed to V's width.
524 If VLS is null, then V's value labels, if any, are removed. */
526 var_set_value_labels (struct variable *v, const struct val_labs *vls)
528 val_labs_destroy (v->val_labs);
533 assert (val_labs_can_set_width (vls, v->width));
534 v->val_labs = val_labs_copy (vls);
535 val_labs_set_width (v->val_labs, v->width);
536 dict_var_changed (v);
540 /* Makes sure that V has a set of value labels,
541 by assigning one to it if necessary. */
543 alloc_value_labels (struct variable *v)
545 assert (!var_is_long_string (v));
546 if (v->val_labs == NULL)
547 v->val_labs = val_labs_create (v->width);
550 /* Attempts to add a value label with the given VALUE and LABEL
551 to V. Returns true if successful, false if VALUE has an
553 V must not be a long string variable. */
555 var_add_value_label (struct variable *v,
556 const union value *value, const char *label)
558 alloc_value_labels (v);
559 return val_labs_add (v->val_labs, *value, label);
562 /* Adds or replaces a value label with the given VALUE and LABEL
564 V must not be a long string variable. */
566 var_replace_value_label (struct variable *v,
567 const union value *value, const char *label)
569 alloc_value_labels (v);
570 val_labs_replace (v->val_labs, *value, label);
/* Removes V's value labels, if any. */
void
var_clear_value_labels (struct variable *v)
{
  var_set_value_labels (v, NULL);
}
580 /* Returns the label associated with VALUE for variable V,
581 or a null pointer if none. */
583 var_lookup_value_label (const struct variable *v, const union value *value)
585 return val_labs_find (v->val_labs, *value);
588 /* Get a string representing VALUE for variable V.
589 That is, if VALUE has a label, return that label,
590 otherwise format VALUE and return the formatted string. */
592 var_get_value_name (const struct variable *v, const union value *value)
594 const char *name = var_lookup_value_label (v, value);
597 static char buf[MAX_STRING + 1];
598 data_out (value, &v->print, buf);
599 buf[v->print.w] = '\0';
605 /* Print and write formats. */
607 /* Returns V's print format specification. */
608 const struct fmt_spec *
609 var_get_print_format (const struct variable *v)
614 /* Sets V's print format specification to PRINT, which must be a
615 valid format specification for outputting a variable of V's
618 var_set_print_format (struct variable *v, const struct fmt_spec *print)
620 assert (fmt_check_width_compat (print, v->width));
622 dict_var_changed (v);
625 /* Returns V's write format specification. */
626 const struct fmt_spec *
627 var_get_write_format (const struct variable *v)
632 /* Sets V's write format specification to WRITE, which must be a
633 valid format specification for outputting a variable of V's
636 var_set_write_format (struct variable *v, const struct fmt_spec *write)
638 assert (fmt_check_width_compat (write, v->width));
640 dict_var_changed (v);
/* Sets V's print and write format specifications to FORMAT,
   which must be a valid format specification for outputting a
   variable of V's width. */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  var_set_print_format (v, format);
  var_set_write_format (v, format);
}
653 /* Return a string representing this variable, in the form most
654 appropriate from a human factors perspective, that is, its
655 variable label if it has one, otherwise its name. */
657 var_to_string (const struct variable *v)
659 return v->label != NULL ? v->label : v->name;
662 /* Returns V's variable label, or a null pointer if it has none. */
664 var_get_label (const struct variable *v)
669 /* Sets V's variable label to LABEL, stripping off leading and
670 trailing white space and truncating to 255 characters.
671 If LABEL is a null pointer or if LABEL is an empty string
672 (after stripping white space), then V's variable label (if
675 var_set_label (struct variable *v, const char *label)
682 struct substring s = ss_cstr (label);
683 ss_trim (&s, ss_cstr (CC_SPACES));
684 ss_truncate (&s, 255);
685 if (!ss_is_empty (s))
686 v->label = ss_xstrdup (s);
687 dict_var_changed (v);
/* Removes any variable label from V. */
void
var_clear_label (struct variable *v)
{
  var_set_label (v, NULL);
}
698 /* Returns true if V has a variable V,
701 var_has_label (const struct variable *v)
703 return v->label != NULL;
706 /* Returns true if M is a valid variable measurement level,
709 measure_is_valid (enum measure m)
711 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
714 /* Returns V's measurement level. */
716 var_get_measure (const struct variable *v)
721 /* Sets V's measurement level to MEASURE. */
723 var_set_measure (struct variable *v, enum measure measure)
725 assert (measure_is_valid (measure));
726 v->measure = measure;
727 dict_var_changed (v);
730 /* Returns V's display width, which applies only to GUIs. */
732 var_get_display_width (const struct variable *v)
734 return v->display_width;
740 /* Sets V's display width to DISPLAY_WIDTH. */
742 var_set_display_width (struct variable *v, int display_width)
744 v->display_width = display_width;
745 dict_var_changed (v);
748 /* Returns true if A is a valid alignment,
751 alignment_is_valid (enum alignment a)
753 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
756 /* Returns V's display alignment, which applies only to GUIs. */
758 var_get_alignment (const struct variable *v)
763 /* Sets V's display alignment to ALIGNMENT. */
765 var_set_alignment (struct variable *v, enum alignment alignment)
767 assert (alignment_is_valid (alignment));
768 v->alignment = alignment;
769 dict_var_changed (v);
772 /* Whether variables' values should be preserved from case to
775 /* Returns true if variable V's value should be left from case to
776 case, instead of being reset to 0, system-missing, or blanks. */
778 var_get_leave (const struct variable *v)
783 /* Sets V's leave setting to LEAVE. */
785 var_set_leave (struct variable *v, bool leave)
787 assert (leave || !var_must_leave (v));
789 dict_var_changed (v);
792 /* Returns true if V must be left from case to case,
793 false if it can be set either way. */
795 var_must_leave (const struct variable *v)
797 return dict_class_from_id (v->name) == DC_SCRATCH;
800 /* Returns V's short name, if it has one, or a null pointer
803 Short names are used only for system and portable file input
804 and output. They are upper-case only, not necessarily unique,
805 and limited to SHORT_NAME_LEN characters (plus a null
806 terminator). Any variable may have no short name, indicated
807 by returning a null pointer. */
809 var_get_short_name (const struct variable *v)
811 return v->short_name[0] != '\0' ? v->short_name : NULL;
814 /* Sets V's short_name to SHORT_NAME, truncating it to
815 SHORT_NAME_LEN characters and converting it to uppercase in
816 the process. Specifying a null pointer for SHORT_NAME clears
817 the variable's short name. */
819 var_set_short_name (struct variable *v, const char *short_name)
822 assert (short_name == NULL || var_is_plausible_name (short_name, false));
824 if (short_name != NULL)
826 str_copy_trunc (v->short_name, sizeof v->short_name, short_name);
827 str_uppercase (v->short_name);
830 v->short_name[0] = '\0';
831 dict_var_changed (v);
834 /* Clears V's short name. */
836 var_clear_short_name (struct variable *v)
840 v->short_name[0] = '\0';
843 /* Relationship with dictionary. */
845 /* Returns V's index within its dictionary, the value
846 for which "dict_get_var (dict, index)" will return V.
847 V must be in a dictionary. */
849 var_get_dict_index (const struct variable *v)
851 assert (v->vardict.dict_index != -1);
852 return v->vardict.dict_index;
855 /* Returns V's index within the case represented by its
856 dictionary, that is, the value for which "case_data_idx (case,
857 index)" will return the data for V in that case.
858 V must be in a dictionary. */
860 var_get_case_index (const struct variable *v)
862 assert (v->vardict.case_index != -1);
863 return v->vardict.case_index;
866 /* Returns V's auxiliary data, or a null pointer if none has been
869 var_get_aux (const struct variable *v)
874 /* Assign auxiliary data AUX to variable V, which must not
875 already have auxiliary data. Before V's auxiliary data is
876 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
877 may be appropriate for use as AUX_DTOR.) */
879 var_attach_aux (const struct variable *v_,
880 void *aux, void (*aux_dtor) (struct variable *))
882 struct variable *v = (struct variable *) v_ ; /* cast away const */
883 assert (v->aux == NULL);
884 assert (aux != NULL);
886 v->aux_dtor = aux_dtor;
890 /* Remove auxiliary data, if any, from V, and return it, without
891 calling any associated destructor. */
893 var_detach_aux (struct variable *v)
896 assert (aux != NULL);
901 /* Clears auxiliary data, if any, from V, and calls any
902 associated destructor. */
904 var_clear_aux (struct variable *v)
909 if (v->aux_dtor != NULL)
/* This function is appropriate for use as an auxiliary data
916 destructor (passed as AUX_DTOR to var_attach_aux()) for the
917 case where the auxiliary data should be passed to free(). */
919 var_dtor_free (struct variable *v)
924 /* Observed categorical values. */
926 /* Returns V's observed categorical values,
927 which V must have. */
929 var_get_obs_vals (const struct variable *v)
931 assert (v->obs_vals != NULL);
935 /* Sets V's observed categorical values to CAT_VALS. */
937 var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals)
939 struct variable *v = (struct variable *) v_ ; /* cast away const */
940 cat_stored_values_destroy (v->obs_vals);
941 v->obs_vals = cat_vals;
944 /* Returns true if V has observed categorical values,
947 var_has_obs_vals (const struct variable *v)
949 return v->obs_vals != NULL;
952 /* Returns the dictionary class corresponding to a variable named
955 dict_class_from_id (const char *name)
968 /* Returns the name of dictionary class DICT_CLASS. */
970 dict_class_to_name (enum dict_class dict_class)
975 return _("ordinary");
985 /* Returns V's vardict structure. */
986 const struct vardict_info *
987 var_get_vardict (const struct variable *v)
989 assert (var_has_vardict (v));
993 /* Sets V's vardict data to VARDICT. */
995 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
997 assert (vardict->dict_index >= 0);
998 assert (vardict->case_index >= 0);
999 v->vardict = *vardict;
1002 /* Returns true if V has vardict data. */
1004 var_has_vardict (const struct variable *v)
1006 return v->vardict.dict_index != -1;
1009 /* Clears V's vardict data. */
1011 var_clear_vardict (struct variable *v)
1013 v->vardict.dict_index = v->vardict.case_index = -1;