1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "dictionary.h"
26 #include "identifier.h"
27 #include "missing-values.h"
28 #include "value-labels.h"
31 #include <libpspp/misc.h>
32 #include <libpspp/alloc.h>
33 #include <libpspp/assertion.h>
34 #include <libpspp/compiler.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/message.h>
37 #include <libpspp/str.h>
40 #define _(msgid) gettext (msgid)
45 /* Dictionary information. */
46 char name[LONG_NAME_LEN + 1]; /* Variable name. Mixed case. */
47 int width; /* 0 for numeric, otherwise string width. */
48 struct missing_values miss; /* Missing values. */
49 struct fmt_spec print; /* Default format for PRINT. */
50 struct fmt_spec write; /* Default format for WRITE. */
51 struct val_labs *val_labs; /* Value labels. */
52 char *label; /* Variable label. */
54 /* GUI information. */
55 enum measure measure; /* Nominal, ordinal, or continuous. */
56 int display_width; /* Width of data editor column. */
57 enum alignment alignment; /* Alignment of data in GUI. */
59 /* Case information. */
60 bool leave; /* Leave value from case to case? */
62 /* Data for use by containing dictionary. */
63 struct vardict_info vardict;
65 /* Used only for system and portable file input and output.
68 size_t short_name_cnt;
70 /* Each command may use these fields as needed. */
72 void (*aux_dtor) (struct variable *);
74 /* Values of a categorical variable. Procedures need
75 vectors with binary entries, so any variable of type ALPHA will
76 have its values stored here. */
77 struct cat_vals *obs_vals;
80 /* Returns true if VAR_TYPE is a valid variable type. */
82 var_type_is_valid (enum var_type var_type)
84 return var_type == VAR_NUMERIC || var_type == VAR_STRING;
87 /* Returns the variable type for the given width. */
89 var_type_from_width (int width)
91 return width != 0 ? VAR_STRING : VAR_NUMERIC;
94 /* Creates and returns a new variable with the given NAME and
95 WIDTH and other fields initialized to default values. The
96 variable is not added to a dictionary; for that, use
97 dict_create_var instead. */
99 var_create (const char *name, int width)
103 assert (width >= 0 && width <= MAX_STRING);
105 v = xmalloc (sizeof *v);
106 v->vardict.dict_index = v->vardict.case_index = -1;
107 var_set_name (v, name);
109 mv_init (&v->miss, width);
110 v->leave = var_must_leave (v);
111 if (var_is_numeric (v))
113 v->print = fmt_for_output (FMT_F, 8, 2);
114 v->alignment = ALIGN_RIGHT;
115 v->measure = MEASURE_SCALE;
119 v->print = fmt_for_output (FMT_A, var_get_width (v), 0);
120 v->alignment = ALIGN_LEFT;
121 v->measure = MEASURE_NOMINAL;
123 v->display_width = var_default_display_width (width);
127 v->short_names = NULL;
128 v->short_name_cnt = 0;
136 /* Creates and returns a clone of OLD_VAR. Most properties of
137 the new variable are copied from OLD_VAR, except:
139 - The variable's short name is not copied, because there is
140 no reason to give a new variable with potentially a new
141 name the same short name.
143 - The new variable is not added to OLD_VAR's dictionary by
144 default. Use dict_clone_var, instead, to do that.
146 - Auxiliary data and obs_vals are not copied. */
148 var_clone (const struct variable *old_var)
150 struct variable *new_var = var_create (var_get_name (old_var),
151 var_get_width (old_var));
153 var_set_missing_values (new_var, var_get_missing_values (old_var));
154 var_set_print_format (new_var, var_get_print_format (old_var));
155 var_set_write_format (new_var, var_get_write_format (old_var));
156 var_set_value_labels (new_var, var_get_value_labels (old_var));
157 var_set_label (new_var, var_get_label (old_var));
158 var_set_measure (new_var, var_get_measure (old_var));
159 var_set_display_width (new_var, var_get_display_width (old_var));
160 var_set_alignment (new_var, var_get_alignment (old_var));
161 var_set_leave (new_var, var_get_leave (old_var));
166 /* Destroys variable V.
167 V must not belong to a dictionary. If it does, use
168 dict_delete_var instead. */
170 var_destroy (struct variable *v)
174 assert (!var_has_vardict (v));
175 cat_stored_values_destroy (v->obs_vals);
177 val_labs_destroy (v->val_labs);
183 /* Variable names. */
185 /* Return variable V's name. */
187 var_get_name (const struct variable *v)
192 /* Sets V's name to NAME.
193 Do not use this function for a variable in a dictionary. Use
194 dict_rename_var instead. */
196 var_set_name (struct variable *v, const char *name)
198 assert (v->vardict.dict_index == -1);
199 assert (var_is_plausible_name (name, false));
201 str_copy_trunc (v->name, sizeof v->name, name);
202 dict_var_changed (v);
205 /* Returns true if NAME is an acceptable name for a variable,
206 false otherwise. If ISSUE_ERROR is true, issues an
207 explanatory error message on failure. */
209 var_is_valid_name (const char *name, bool issue_error)
214 assert (name != NULL);
216 /* Note that strlen returns number of BYTES, not the number of
218 length = strlen (name);
220 plausible = var_is_plausible_name(name, issue_error);
226 if (!lex_is_id1 (name[0]))
229 msg (SE, _("Character `%c' (in %s) may not appear "
230 "as the first character in a variable name."),
236 for (i = 0; i < length; i++)
238 if (!lex_is_idn (name[i]))
241 msg (SE, _("Character `%c' (in %s) may not appear in "
251 /* Returns true if NAME is a plausible name for a variable,
252 false otherwise. If ISSUE_ERROR is true, issues an
253 explanatory error message on failure.
254 This function makes no use of LC_CTYPE.
257 var_is_plausible_name (const char *name, bool issue_error)
261 assert (name != NULL);
263 /* Note that strlen returns number of BYTES, not the number of
265 length = strlen (name);
269 msg (SE, _("Variable name cannot be empty string."));
272 else if (length > LONG_NAME_LEN)
275 msg (SE, _("Variable name %s exceeds %d-character limit."),
276 name, (int) LONG_NAME_LEN);
280 if (lex_id_to_token (ss_cstr (name)) != T_ID)
283 msg (SE, _("`%s' may not be used as a variable name because it "
284 "is a reserved word."), name);
291 /* A hsh_compare_func that orders variables A and B by their
294 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
296 const struct variable *a = a_;
297 const struct variable *b = b_;
299 return strcasecmp (a->name, b->name);
302 /* A hsh_hash_func that hashes variable V based on its name. */
304 hash_var_by_name (const void *v_, const void *aux UNUSED)
306 const struct variable *v = v_;
308 return hsh_hash_case_string (v->name);
311 /* A hsh_compare_func that orders pointers to variables A and B
314 compare_var_ptrs_by_name (const void *a_, const void *b_,
315 const void *aux UNUSED)
317 struct variable *const *a = a_;
318 struct variable *const *b = b_;
320 return strcasecmp (var_get_name (*a), var_get_name (*b));
323 /* A hsh_hash_func that hashes pointer to variable V based on its
326 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
328 struct variable *const *v = v_;
330 return hsh_hash_case_string (var_get_name (*v));
333 /* Returns the type of variable V. */
335 var_get_type (const struct variable *v)
337 return var_type_from_width (v->width);
340 /* Returns the width of variable V. */
342 var_get_width (const struct variable *v)
347 /* Sets the width of V to WIDTH. */
349 var_set_width (struct variable *v, int new_width)
351 const int old_width = v->width;
353 if (mv_is_resizable (&v->miss, new_width))
354 mv_resize (&v->miss, new_width);
356 mv_init (&v->miss, new_width);
358 if (v->val_labs != NULL)
360 if (val_labs_can_set_width (v->val_labs, new_width))
361 val_labs_set_width (v->val_labs, new_width);
364 val_labs_destroy (v->val_labs);
369 fmt_resize (&v->print, new_width);
370 fmt_resize (&v->write, new_width);
372 v->width = new_width;
375 const int old_val_count = value_cnt_from_width (old_width);
376 const int new_val_count = value_cnt_from_width (new_width);
378 if ( old_val_count != new_val_count)
379 dict_var_resized (v, new_val_count - old_val_count);
382 dict_var_changed (v);
385 /* Returns true if variable V is numeric, false otherwise. */
387 var_is_numeric (const struct variable *v)
389 return var_get_type (v) == VAR_NUMERIC;
392 /* Returns true if variable V is a string variable, false
395 var_is_alpha (const struct variable *v)
397 return var_get_type (v) == VAR_STRING;
400 /* Returns true if variable V is a short string variable, false
403 var_is_short_string (const struct variable *v)
405 return v->width > 0 && v->width <= MAX_SHORT_STRING;
408 /* Returns true if variable V is a long string variable, false
411 var_is_long_string (const struct variable *v)
413 return v->width > MAX_SHORT_STRING;
416 /* Returns the number of "union value"s needed to store a value of
419 var_get_value_cnt (const struct variable *v)
421 return value_cnt_from_width (v->width);
424 /* Returns variable V's missing values. */
425 const struct missing_values *
426 var_get_missing_values (const struct variable *v)
431 /* Sets variable V's missing values to MISS, which must be of V's
432 width or at least resizable to V's width.
433 If MISS is null, then V's missing values, if any, are
436 var_set_missing_values (struct variable *v, const struct missing_values *miss)
440 assert (mv_is_resizable (miss, v->width));
441 mv_copy (&v->miss, miss);
442 mv_resize (&v->miss, v->width);
445 mv_init (&v->miss, v->width);
447 dict_var_changed (v);
450 /* Sets variable V to have no user-missing values. */
452 var_clear_missing_values (struct variable *v)
454 var_set_missing_values (v, NULL);
457 /* Returns true if V has any user-missing values,
460 var_has_missing_values (const struct variable *v)
462 return !mv_is_empty (&v->miss);
465 /* Returns true if VALUE is in the given CLASS of missing values
466 in V, false otherwise. */
468 var_is_value_missing (const struct variable *v, const union value *value,
471 return mv_is_value_missing (&v->miss, value, class);
474 /* Returns true if D is in the given CLASS of missing values in
476 V must be a numeric variable. */
478 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
480 return mv_is_num_missing (&v->miss, d, class);
483 /* Returns true if S[] is a missing value for V, false otherwise.
484 S[] must contain exactly as many characters as V's width.
485 V must be a string variable. */
487 var_is_str_missing (const struct variable *v, const char s[],
490 return mv_is_str_missing (&v->miss, s, class);
493 /* Returns variable V's value labels,
494 possibly a null pointer if it has none. */
495 const struct val_labs *
496 var_get_value_labels (const struct variable *v)
501 /* Returns true if variable V has at least one value label. */
503 var_has_value_labels (const struct variable *v)
505 return val_labs_count (v->val_labs) > 0;
508 /* Sets variable V's value labels to a copy of VLS,
509 which must have a width equal to V's width or one that can be
510 changed to V's width.
511 If VLS is null, then V's value labels, if any, are removed. */
513 var_set_value_labels (struct variable *v, const struct val_labs *vls)
515 val_labs_destroy (v->val_labs);
520 assert (val_labs_can_set_width (vls, v->width));
521 v->val_labs = val_labs_copy (vls);
522 val_labs_set_width (v->val_labs, v->width);
523 dict_var_changed (v);
527 /* Makes sure that V has a set of value labels,
528 by assigning one to it if necessary. */
530 alloc_value_labels (struct variable *v)
532 assert (!var_is_long_string (v));
533 if (v->val_labs == NULL)
534 v->val_labs = val_labs_create (v->width);
537 /* Attempts to add a value label with the given VALUE and LABEL
538 to V. Returns true if successful, false if VALUE has an
540 V must not be a long string variable. */
542 var_add_value_label (struct variable *v,
543 const union value *value, const char *label)
545 alloc_value_labels (v);
546 return val_labs_add (v->val_labs, *value, label);
549 /* Adds or replaces a value label with the given VALUE and LABEL
551 V must not be a long string variable. */
553 var_replace_value_label (struct variable *v,
554 const union value *value, const char *label)
556 alloc_value_labels (v);
557 val_labs_replace (v->val_labs, *value, label);
560 /* Removes V's value labels, if any. */
562 var_clear_value_labels (struct variable *v)
564 var_set_value_labels (v, NULL);
567 /* Returns the label associated with VALUE for variable V,
568 or a null pointer if none. */
570 var_lookup_value_label (const struct variable *v, const union value *value)
572 return val_labs_find (v->val_labs, *value);
575 /* Get a string representing VALUE for variable V.
576 That is, if VALUE has a label, return that label,
577 otherwise format VALUE and return the formatted string. */
579 var_get_value_name (const struct variable *v, const union value *value)
581 const char *name = var_lookup_value_label (v, value);
584 static char buf[MAX_STRING + 1];
585 data_out (value, &v->print, buf);
586 buf[v->print.w] = '\0';
592 /* Print and write formats. */
594 /* Returns V's print format specification. */
595 const struct fmt_spec *
596 var_get_print_format (const struct variable *v)
601 /* Sets V's print format specification to PRINT, which must be a
602 valid format specification for outputting a variable of V's
605 var_set_print_format (struct variable *v, const struct fmt_spec *print)
607 assert (fmt_check_width_compat (print, v->width));
609 dict_var_changed (v);
612 /* Returns V's write format specification. */
613 const struct fmt_spec *
614 var_get_write_format (const struct variable *v)
619 /* Sets V's write format specification to WRITE, which must be a
620 valid format specification for outputting a variable of V's
623 var_set_write_format (struct variable *v, const struct fmt_spec *write)
625 assert (fmt_check_width_compat (write, v->width));
627 dict_var_changed (v);
630 /* Sets V's print and write format specifications to FORMAT,
631 which must be a valid format specification for outputting a
632 variable of V's width. */
634 var_set_both_formats (struct variable *v, const struct fmt_spec *format)
636 var_set_print_format (v, format);
637 var_set_write_format (v, format);
640 /* Return a string representing this variable, in the form most
641 appropriate from a human factors perspective, that is, its
642 variable label if it has one, otherwise its name. */
644 var_to_string (const struct variable *v)
646 return v->label != NULL ? v->label : v->name;
649 /* Returns V's variable label, or a null pointer if it has none. */
651 var_get_label (const struct variable *v)
656 /* Sets V's variable label to LABEL, stripping off leading and
657 trailing white space and truncating to 255 characters.
658 If LABEL is a null pointer or if LABEL is an empty string
659 (after stripping white space), then V's variable label (if
662 var_set_label (struct variable *v, const char *label)
669 struct substring s = ss_cstr (label);
670 ss_trim (&s, ss_cstr (CC_SPACES));
671 ss_truncate (&s, 255);
672 if (!ss_is_empty (s))
673 v->label = ss_xstrdup (s);
674 dict_var_changed (v);
678 /* Removes any variable label from V. */
680 var_clear_label (struct variable *v)
682 var_set_label (v, NULL);
685 /* Returns true if V has a variable label,
688 var_has_label (const struct variable *v)
690 return v->label != NULL;
693 /* Returns true if M is a valid variable measurement level,
696 measure_is_valid (enum measure m)
698 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
701 /* Returns V's measurement level. */
703 var_get_measure (const struct variable *v)
708 /* Sets V's measurement level to MEASURE. */
710 var_set_measure (struct variable *v, enum measure measure)
712 assert (measure_is_valid (measure));
713 v->measure = measure;
714 dict_var_changed (v);
717 /* Returns V's display width, which applies only to GUIs. */
719 var_get_display_width (const struct variable *v)
721 return v->display_width;
724 /* Sets V's display width to DISPLAY_WIDTH. */
726 var_set_display_width (struct variable *v, int display_width)
728 v->display_width = display_width;
729 dict_var_changed (v);
/* Computes the default display width for a variable of the
   given WIDTH, which is the value that var_create assigns.
   A WIDTH of 0 yields 8; any other WIDTH yields WIDTH itself,
   capped at 32.  The result can be used to reset a variable's
   display width to the default. */
int
var_default_display_width (int width)
{
  if (width == 0)
    return 8;
  return MIN (width, 32);
}
741 /* Returns true if A is a valid alignment,
744 alignment_is_valid (enum alignment a)
746 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
749 /* Returns V's display alignment, which applies only to GUIs. */
751 var_get_alignment (const struct variable *v)
756 /* Sets V's display alignment to ALIGNMENT. */
758 var_set_alignment (struct variable *v, enum alignment alignment)
760 assert (alignment_is_valid (alignment));
761 v->alignment = alignment;
762 dict_var_changed (v);
765 /* Whether variables' values should be preserved from case to
768 /* Returns true if variable V's value should be left from case to
769 case, instead of being reset to 0, system-missing, or blanks. */
771 var_get_leave (const struct variable *v)
776 /* Sets V's leave setting to LEAVE. */
778 var_set_leave (struct variable *v, bool leave)
780 assert (leave || !var_must_leave (v));
782 dict_var_changed (v);
785 /* Returns true if V must be left from case to case,
786 false if it can be set either way. */
788 var_must_leave (const struct variable *v)
790 return dict_class_from_id (v->name) == DC_SCRATCH;
793 /* Returns the number of short names stored in VAR.
795 Short names are used only for system and portable file input
796 and output. They are upper-case only, not necessarily unique,
797 and limited to SHORT_NAME_LEN characters (plus a null
798 terminator). Ordinarily a variable has at most one short
799 name, but very long string variables (longer than 255 bytes)
800 may have more. A variable might not have any short name at
801 all if it hasn't been saved to or read from a system or
804 var_get_short_name_cnt (const struct variable *var)
806 return var->short_name_cnt;
809 /* Returns VAR's short name with the given IDX, if it has one
810 with that index, or a null pointer otherwise. Short names may
811 be sparse: even if IDX is less than the number of short names
812 in VAR, this function may return a null pointer. */
814 var_get_short_name (const struct variable *var, size_t idx)
816 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
819 /* Sets VAR's short name with the given IDX to SHORT_NAME,
820 truncating it to SHORT_NAME_LEN characters and converting it
821 to uppercase in the process. Specifying a null pointer for
822 SHORT_NAME clears the specified short name. */
824 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
826 assert (var != NULL);
827 assert (short_name == NULL || var_is_plausible_name (short_name, false));
829 /* Clear old short name numbered IDX, if any. */
830 if (idx < var->short_name_cnt)
832 free (var->short_names[idx]);
833 var->short_names[idx] = NULL;
836 /* Install new short name for IDX. */
837 if (short_name != NULL)
839 if (idx >= var->short_name_cnt)
841 size_t old_cnt = var->short_name_cnt;
844 var->short_name_cnt = MAX (idx * 2, 1);
845 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
846 sizeof *var->short_names);
847 for (i = old_cnt; i < var->short_name_cnt; i++)
848 var->short_names[i] = NULL;
850 var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING);
851 str_uppercase (var->short_names[idx]);
854 dict_var_changed (var);
857 /* Clears V's short names. */
859 var_clear_short_names (struct variable *v)
863 for (i = 0; i < v->short_name_cnt; i++)
864 free (v->short_names[i]);
865 v->short_names = NULL;
866 v->short_name_cnt = 0;
869 /* Relationship with dictionary. */
871 /* Returns V's index within its dictionary, the value
872 for which "dict_get_var (dict, index)" will return V.
873 V must be in a dictionary. */
875 var_get_dict_index (const struct variable *v)
877 assert (v->vardict.dict_index != -1);
878 return v->vardict.dict_index;
881 /* Returns V's index within the case represented by its
882 dictionary, that is, the value for which "case_data_idx (case,
883 index)" will return the data for V in that case.
884 V must be in a dictionary. */
886 var_get_case_index (const struct variable *v)
888 assert (v->vardict.case_index != -1);
889 return v->vardict.case_index;
892 /* Returns V's auxiliary data, or a null pointer if none has been
895 var_get_aux (const struct variable *v)
900 /* Assign auxiliary data AUX to variable V, which must not
901 already have auxiliary data. Before V's auxiliary data is
902 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
903 may be appropriate for use as AUX_DTOR.) */
905 var_attach_aux (const struct variable *v_,
906 void *aux, void (*aux_dtor) (struct variable *))
908 struct variable *v = (struct variable *) v_ ; /* cast away const */
909 assert (v->aux == NULL);
910 assert (aux != NULL);
912 v->aux_dtor = aux_dtor;
916 /* Remove auxiliary data, if any, from V, and return it, without
917 calling any associated destructor. */
919 var_detach_aux (struct variable *v)
922 assert (aux != NULL);
927 /* Clears auxiliary data, if any, from V, and calls any
928 associated destructor. */
930 var_clear_aux (struct variable *v)
935 if (v->aux_dtor != NULL)
941 /* This function is appropriate for use as an auxiliary data
942 destructor (passed as AUX_DTOR to var_attach_aux()) for the
943 case where the auxiliary data should be passed to free(). */
945 var_dtor_free (struct variable *v)
950 /* Observed categorical values. */
952 /* Returns V's observed categorical values,
953 which V must have. */
955 var_get_obs_vals (const struct variable *v)
957 assert (v->obs_vals != NULL);
961 /* Sets V's observed categorical values to CAT_VALS. */
963 var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals)
965 struct variable *v = (struct variable *) v_ ; /* cast away const */
966 cat_stored_values_destroy (v->obs_vals);
967 v->obs_vals = cat_vals;
970 /* Returns true if V has observed categorical values,
973 var_has_obs_vals (const struct variable *v)
975 return v->obs_vals != NULL;
978 /* Returns the dictionary class corresponding to a variable named
981 dict_class_from_id (const char *name)
994 /* Returns the name of dictionary class DICT_CLASS. */
996 dict_class_to_name (enum dict_class dict_class)
1001 return _("ordinary");
1005 return _("scratch");
1011 /* Returns V's vardict structure. */
1012 const struct vardict_info *
1013 var_get_vardict (const struct variable *v)
1015 assert (var_has_vardict (v));
1019 /* Sets V's vardict data to VARDICT. */
1021 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
1023 assert (vardict->dict_index >= 0);
1024 assert (vardict->case_index >= 0);
1025 v->vardict = *vardict;
1028 /* Returns true if V has vardict data. */
1030 var_has_vardict (const struct variable *v)
1032 return v->vardict.dict_index != -1;
1035 /* Clears V's vardict data. */
1037 var_clear_vardict (struct variable *v)
1039 v->vardict.dict_index = v->vardict.case_index = -1;