1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "dictionary.h"
26 #include "identifier.h"
27 #include "missing-values.h"
28 #include "value-labels.h"
31 #include <libpspp/misc.h>
32 #include <libpspp/alloc.h>
33 #include <libpspp/assertion.h>
34 #include <libpspp/compiler.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/message.h>
37 #include <libpspp/str.h>
40 #define _(msgid) gettext (msgid)
45 /* Dictionary information. */
46 char name[LONG_NAME_LEN + 1]; /* Variable name. Mixed case. */
47 int width; /* 0 for numeric, otherwise string width. */
48 struct missing_values miss; /* Missing values. */
49 struct fmt_spec print; /* Default format for PRINT. */
50 struct fmt_spec write; /* Default format for WRITE. */
51 struct val_labs *val_labs; /* Value labels. */
52 char *label; /* Variable label. */
54 /* GUI information. */
55 enum measure measure; /* Nominal, ordinal, or continuous. */
56 int display_width; /* Width of data editor column. */
57 enum alignment alignment; /* Alignment of data in GUI. */
59 /* Case information. */
60 bool leave; /* Leave value from case to case? */
62 /* Data for use by containing dictionary. */
63 struct vardict_info vardict;
65 /* Used only for system and portable file input and output.
68 size_t short_name_cnt;
70 /* Each command may use these fields as needed. */
72 void (*aux_dtor) (struct variable *);
74 /* Values of a categorical variable. Procedures need
75 vectors with binary entries, so any variable of type ALPHA will
76 have its values stored here. */
77 struct cat_vals *obs_vals;
80 /* Returns true if VAR_TYPE is a valid variable type. */
82 var_type_is_valid (enum var_type var_type)
84 return var_type == VAR_NUMERIC || var_type == VAR_STRING;
87 /* Returns the variable type for the given width. */
89 var_type_from_width (int width)
91 return width != 0 ? VAR_STRING : VAR_NUMERIC;
94 /* Creates and returns a new variable with the given NAME and
95 WIDTH and other fields initialized to default values. The
96 variable is not added to a dictionary; for that, use
97 dict_create_var instead. */
99 var_create (const char *name, int width)
103 assert (width >= 0 && width <= MAX_STRING);
105 v = xmalloc (sizeof *v);
106 v->vardict.dict_index = v->vardict.case_index = -1;
107 var_set_name (v, name);
109 mv_init (&v->miss, width);
110 v->leave = var_must_leave (v);
111 if (var_is_numeric (v))
113 v->print = fmt_for_output (FMT_F, 8, 2);
114 v->alignment = ALIGN_RIGHT;
115 v->measure = MEASURE_SCALE;
119 v->print = fmt_for_output (FMT_A, var_get_width (v), 0);
120 v->alignment = ALIGN_LEFT;
121 v->measure = MEASURE_NOMINAL;
123 v->display_width = var_default_display_width (width);
127 v->short_names = NULL;
128 v->short_name_cnt = 0;
/* Creates and returns a clone of OLD_VAR.  Most properties of
   the new variable are copied from OLD_VAR, except:

    - The variable's short name is not copied, because there is
      no reason to give a new variable with potentially a new
      name the same short name.

    - The new variable is not added to OLD_VAR's dictionary by
      default.  Use dict_clone_var, instead, to do that.

    - Auxiliary data and obs_vals are not copied. */
struct variable *
var_clone (const struct variable *old_var)
{
  struct variable *new_var = var_create (var_get_name (old_var),
                                         var_get_width (old_var));

  /* Copy each property through its setter so that the setter's
     own checks apply to the clone. */
  var_set_missing_values (new_var, var_get_missing_values (old_var));
  var_set_print_format (new_var, var_get_print_format (old_var));
  var_set_write_format (new_var, var_get_write_format (old_var));
  var_set_value_labels (new_var, var_get_value_labels (old_var));
  var_set_label (new_var, var_get_label (old_var));
  var_set_measure (new_var, var_get_measure (old_var));
  var_set_display_width (new_var, var_get_display_width (old_var));
  var_set_alignment (new_var, var_get_alignment (old_var));
  var_set_leave (new_var, var_get_leave (old_var));

  return new_var;
}
166 /* Destroys variable V.
167 V must not belong to a dictionary. If it does, use
168 dict_delete_var instead. */
170 var_destroy (struct variable *v)
174 assert (!var_has_vardict (v));
175 cat_stored_values_destroy (v->obs_vals);
176 var_clear_short_names (v);
178 val_labs_destroy (v->val_labs);
184 /* Variable names. */
186 /* Return variable V's name. */
188 var_get_name (const struct variable *v)
193 /* Sets V's name to NAME.
194 Do not use this function for a variable in a dictionary. Use
195 dict_rename_var instead. */
197 var_set_name (struct variable *v, const char *name)
199 assert (v->vardict.dict_index == -1);
200 assert (var_is_plausible_name (name, false));
202 str_copy_trunc (v->name, sizeof v->name, name);
203 dict_var_changed (v);
206 /* Returns true if NAME is an acceptable name for a variable,
207 false otherwise. If ISSUE_ERROR is true, issues an
208 explanatory error message on failure. */
210 var_is_valid_name (const char *name, bool issue_error)
215 assert (name != NULL);
217 /* Note that strlen returns number of BYTES, not the number of
219 length = strlen (name);
221 plausible = var_is_plausible_name(name, issue_error);
227 if (!lex_is_id1 (name[0]))
230 msg (SE, _("Character `%c' (in %s) may not appear "
231 "as the first character in a variable name."),
237 for (i = 0; i < length; i++)
239 if (!lex_is_idn (name[i]))
242 msg (SE, _("Character `%c' (in %s) may not appear in "
252 /* Returns true if NAME is an plausible name for a variable,
253 false otherwise. If ISSUE_ERROR is true, issues an
254 explanatory error message on failure.
255 This function makes no use of LC_CTYPE.
258 var_is_plausible_name (const char *name, bool issue_error)
262 assert (name != NULL);
264 /* Note that strlen returns number of BYTES, not the number of
266 length = strlen (name);
270 msg (SE, _("Variable name cannot be empty string."));
273 else if (length > LONG_NAME_LEN)
276 msg (SE, _("Variable name %s exceeds %d-character limit."),
277 name, (int) LONG_NAME_LEN);
281 if (lex_id_to_token (ss_cstr (name)) != T_ID)
284 msg (SE, _("`%s' may not be used as a variable name because it "
285 "is a reserved word."), name);
292 /* A hsh_compare_func that orders variables A and B by their
295 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
297 const struct variable *a = a_;
298 const struct variable *b = b_;
300 return strcasecmp (a->name, b->name);
303 /* A hsh_hash_func that hashes variable V based on its name. */
305 hash_var_by_name (const void *v_, const void *aux UNUSED)
307 const struct variable *v = v_;
309 return hsh_hash_case_string (v->name);
312 /* A hsh_compare_func that orders pointers to variables A and B
315 compare_var_ptrs_by_name (const void *a_, const void *b_,
316 const void *aux UNUSED)
318 struct variable *const *a = a_;
319 struct variable *const *b = b_;
321 return strcasecmp (var_get_name (*a), var_get_name (*b));
324 /* A hsh_hash_func that hashes pointer to variable V based on its
327 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
329 struct variable *const *v = v_;
331 return hsh_hash_case_string (var_get_name (*v));
334 /* Returns the type of variable V. */
336 var_get_type (const struct variable *v)
338 return var_type_from_width (v->width);
341 /* Returns the width of variable V. */
343 var_get_width (const struct variable *v)
348 /* Sets the width of V to WIDTH. */
350 var_set_width (struct variable *v, int new_width)
352 const int old_width = v->width;
354 if (mv_is_resizable (&v->miss, new_width))
355 mv_resize (&v->miss, new_width);
357 mv_init (&v->miss, new_width);
359 if (v->val_labs != NULL)
361 if (val_labs_can_set_width (v->val_labs, new_width))
362 val_labs_set_width (v->val_labs, new_width);
365 val_labs_destroy (v->val_labs);
370 fmt_resize (&v->print, new_width);
371 fmt_resize (&v->write, new_width);
373 v->width = new_width;
376 const int old_val_count = value_cnt_from_width (old_width);
377 const int new_val_count = value_cnt_from_width (new_width);
379 if ( old_val_count != new_val_count)
380 dict_var_resized (v, new_val_count - old_val_count);
383 dict_var_changed (v);
386 /* Returns true if variable V is numeric, false otherwise. */
388 var_is_numeric (const struct variable *v)
390 return var_get_type (v) == VAR_NUMERIC;
393 /* Returns true if variable V is a string variable, false
396 var_is_alpha (const struct variable *v)
398 return var_get_type (v) == VAR_STRING;
401 /* Returns true if variable V is a short string variable, false
404 var_is_short_string (const struct variable *v)
406 return v->width > 0 && v->width <= MAX_SHORT_STRING;
409 /* Returns true if variable V is a long string variable, false
412 var_is_long_string (const struct variable *v)
414 return v->width > MAX_SHORT_STRING;
417 /* Returns the number of "union value"s need to store a value of
420 var_get_value_cnt (const struct variable *v)
422 return value_cnt_from_width (v->width);
425 /* Returns variable V's missing values. */
426 const struct missing_values *
427 var_get_missing_values (const struct variable *v)
432 /* Sets variable V's missing values to MISS, which must be of V's
433 width or at least resizable to V's width.
434 If MISS is null, then V's missing values, if any, are
437 var_set_missing_values (struct variable *v, const struct missing_values *miss)
441 assert (mv_is_resizable (miss, v->width));
442 mv_copy (&v->miss, miss);
443 mv_resize (&v->miss, v->width);
446 mv_init (&v->miss, v->width);
448 dict_var_changed (v);
/* Sets variable V to have no user-missing values.
   Equivalent to var_set_missing_values with a null MISS. */
void
var_clear_missing_values (struct variable *v)
{
  var_set_missing_values (v, NULL);
}
458 /* Returns true if V has any user-missing values,
461 var_has_missing_values (const struct variable *v)
463 return !mv_is_empty (&v->miss);
466 /* Returns true if VALUE is in the given CLASS of missing values
467 in V, false otherwise. */
469 var_is_value_missing (const struct variable *v, const union value *value,
472 return mv_is_value_missing (&v->miss, value, class);
475 /* Returns true if D is in the given CLASS of missing values in
477 V must be a numeric variable. */
479 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
481 return mv_is_num_missing (&v->miss, d, class);
484 /* Returns true if S[] is a missing value for V, false otherwise.
485 S[] must contain exactly as many characters as V's width.
486 V must be a string variable. */
488 var_is_str_missing (const struct variable *v, const char s[],
491 return mv_is_str_missing (&v->miss, s, class);
494 /* Returns variable V's value labels,
495 possibly a null pointer if it has none. */
496 const struct val_labs *
497 var_get_value_labels (const struct variable *v)
502 /* Returns true if variable V has at least one value label. */
504 var_has_value_labels (const struct variable *v)
506 return val_labs_count (v->val_labs) > 0;
509 /* Sets variable V's value labels to a copy of VLS,
510 which must have a width equal to V's width or one that can be
511 changed to V's width.
512 If VLS is null, then V's value labels, if any, are removed. */
514 var_set_value_labels (struct variable *v, const struct val_labs *vls)
516 val_labs_destroy (v->val_labs);
521 assert (val_labs_can_set_width (vls, v->width));
522 v->val_labs = val_labs_copy (vls);
523 val_labs_set_width (v->val_labs, v->width);
524 dict_var_changed (v);
528 /* Makes sure that V has a set of value labels,
529 by assigning one to it if necessary. */
531 alloc_value_labels (struct variable *v)
533 assert (!var_is_long_string (v));
534 if (v->val_labs == NULL)
535 v->val_labs = val_labs_create (v->width);
538 /* Attempts to add a value label with the given VALUE and LABEL
539 to V. Returns true if successful, false if VALUE has an
541 V must not be a long string variable. */
543 var_add_value_label (struct variable *v,
544 const union value *value, const char *label)
546 alloc_value_labels (v);
547 return val_labs_add (v->val_labs, *value, label);
550 /* Adds or replaces a value label with the given VALUE and LABEL
552 V must not be a long string variable. */
554 var_replace_value_label (struct variable *v,
555 const union value *value, const char *label)
557 alloc_value_labels (v);
558 val_labs_replace (v->val_labs, *value, label);
/* Removes V's value labels, if any.
   Equivalent to var_set_value_labels with a null VLS. */
void
var_clear_value_labels (struct variable *v)
{
  var_set_value_labels (v, NULL);
}
568 /* Returns the label associated with VALUE for variable V,
569 or a null pointer if none. */
571 var_lookup_value_label (const struct variable *v, const union value *value)
573 return val_labs_find (v->val_labs, *value);
576 /* Get a string representing VALUE for variable V.
577 That is, if VALUE has a label, return that label,
578 otherwise format VALUE and return the formatted string. */
580 var_get_value_name (const struct variable *v, const union value *value)
582 const char *name = var_lookup_value_label (v, value);
585 static char buf[MAX_STRING + 1];
586 data_out (value, &v->print, buf);
587 buf[v->print.w] = '\0';
593 /* Print and write formats. */
595 /* Returns V's print format specification. */
596 const struct fmt_spec *
597 var_get_print_format (const struct variable *v)
602 /* Sets V's print format specification to PRINT, which must be a
603 valid format specification for outputting a variable of V's
606 var_set_print_format (struct variable *v, const struct fmt_spec *print)
608 assert (fmt_check_width_compat (print, v->width));
610 dict_var_changed (v);
613 /* Returns V's write format specification. */
614 const struct fmt_spec *
615 var_get_write_format (const struct variable *v)
620 /* Sets V's write format specification to WRITE, which must be a
621 valid format specification for outputting a variable of V's
624 var_set_write_format (struct variable *v, const struct fmt_spec *write)
626 assert (fmt_check_width_compat (write, v->width));
628 dict_var_changed (v);
/* Sets V's print and write format specifications to FORMAT,
   which must be a valid format specification for outputting a
   variable of V's width. */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  var_set_print_format (v, format);
  var_set_write_format (v, format);
}
641 /* Return a string representing this variable, in the form most
642 appropriate from a human factors perspective, that is, its
643 variable label if it has one, otherwise its name. */
645 var_to_string (const struct variable *v)
647 return v->label != NULL ? v->label : v->name;
650 /* Returns V's variable label, or a null pointer if it has none. */
652 var_get_label (const struct variable *v)
657 /* Sets V's variable label to LABEL, stripping off leading and
658 trailing white space and truncating to 255 characters.
659 If LABEL is a null pointer or if LABEL is an empty string
660 (after stripping white space), then V's variable label (if
663 var_set_label (struct variable *v, const char *label)
670 struct substring s = ss_cstr (label);
671 ss_trim (&s, ss_cstr (CC_SPACES));
672 ss_truncate (&s, 255);
673 if (!ss_is_empty (s))
674 v->label = ss_xstrdup (s);
675 dict_var_changed (v);
/* Removes any variable label from V.
   Equivalent to var_set_label with a null LABEL. */
void
var_clear_label (struct variable *v)
{
  var_set_label (v, NULL);
}
686 /* Returns true if V has a variable V,
689 var_has_label (const struct variable *v)
691 return v->label != NULL;
694 /* Returns true if M is a valid variable measurement level,
697 measure_is_valid (enum measure m)
699 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
702 /* Returns V's measurement level. */
704 var_get_measure (const struct variable *v)
709 /* Sets V's measurement level to MEASURE. */
711 var_set_measure (struct variable *v, enum measure measure)
713 assert (measure_is_valid (measure));
714 v->measure = measure;
715 dict_var_changed (v);
718 /* Returns V's display width, which applies only to GUIs. */
720 var_get_display_width (const struct variable *v)
722 return v->display_width;
725 /* Sets V's display width to DISPLAY_WIDTH. */
727 var_set_display_width (struct variable *v, int display_width)
729 v->display_width = display_width;
730 dict_var_changed (v);
/* Returns the default display width for a variable of the given
   WIDTH, as set by var_create.  The return value can be used to
   reset a variable's display width to the default.

   Numeric variables (WIDTH of 0) get a fixed default; string
   variables get their own width, capped at a maximum. */
int
var_default_display_width (int width)
{
  enum
    {
      NUMERIC_DISPLAY_WIDTH = 8,        /* Default for numeric variables. */
      MAX_STRING_DISPLAY_WIDTH = 32     /* Cap for string variables. */
    };

  if (width == 0)
    return NUMERIC_DISPLAY_WIDTH;

  return width < MAX_STRING_DISPLAY_WIDTH ? width : MAX_STRING_DISPLAY_WIDTH;
}
742 /* Returns true if A is a valid alignment,
745 alignment_is_valid (enum alignment a)
747 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
750 /* Returns V's display alignment, which applies only to GUIs. */
752 var_get_alignment (const struct variable *v)
757 /* Sets V's display alignment to ALIGNMENT. */
759 var_set_alignment (struct variable *v, enum alignment alignment)
761 assert (alignment_is_valid (alignment));
762 v->alignment = alignment;
763 dict_var_changed (v);
/* Whether variables' values should be preserved from case to
   case. */
769 /* Returns true if variable V's value should be left from case to
770 case, instead of being reset to 0, system-missing, or blanks. */
772 var_get_leave (const struct variable *v)
777 /* Sets V's leave setting to LEAVE. */
779 var_set_leave (struct variable *v, bool leave)
781 assert (leave || !var_must_leave (v));
783 dict_var_changed (v);
786 /* Returns true if V must be left from case to case,
787 false if it can be set either way. */
789 var_must_leave (const struct variable *v)
791 return dict_class_from_id (v->name) == DC_SCRATCH;
794 /* Returns the number of short names stored in VAR.
796 Short names are used only for system and portable file input
797 and output. They are upper-case only, not necessarily unique,
798 and limited to SHORT_NAME_LEN characters (plus a null
799 terminator). Ordinarily a variable has at most one short
800 name, but very long string variables (longer than 255 bytes)
801 may have more. A variable might not have any short name at
802 all if it hasn't been saved to or read from a system or
805 var_get_short_name_cnt (const struct variable *var)
807 return var->short_name_cnt;
810 /* Returns VAR's short name with the given IDX, if it has one
811 with that index, or a null pointer otherwise. Short names may
812 be sparse: even if IDX is less than the number of short names
813 in VAR, this function may return a null pointer. */
815 var_get_short_name (const struct variable *var, size_t idx)
817 return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
820 /* Sets VAR's short name with the given IDX to SHORT_NAME,
821 truncating it to SHORT_NAME_LEN characters and converting it
822 to uppercase in the process. Specifying a null pointer for
823 SHORT_NAME clears the specified short name. */
825 var_set_short_name (struct variable *var, size_t idx, const char *short_name)
827 assert (var != NULL);
828 assert (short_name == NULL || var_is_plausible_name (short_name, false));
830 /* Clear old short name numbered IDX, if any. */
831 if (idx < var->short_name_cnt)
833 free (var->short_names[idx]);
834 var->short_names[idx] = NULL;
837 /* Install new short name for IDX. */
838 if (short_name != NULL)
840 if (idx >= var->short_name_cnt)
842 size_t old_cnt = var->short_name_cnt;
844 var->short_name_cnt = MAX (idx * 2, 1);
845 var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
846 sizeof *var->short_names);
847 for (i = old_cnt; i < var->short_name_cnt; i++)
848 var->short_names[i] = NULL;
850 var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING);
851 str_uppercase (var->short_names[idx]);
854 dict_var_changed (var);
857 /* Clears V's short names. */
859 var_clear_short_names (struct variable *v)
863 for (i = 0; i < v->short_name_cnt; i++)
864 free (v->short_names[i]);
865 free (v->short_names);
866 v->short_names = NULL;
867 v->short_name_cnt = 0;
870 /* Relationship with dictionary. */
872 /* Returns V's index within its dictionary, the value
873 for which "dict_get_var (dict, index)" will return V.
874 V must be in a dictionary. */
876 var_get_dict_index (const struct variable *v)
878 assert (v->vardict.dict_index != -1);
879 return v->vardict.dict_index;
882 /* Returns V's index within the case represented by its
883 dictionary, that is, the value for which "case_data_idx (case,
884 index)" will return the data for V in that case.
885 V must be in a dictionary. */
887 var_get_case_index (const struct variable *v)
889 assert (v->vardict.case_index != -1);
890 return v->vardict.case_index;
893 /* Returns V's auxiliary data, or a null pointer if none has been
896 var_get_aux (const struct variable *v)
901 /* Assign auxiliary data AUX to variable V, which must not
902 already have auxiliary data. Before V's auxiliary data is
903 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
904 may be appropriate for use as AUX_DTOR.) */
906 var_attach_aux (const struct variable *v_,
907 void *aux, void (*aux_dtor) (struct variable *))
909 struct variable *v = (struct variable *) v_ ; /* cast away const */
910 assert (v->aux == NULL);
911 assert (aux != NULL);
913 v->aux_dtor = aux_dtor;
917 /* Remove auxiliary data, if any, from V, and return it, without
918 calling any associated destructor. */
920 var_detach_aux (struct variable *v)
923 assert (aux != NULL);
928 /* Clears auxiliary data, if any, from V, and calls any
929 associated destructor. */
931 var_clear_aux (struct variable *v)
936 if (v->aux_dtor != NULL)
942 /* This function is appropriate for use an auxiliary data
943 destructor (passed as AUX_DTOR to var_attach_aux()) for the
944 case where the auxiliary data should be passed to free(). */
946 var_dtor_free (struct variable *v)
951 /* Observed categorical values. */
953 /* Returns V's observed categorical values,
954 which V must have. */
956 var_get_obs_vals (const struct variable *v)
958 assert (v->obs_vals != NULL);
962 /* Sets V's observed categorical values to CAT_VALS. */
964 var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals)
966 struct variable *v = (struct variable *) v_ ; /* cast away const */
967 cat_stored_values_destroy (v->obs_vals);
968 v->obs_vals = cat_vals;
971 /* Returns true if V has observed categorical values,
974 var_has_obs_vals (const struct variable *v)
976 return v->obs_vals != NULL;
979 /* Returns the dictionary class corresponding to a variable named
982 dict_class_from_id (const char *name)
995 /* Returns the name of dictionary class DICT_CLASS. */
997 dict_class_to_name (enum dict_class dict_class)
1002 return _("ordinary");
1006 return _("scratch");
1012 /* Returns V's vardict structure. */
1013 const struct vardict_info *
1014 var_get_vardict (const struct variable *v)
1016 assert (var_has_vardict (v));
1020 /* Sets V's vardict data to VARDICT. */
1022 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
1024 assert (vardict->dict_index >= 0);
1025 assert (vardict->case_index >= 0);
1026 v->vardict = *vardict;
1029 /* Returns true if V has vardict data. */
1031 var_has_vardict (const struct variable *v)
1033 return v->vardict.dict_index != -1;
1036 /* Clears V's vardict data. */
1038 var_clear_vardict (struct variable *v)
1040 v->vardict.dict_index = v->vardict.case_index = -1;