/* PSPP - computes sample statistics.
   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA. */
#include <stdlib.h>

#include "dictionary.h"
#include "identifier.h"
#include "missing-values.h"
#include "value-labels.h"

#include <libpspp/alloc.h>
#include <libpspp/assertion.h>
#include <libpspp/compiler.h>
#include <libpspp/hash.h>
#include <libpspp/message.h>
#include <libpspp/str.h>
42 #define _(msgid) gettext (msgid)
47 /* Dictionary information. */
48 char name[LONG_NAME_LEN + 1]; /* Variable name. Mixed case. */
49 int width; /* 0 for numeric, otherwise string width. */
50 struct missing_values miss; /* Missing values. */
51 struct fmt_spec print; /* Default format for PRINT. */
52 struct fmt_spec write; /* Default format for WRITE. */
53 struct val_labs *val_labs; /* Value labels. */
54 char *label; /* Variable label. */
56 /* GUI information. */
57 enum measure measure; /* Nominal, ordinal, or continuous. */
58 int display_width; /* Width of data editor column. */
59 enum alignment alignment; /* Alignment of data in GUI. */
61 /* Case information. */
62 bool leave; /* Leave value from case to case? */
64 /* Data for use by containing dictionary. */
65 struct vardict_info vardict;
67 /* Short name, used only for system and portable file input
68 and output. Upper case only. Short names are not necessarily
69 unique. Any variable may have no short name, indicated by an
71 char short_name[SHORT_NAME_LEN + 1];
73 /* Each command may use these fields as needed. */
75 void (*aux_dtor) (struct variable *);
77 /* Values of a categorical variable. Procedures need
78 vectors with binary entries, so any variable of type ALPHA will
79 have its values stored here. */
80 struct cat_vals *obs_vals;
83 /* Returns true if VAR_TYPE is a valid variable type. */
85 var_type_is_valid (enum var_type var_type)
87 return var_type == VAR_NUMERIC || var_type == VAR_STRING;
90 /* Returns the variable type for the given width. */
92 var_type_from_width (int width)
94 return width != 0 ? VAR_STRING : VAR_NUMERIC;
97 /* Creates and returns a new variable with the given NAME and
98 WIDTH and other fields initialized to default values. The
99 variable is not added to a dictionary; for that, use
100 dict_create_var instead. */
102 var_create (const char *name, int width)
106 assert (width >= 0 && width <= MAX_STRING);
108 v = xmalloc (sizeof *v);
109 v->vardict.dict_index = v->vardict.case_index = -1;
110 var_set_name (v, name);
112 mv_init (&v->miss, width);
113 v->leave = var_must_leave (v);
114 if (var_is_numeric (v))
116 v->print = fmt_for_output (FMT_F, 8, 2);
117 v->alignment = ALIGN_RIGHT;
118 v->display_width = 8;
119 v->measure = MEASURE_SCALE;
123 v->print = fmt_for_output (FMT_A, var_get_width (v), 0);
124 v->alignment = ALIGN_LEFT;
125 v->display_width = 8;
126 v->measure = MEASURE_NOMINAL;
131 var_clear_short_name (v);
/* Creates and returns a clone of OLD_VAR.  Most properties of
   the new variable are copied from OLD_VAR, except:

    - The variable's short name is not copied, because there is
      no reason to give a new variable with potentially a new
      name the same short name.

    - The new variable is not added to OLD_VAR's dictionary by
      default.  Use dict_clone_var, instead, to do that.

    - Auxiliary data and obs_vals are not copied. */
struct variable *
var_clone (const struct variable *old_var)
{
  struct variable *new_var = var_create (var_get_name (old_var),
                                         var_get_width (old_var));

  var_set_missing_values (new_var, var_get_missing_values (old_var));
  var_set_print_format (new_var, var_get_print_format (old_var));
  var_set_write_format (new_var, var_get_write_format (old_var));
  var_set_value_labels (new_var, var_get_value_labels (old_var));
  var_set_label (new_var, var_get_label (old_var));
  var_set_measure (new_var, var_get_measure (old_var));
  var_set_display_width (new_var, var_get_display_width (old_var));
  var_set_alignment (new_var, var_get_alignment (old_var));
  var_set_leave (new_var, var_get_leave (old_var));

  return new_var;
}
169 /* Destroys variable V.
170 V must not belong to a dictionary. If it does, use
171 dict_delete_var instead. */
173 var_destroy (struct variable *v)
177 assert (!var_has_vardict (v));
178 cat_stored_values_destroy (v->obs_vals);
180 val_labs_destroy (v->val_labs);
/* Variable names. */
188 /* Return variable V's name. */
190 var_get_name (const struct variable *v)
195 /* Sets V's name to NAME.
196 Do not use this function for a variable in a dictionary. Use
197 dict_rename_var instead. */
199 var_set_name (struct variable *v, const char *name)
201 assert (v->vardict.dict_index == -1);
202 assert (var_is_plausible_name (name, false));
204 str_copy_trunc (v->name, sizeof v->name, name);
205 dict_var_changed (v);
208 /* Returns true if NAME is an acceptable name for a variable,
209 false otherwise. If ISSUE_ERROR is true, issues an
210 explanatory error message on failure. */
212 var_is_valid_name (const char *name, bool issue_error)
217 assert (name != NULL);
219 /* Note that strlen returns number of BYTES, not the number of
221 length = strlen (name);
223 plausible = var_is_plausible_name(name, issue_error);
229 if (!lex_is_id1 (name[0]))
232 msg (SE, _("Character `%c' (in %s) may not appear "
233 "as the first character in a variable name."),
239 for (i = 0; i < length; i++)
241 if (!lex_is_idn (name[i]))
244 msg (SE, _("Character `%c' (in %s) may not appear in "
254 /* Returns true if NAME is an plausible name for a variable,
255 false otherwise. If ISSUE_ERROR is true, issues an
256 explanatory error message on failure.
257 This function makes no use of LC_CTYPE.
260 var_is_plausible_name (const char *name, bool issue_error)
264 assert (name != NULL);
266 /* Note that strlen returns number of BYTES, not the number of
268 length = strlen (name);
272 msg (SE, _("Variable name cannot be empty string."));
275 else if (length > LONG_NAME_LEN)
278 msg (SE, _("Variable name %s exceeds %d-character limit."),
279 name, (int) LONG_NAME_LEN);
283 if (lex_id_to_token (ss_cstr (name)) != T_ID)
286 msg (SE, _("`%s' may not be used as a variable name because it "
287 "is a reserved word."), name);
294 /* A hsh_compare_func that orders variables A and B by their
297 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
299 const struct variable *a = a_;
300 const struct variable *b = b_;
302 return strcasecmp (a->name, b->name);
305 /* A hsh_hash_func that hashes variable V based on its name. */
307 hash_var_by_name (const void *v_, const void *aux UNUSED)
309 const struct variable *v = v_;
311 return hsh_hash_case_string (v->name);
314 /* A hsh_compare_func that orders pointers to variables A and B
317 compare_var_ptrs_by_name (const void *a_, const void *b_,
318 const void *aux UNUSED)
320 struct variable *const *a = a_;
321 struct variable *const *b = b_;
323 return strcasecmp (var_get_name (*a), var_get_name (*b));
326 /* A hsh_hash_func that hashes pointer to variable V based on its
329 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
331 struct variable *const *v = v_;
333 return hsh_hash_case_string (var_get_name (*v));
336 /* Returns the type of variable V. */
338 var_get_type (const struct variable *v)
340 return var_type_from_width (v->width);
343 /* Returns the width of variable V. */
345 var_get_width (const struct variable *v)
350 /* Sets the width of V to WIDTH. */
352 var_set_width (struct variable *v, int new_width)
354 enum var_type new_type = var_type_from_width (new_width);
356 if (mv_is_resizable (&v->miss, new_width))
357 mv_resize (&v->miss, new_width);
359 mv_init (&v->miss, new_width);
361 if (v->val_labs != NULL)
363 if (val_labs_can_set_width (v->val_labs, new_width))
364 val_labs_set_width (v->val_labs, new_width);
367 val_labs_destroy (v->val_labs);
372 if (var_get_type (v) != new_type)
374 v->print = (new_type == VAR_NUMERIC
375 ? fmt_for_output (FMT_F, 8, 2)
376 : fmt_for_output (FMT_A, new_width, 0));
379 else if (new_type == VAR_STRING)
381 v->print.w = v->print.type == FMT_AHEX ? new_width * 2 : new_width;
382 v->write.w = v->write.type == FMT_AHEX ? new_width * 2 : new_width;
385 v->width = new_width;
387 dict_var_changed (v);
390 /* Returns true if variable V is numeric, false otherwise. */
392 var_is_numeric (const struct variable *v)
394 return var_get_type (v) == VAR_NUMERIC;
397 /* Returns true if variable V is a string variable, false
400 var_is_alpha (const struct variable *v)
402 return var_get_type (v) == VAR_STRING;
405 /* Returns true if variable V is a short string variable, false
408 var_is_short_string (const struct variable *v)
410 return v->width > 0 && v->width <= MAX_SHORT_STRING;
413 /* Returns true if variable V is a long string variable, false
416 var_is_long_string (const struct variable *v)
418 return v->width > MAX_SHORT_STRING;
421 /* Returns the number of "union value"s need to store a value of
424 var_get_value_cnt (const struct variable *v)
426 return v->width == 0 ? 1 : DIV_RND_UP (v->width, MAX_SHORT_STRING);
429 /* Returns variable V's missing values. */
430 const struct missing_values *
431 var_get_missing_values (const struct variable *v)
436 /* Sets variable V's missing values to MISS, which must be of V's
437 width or at least resizable to V's width.
438 If MISS is null, then V's missing values, if any, are
441 var_set_missing_values (struct variable *v, const struct missing_values *miss)
445 assert (mv_is_resizable (miss, v->width));
446 mv_copy (&v->miss, miss);
447 mv_resize (&v->miss, v->width);
450 mv_init (&v->miss, v->width);
452 dict_var_changed (v);
/* Sets variable V to have no user-missing values. */
void
var_clear_missing_values (struct variable *v)
{
  var_set_missing_values (v, NULL);
}
462 /* Returns true if V has any user-missing values,
465 var_has_missing_values (const struct variable *v)
467 return !mv_is_empty (&v->miss);
470 /* Returns true if VALUE is in the given CLASS of missing values
471 in V, false otherwise. */
473 var_is_value_missing (const struct variable *v, const union value *value,
476 return mv_is_value_missing (&v->miss, value, class);
479 /* Returns true if D is in the given CLASS of missing values in
481 V must be a numeric variable. */
483 var_is_num_missing (const struct variable *v, double d, enum mv_class class)
485 return mv_is_num_missing (&v->miss, d, class);
488 /* Returns true if S[] is a missing value for V, false otherwise.
489 S[] must contain exactly as many characters as V's width.
490 V must be a string variable. */
492 var_is_str_missing (const struct variable *v, const char s[],
495 return mv_is_str_missing (&v->miss, s, class);
498 /* Returns variable V's value labels,
499 possibly a null pointer if it has none. */
500 const struct val_labs *
501 var_get_value_labels (const struct variable *v)
506 /* Returns true if variable V has at least one value label. */
508 var_has_value_labels (const struct variable *v)
510 return val_labs_count (v->val_labs) > 0;
513 /* Sets variable V's value labels to a copy of VLS,
514 which must have a width equal to V's width or one that can be
515 changed to V's width.
516 If VLS is null, then V's value labels, if any, are removed. */
518 var_set_value_labels (struct variable *v, const struct val_labs *vls)
520 val_labs_destroy (v->val_labs);
525 assert (val_labs_can_set_width (vls, v->width));
526 v->val_labs = val_labs_copy (vls);
527 val_labs_set_width (v->val_labs, v->width);
528 dict_var_changed (v);
532 /* Makes sure that V has a set of value labels,
533 by assigning one to it if necessary. */
535 alloc_value_labels (struct variable *v)
537 assert (!var_is_long_string (v));
538 if (v->val_labs == NULL)
539 v->val_labs = val_labs_create (v->width);
542 /* Attempts to add a value label with the given VALUE and LABEL
543 to V. Returns true if successful, false if VALUE has an
545 V must not be a long string variable. */
547 var_add_value_label (struct variable *v,
548 const union value *value, const char *label)
550 alloc_value_labels (v);
551 return val_labs_add (v->val_labs, *value, label);
554 /* Adds or replaces a value label with the given VALUE and LABEL
556 V must not be a long string variable. */
558 var_replace_value_label (struct variable *v,
559 const union value *value, const char *label)
561 alloc_value_labels (v);
562 val_labs_replace (v->val_labs, *value, label);
/* Removes V's value labels, if any. */
void
var_clear_value_labels (struct variable *v)
{
  var_set_value_labels (v, NULL);
}
572 /* Returns the label associated with VALUE for variable V,
573 or a null pointer if none. */
575 var_lookup_value_label (const struct variable *v, const union value *value)
577 return val_labs_find (v->val_labs, *value);
580 /* Get a string representing VALUE for variable V.
581 That is, if VALUE has a label, return that label,
582 otherwise format VALUE and return the formatted string. */
584 var_get_value_name (const struct variable *v, const union value *value)
586 const char *name = var_lookup_value_label (v, value);
589 static char buf[MAX_STRING + 1];
590 data_out (value, &v->print, buf);
591 buf[v->print.w] = '\0';
/* Print and write formats. */
599 /* Returns V's print format specification. */
600 const struct fmt_spec *
601 var_get_print_format (const struct variable *v)
606 /* Sets V's print format specification to PRINT, which must be a
607 valid format specification for outputting a variable of V's
610 var_set_print_format (struct variable *v, const struct fmt_spec *print)
612 assert (fmt_check_width_compat (print, v->width));
614 dict_var_changed (v);
617 /* Returns V's write format specification. */
618 const struct fmt_spec *
619 var_get_write_format (const struct variable *v)
624 /* Sets V's write format specification to WRITE, which must be a
625 valid format specification for outputting a variable of V's
628 var_set_write_format (struct variable *v, const struct fmt_spec *write)
630 assert (fmt_check_width_compat (write, v->width));
632 dict_var_changed (v);
/* Sets V's print and write format specifications to FORMAT,
   which must be a valid format specification for outputting a
   variable of V's width. */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  var_set_print_format (v, format);
  var_set_write_format (v, format);
}
645 /* Return a string representing this variable, in the form most
646 appropriate from a human factors perspective, that is, its
647 variable label if it has one, otherwise its name. */
649 var_to_string (const struct variable *v)
651 return v->label != NULL ? v->label : v->name;
654 /* Returns V's variable label, or a null pointer if it has none. */
656 var_get_label (const struct variable *v)
661 /* Sets V's variable label to LABEL, stripping off leading and
662 trailing white space and truncating to 255 characters.
663 If LABEL is a null pointer or if LABEL is an empty string
664 (after stripping white space), then V's variable label (if
667 var_set_label (struct variable *v, const char *label)
674 struct substring s = ss_cstr (label);
675 ss_trim (&s, ss_cstr (CC_SPACES));
676 ss_truncate (&s, 255);
677 if (!ss_is_empty (s))
678 v->label = ss_xstrdup (s);
679 dict_var_changed (v);
/* Removes any variable label from V. */
void
var_clear_label (struct variable *v)
{
  var_set_label (v, NULL);
}
690 /* Returns true if V has a variable V,
693 var_has_label (const struct variable *v)
695 return v->label != NULL;
698 /* Returns true if M is a valid variable measurement level,
701 measure_is_valid (enum measure m)
703 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
706 /* Returns V's measurement level. */
708 var_get_measure (const struct variable *v)
713 /* Sets V's measurement level to MEASURE. */
715 var_set_measure (struct variable *v, enum measure measure)
717 assert (measure_is_valid (measure));
718 v->measure = measure;
719 dict_var_changed (v);
722 /* Returns V's display width, which applies only to GUIs. */
724 var_get_display_width (const struct variable *v)
726 return v->display_width;
732 /* Sets V's display width to DISPLAY_WIDTH. */
734 var_set_display_width (struct variable *v, int display_width)
736 v->display_width = display_width;
737 dict_var_changed (v);
740 /* Returns true if A is a valid alignment,
743 alignment_is_valid (enum alignment a)
745 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
748 /* Returns V's display alignment, which applies only to GUIs. */
750 var_get_alignment (const struct variable *v)
755 /* Sets V's display alignment to ALIGNMENT. */
757 var_set_alignment (struct variable *v, enum alignment alignment)
759 assert (alignment_is_valid (alignment));
760 v->alignment = alignment;
761 dict_var_changed (v);
/* Whether variables' values should be preserved from case to
   case. */
767 /* Returns true if variable V's value should be left from case to
768 case, instead of being reset to 0, system-missing, or blanks. */
770 var_get_leave (const struct variable *v)
775 /* Sets V's leave setting to LEAVE. */
777 var_set_leave (struct variable *v, bool leave)
779 assert (leave || !var_must_leave (v));
781 dict_var_changed (v);
784 /* Returns true if V must be left from case to case,
785 false if it can be set either way. */
787 var_must_leave (const struct variable *v)
789 return dict_class_from_id (v->name) == DC_SCRATCH;
792 /* Returns V's short name, if it has one, or a null pointer
795 Short names are used only for system and portable file input
796 and output. They are upper-case only, not necessarily unique,
797 and limited to SHORT_NAME_LEN characters (plus a null
798 terminator). Any variable may have no short name, indicated
799 by returning a null pointer. */
801 var_get_short_name (const struct variable *v)
803 return v->short_name[0] != '\0' ? v->short_name : NULL;
806 /* Sets V's short_name to SHORT_NAME, truncating it to
807 SHORT_NAME_LEN characters and converting it to uppercase in
808 the process. Specifying a null pointer for SHORT_NAME clears
809 the variable's short name. */
811 var_set_short_name (struct variable *v, const char *short_name)
814 assert (short_name == NULL || var_is_plausible_name (short_name, false));
816 if (short_name != NULL)
818 str_copy_trunc (v->short_name, sizeof v->short_name, short_name);
819 str_uppercase (v->short_name);
822 v->short_name[0] = '\0';
823 dict_var_changed (v);
826 /* Clears V's short name. */
828 var_clear_short_name (struct variable *v)
832 v->short_name[0] = '\0';
/* Relationship with dictionary. */
837 /* Returns V's index within its dictionary, the value
838 for which "dict_get_var (dict, index)" will return V.
839 V must be in a dictionary. */
841 var_get_dict_index (const struct variable *v)
843 assert (v->vardict.dict_index != -1);
844 return v->vardict.dict_index;
847 /* Returns V's index within the case represented by its
848 dictionary, that is, the value for which "case_data_idx (case,
849 index)" will return the data for V in that case.
850 V must be in a dictionary. */
852 var_get_case_index (const struct variable *v)
854 assert (v->vardict.case_index != -1);
855 return v->vardict.case_index;
858 /* Returns V's auxiliary data, or a null pointer if none has been
861 var_get_aux (const struct variable *v)
866 /* Assign auxiliary data AUX to variable V, which must not
867 already have auxiliary data. Before V's auxiliary data is
868 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
869 may be appropriate for use as AUX_DTOR.) */
871 var_attach_aux (const struct variable *v_,
872 void *aux, void (*aux_dtor) (struct variable *))
874 struct variable *v = (struct variable *) v_ ; /* cast away const */
875 assert (v->aux == NULL);
876 assert (aux != NULL);
878 v->aux_dtor = aux_dtor;
882 /* Remove auxiliary data, if any, from V, and return it, without
883 calling any associated destructor. */
885 var_detach_aux (struct variable *v)
888 assert (aux != NULL);
893 /* Clears auxiliary data, if any, from V, and calls any
894 associated destructor. */
896 var_clear_aux (struct variable *v)
901 if (v->aux_dtor != NULL)
907 /* This function is appropriate for use an auxiliary data
908 destructor (passed as AUX_DTOR to var_attach_aux()) for the
909 case where the auxiliary data should be passed to free(). */
911 var_dtor_free (struct variable *v)
/* Observed categorical values. */
918 /* Returns V's observed categorical values,
919 which V must have. */
921 var_get_obs_vals (const struct variable *v)
923 assert (v->obs_vals != NULL);
927 /* Sets V's observed categorical values to CAT_VALS. */
929 var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals)
931 struct variable *v = (struct variable *) v_ ; /* cast away const */
932 cat_stored_values_destroy (v->obs_vals);
933 v->obs_vals = cat_vals;
936 /* Returns true if V has observed categorical values,
939 var_has_obs_vals (const struct variable *v)
941 return v->obs_vals != NULL;
/* Returns the dictionary class corresponding to a variable named
   NAME. */
947 dict_class_from_id (const char *name)
960 /* Returns the name of dictionary class DICT_CLASS. */
962 dict_class_to_name (enum dict_class dict_class)
967 return _("ordinary");
977 /* Returns V's vardict structure. */
978 const struct vardict_info *
979 var_get_vardict (const struct variable *v)
981 assert (var_has_vardict (v));
985 /* Sets V's vardict data to VARDICT. */
987 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
989 assert (vardict->dict_index >= 0);
990 assert (vardict->case_index >= 0);
991 v->vardict = *vardict;
994 /* Returns true if V has vardict data. */
996 var_has_vardict (const struct variable *v)
998 return v->vardict.dict_index != -1;
1001 /* Clears V's vardict data. */
1003 var_clear_vardict (struct variable *v)
1005 v->vardict.dict_index = v->vardict.case_index = -1;