1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 #include "cat-routines.h"
28 #include "dictionary.h"
30 #include "identifier.h"
31 #include "missing-values.h"
33 #include "value-labels.h"
36 #include <libpspp/alloc.h>
37 #include <libpspp/assertion.h>
38 #include <libpspp/compiler.h>
39 #include <libpspp/hash.h>
40 #include <libpspp/message.h>
41 #include <libpspp/misc.h>
42 #include <libpspp/str.h>
47 #define _(msgid) gettext (msgid)
52 /* Dictionary information. */
53 char name[LONG_NAME_LEN + 1]; /* Variable name. Mixed case. */
54 int width; /* 0 for numeric, otherwise string width. */
55 struct missing_values miss; /* Missing values. */
56 struct fmt_spec print; /* Default format for PRINT. */
57 struct fmt_spec write; /* Default format for WRITE. */
58 struct val_labs *val_labs; /* Value labels. */
59 char *label; /* Variable label. */
61 /* GUI information. */
62 enum measure measure; /* Nominal, ordinal, or continuous. */
63 int display_width; /* Width of data editor column. */
64 enum alignment alignment; /* Alignment of data in GUI. */
66 /* Case information. */
67 bool leave; /* Leave value from case to case? */
69 /* Data for use by containing dictionary. */
70 struct vardict_info vardict;
72 /* Short name, used only for system and portable file input
73 and output. Upper case only. There is no index for short
74 names. Short names are not necessarily unique. Any
75 variable may have no short name, indicated by an empty
77 char short_name[SHORT_NAME_LEN + 1];
79 /* Each command may use these fields as needed. */
81 void (*aux_dtor) (struct variable *);
83 /* Values of a categorical variable. Procedures need
84 vectors with binary entries, so any variable of type ALPHA will
85 have its values stored here. */
86 struct cat_vals *obs_vals;
89 /* Returns true if VAR_TYPE is a valid variable type. */
91 var_type_is_valid (enum var_type var_type)
93 return var_type == VAR_NUMERIC || var_type == VAR_STRING;
96 /* Returns the variable type for the given width. */
98 var_type_from_width (int width)
100 return width != 0 ? VAR_STRING : VAR_NUMERIC;
103 /* Creates and returns a new variable with the given NAME and
104 WIDTH and other fields initialized to default values. The
105 variable is not added to a dictionary; for that, use
106 dict_create_var instead. */
108 var_create (const char *name, int width)
112 assert (width >= 0 && width <= MAX_STRING);
114 v = xmalloc (sizeof *v);
115 v->vardict.dict_index = v->vardict.case_index = -1;
116 var_set_name (v, name);
118 mv_init (&v->miss, width);
119 v->leave = var_must_leave (v);
120 if (var_is_numeric (v))
122 v->print = fmt_for_output (FMT_F, 8, 2);
123 v->alignment = ALIGN_RIGHT;
124 v->display_width = 8;
125 v->measure = MEASURE_SCALE;
129 v->print = fmt_for_output (FMT_A, var_get_width (v), 0);
130 v->alignment = ALIGN_LEFT;
131 v->display_width = 8;
132 v->measure = MEASURE_NOMINAL;
137 var_clear_short_name (v);
145 /* Creates and returns a clone of OLD_VAR. Most properties of
146 the new variable are copied from OLD_VAR, except:
148 - The variable's short name is not copied, because there is
149 no reason to give a new variable with potentially a new
150 name the same short name.
152 - The new variable is not added to OLD_VAR's dictionary by
153 default. Use dict_clone_var, instead, to do that.
155 - Auxiliary data and obs_vals are not copied. */
157 var_clone (const struct variable *old_var)
159 struct variable *new_var = var_create (var_get_name (old_var),
160 var_get_width (old_var));
162 var_set_missing_values (new_var, var_get_missing_values (old_var));
163 var_set_print_format (new_var, var_get_print_format (old_var));
164 var_set_write_format (new_var, var_get_write_format (old_var));
165 var_set_value_labels (new_var, var_get_value_labels (old_var));
166 var_set_label (new_var, var_get_label (old_var));
167 var_set_measure (new_var, var_get_measure (old_var));
168 var_set_display_width (new_var, var_get_display_width (old_var));
169 var_set_alignment (new_var, var_get_alignment (old_var));
170 var_set_leave (new_var, var_get_leave (old_var));
175 /* Destroys variable V.
176 V must not belong to a dictionary. If it does, use
177 dict_delete_var instead. */
179 var_destroy (struct variable *v)
183 assert (!var_has_vardict (v));
184 cat_stored_values_destroy (v->obs_vals);
186 val_labs_destroy (v->val_labs);
192 /* Variable names. */
194 /* Return variable V's name. */
196 var_get_name (const struct variable *v)
201 /* Sets V's name to NAME.
202 Do not use this function for a variable in a dictionary. Use
203 dict_rename_var instead. */
205 var_set_name (struct variable *v, const char *name)
207 assert (v->vardict.dict_index == -1);
208 assert (var_is_plausible_name (name, false));
210 str_copy_trunc (v->name, sizeof v->name, name);
213 /* Returns true if NAME is an acceptable name for a variable,
214 false otherwise. If ISSUE_ERROR is true, issues an
215 explanatory error message on failure. */
217 var_is_valid_name (const char *name, bool issue_error)
222 assert (name != NULL);
224 /* Note that strlen returns number of BYTES, not the number of
226 length = strlen (name);
228 plausible = var_is_plausible_name(name, issue_error);
234 if (!lex_is_id1 (name[0]))
237 msg (SE, _("Character `%c' (in %s) may not appear "
238 "as the first character in a variable name."),
244 for (i = 0; i < length; i++)
246 if (!lex_is_idn (name[i]))
249 msg (SE, _("Character `%c' (in %s) may not appear in "
/* Returns true if NAME is a plausible name for a variable,
   false otherwise.  If ISSUE_ERROR is true, issues an
   explanatory error message on failure.
   This function makes no use of LC_CTYPE. */
265 var_is_plausible_name (const char *name, bool issue_error)
269 assert (name != NULL);
271 /* Note that strlen returns number of BYTES, not the number of
273 length = strlen (name);
277 msg (SE, _("Variable name cannot be empty string."));
280 else if (length > LONG_NAME_LEN)
283 msg (SE, _("Variable name %s exceeds %d-character limit."),
284 name, (int) LONG_NAME_LEN);
288 if (lex_id_to_token (ss_cstr (name)) != T_ID)
291 msg (SE, _("`%s' may not be used as a variable name because it "
292 "is a reserved word."), name);
299 /* A hsh_compare_func that orders variables A and B by their
302 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
304 const struct variable *a = a_;
305 const struct variable *b = b_;
307 return strcasecmp (a->name, b->name);
310 /* A hsh_hash_func that hashes variable V based on its name. */
312 hash_var_by_name (const void *v_, const void *aux UNUSED)
314 const struct variable *v = v_;
316 return hsh_hash_case_string (v->name);
319 /* A hsh_compare_func that orders pointers to variables A and B
322 compare_var_ptrs_by_name (const void *a_, const void *b_,
323 const void *aux UNUSED)
325 struct variable *const *a = a_;
326 struct variable *const *b = b_;
328 return strcasecmp (var_get_name (*a), var_get_name (*b));
331 /* A hsh_hash_func that hashes pointer to variable V based on its
334 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
336 struct variable *const *v = v_;
338 return hsh_hash_case_string (var_get_name (*v));
341 /* Returns the type of variable V. */
343 var_get_type (const struct variable *v)
345 return var_type_from_width (v->width);
348 /* Returns the width of variable V. */
350 var_get_width (const struct variable *v)
355 /* Sets the width of V to WIDTH. */
357 var_set_width (struct variable *v, int new_width)
359 enum var_type new_type = var_type_from_width (new_width);
361 if (mv_is_resizable (&v->miss, new_width))
362 mv_resize (&v->miss, new_width);
364 mv_init (&v->miss, new_width);
366 if (v->val_labs != NULL)
368 if (val_labs_can_set_width (v->val_labs, new_width))
369 val_labs_set_width (v->val_labs, new_width);
372 val_labs_destroy (v->val_labs);
377 if (var_get_type (v) != new_type)
379 v->print = (new_type == VAR_NUMERIC
380 ? fmt_for_output (FMT_F, 8, 2)
381 : fmt_for_output (FMT_A, new_width, 0));
384 else if (new_type == VAR_STRING)
386 v->print.w = v->print.type == FMT_AHEX ? new_width * 2 : new_width;
387 v->write.w = v->write.type == FMT_AHEX ? new_width * 2 : new_width;
390 v->width = new_width;
393 /* Returns true if variable V is numeric, false otherwise. */
395 var_is_numeric (const struct variable *v)
397 return var_get_type (v) == VAR_NUMERIC;
400 /* Returns true if variable V is a string variable, false
403 var_is_alpha (const struct variable *v)
405 return var_get_type (v) == VAR_STRING;
408 /* Returns true if variable V is a short string variable, false
411 var_is_short_string (const struct variable *v)
413 return v->width > 0 && v->width <= MAX_SHORT_STRING;
416 /* Returns true if variable V is a long string variable, false
419 var_is_long_string (const struct variable *v)
421 return v->width > MAX_SHORT_STRING;
/* Returns the number of "union value"s needed to store a value of
   variable V. */
427 var_get_value_cnt (const struct variable *v)
429 return v->width == 0 ? 1 : DIV_RND_UP (v->width, MAX_SHORT_STRING);
432 /* Returns variable V's missing values. */
433 const struct missing_values *
434 var_get_missing_values (const struct variable *v)
439 /* Sets variable V's missing values to MISS, which must be of V's
440 width or at least resizable to V's width.
441 If MISS is null, then V's missing values, if any, are
444 var_set_missing_values (struct variable *v, const struct missing_values *miss)
448 assert (mv_is_resizable (miss, v->width));
449 mv_copy (&v->miss, miss);
450 mv_resize (&v->miss, v->width);
453 mv_init (&v->miss, v->width);
/* Removes all user-missing values from variable V.  This is
   done by passing a null set of missing values to
   var_set_missing_values. */
void
var_clear_missing_values (struct variable *v)
{
  const struct missing_values *none = NULL;

  var_set_missing_values (v, none);
}
463 /* Returns true if V has any user-missing values,
466 var_has_missing_values (const struct variable *v)
468 return !mv_is_empty (&v->miss);
471 /* Returns true if VALUE is system missing or user-missing value
472 for V, false otherwise. */
474 var_is_value_missing (const struct variable *v, const union value *value)
476 return mv_is_value_missing (&v->miss, value);
479 /* Returns true if D is system missing or a missing value in V,
481 V must be a numeric variable. */
483 var_is_num_missing (const struct variable *v, double d)
485 return mv_is_num_missing (&v->miss, d);
488 /* Returns true if S[] is a missing value for V, false otherwise.
489 S[] must contain exactly as many characters as V's width.
490 V must be a string variable. */
492 var_is_str_missing (const struct variable *v, const char s[])
494 return mv_is_str_missing (&v->miss, s);
497 /* Returns true if VALUE is a missing value for V, false
500 var_is_value_user_missing (const struct variable *v, const union value *value)
502 return mv_is_value_user_missing (&v->miss, value);
505 /* Returns true if D is a user-missing value for V, false
506 otherwise. V must be a numeric variable. */
508 var_is_num_user_missing (const struct variable *v, double d)
510 return mv_is_num_user_missing (&v->miss, d);
513 /* Returns true if S[] is a missing value for V, false otherwise.
514 V must be a string variable.
515 S[] must contain exactly as many characters as V's width. */
517 var_is_str_user_missing (const struct variable *v, const char s[])
519 return mv_is_str_user_missing (&v->miss, s);
522 /* Returns true if V is a numeric variable and VALUE is the
523 system missing value. */
525 var_is_value_system_missing (const struct variable *v,
526 const union value *value)
528 return mv_is_value_system_missing (&v->miss, value);
531 /* Returns variable V's value labels,
532 possibly a null pointer if it has none. */
533 const struct val_labs *
534 var_get_value_labels (const struct variable *v)
539 /* Returns true if variable V has at least one value label. */
541 var_has_value_labels (const struct variable *v)
543 return val_labs_count (v->val_labs) > 0;
546 /* Sets variable V's value labels to a copy of VLS,
547 which must have a width equal to V's width or one that can be
548 changed to V's width.
549 If VLS is null, then V's value labels, if any, are removed. */
551 var_set_value_labels (struct variable *v, const struct val_labs *vls)
553 val_labs_destroy (v->val_labs);
558 assert (val_labs_can_set_width (vls, v->width));
559 v->val_labs = val_labs_copy (vls);
560 val_labs_set_width (v->val_labs, v->width);
564 /* Makes sure that V has a set of value labels,
565 by assigning one to it if necessary. */
567 alloc_value_labels (struct variable *v)
569 assert (!var_is_long_string (v));
570 if (v->val_labs == NULL)
571 v->val_labs = val_labs_create (v->width);
574 /* Attempts to add a value label with the given VALUE and LABEL
575 to V. Returns true if successful, false if VALUE has an
577 V must not be a long string variable. */
579 var_add_value_label (struct variable *v,
580 const union value *value, const char *label)
582 alloc_value_labels (v);
583 return val_labs_add (v->val_labs, *value, label);
586 /* Adds or replaces a value label with the given VALUE and LABEL
588 V must not be a long string variable. */
590 var_replace_value_label (struct variable *v,
591 const union value *value, const char *label)
593 alloc_value_labels (v);
594 val_labs_replace (v->val_labs, *value, label);
/* Removes V's value labels, if it has any, by passing a null
   set of value labels to var_set_value_labels. */
void
var_clear_value_labels (struct variable *v)
{
  const struct val_labs *none = NULL;

  var_set_value_labels (v, none);
}
604 /* Returns the label associated with VALUE for variable V,
605 or a null pointer if none. */
607 var_lookup_value_label (const struct variable *v, const union value *value)
609 return val_labs_find (v->val_labs, *value);
612 /* Get a string representing VALUE for variable V.
613 That is, if VALUE has a label, return that label,
614 otherwise format VALUE and return the formatted string. */
616 var_get_value_name (const struct variable *v, const union value *value)
618 const char *name = var_lookup_value_label (v, value);
621 static char buf[MAX_STRING + 1];
622 data_out (value, &v->print, buf);
623 buf[v->print.w] = '\0';
629 /* Print and write formats. */
631 /* Returns V's print format specification. */
632 const struct fmt_spec *
633 var_get_print_format (const struct variable *v)
638 /* Sets V's print format specification to PRINT, which must be a
639 valid format specification for outputting a variable of V's
642 var_set_print_format (struct variable *v, const struct fmt_spec *print)
644 assert (fmt_check_width_compat (print, v->width));
648 /* Returns V's write format specification. */
649 const struct fmt_spec *
650 var_get_write_format (const struct variable *v)
655 /* Sets V's write format specification to WRITE, which must be a
656 valid format specification for outputting a variable of V's
659 var_set_write_format (struct variable *v, const struct fmt_spec *write)
661 assert (fmt_check_width_compat (write, v->width));
/* Makes FORMAT both the print format and the write format of V.
   FORMAT must be a valid format specification for outputting a
   variable of V's width.  The print format is set first, then
   the write format. */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  var_set_print_format (v, format);

  var_set_write_format (v, format);
}
675 /* Return a string representing this variable, in the form most
676 appropriate from a human factors perspective, that is, its
677 variable label if it has one, otherwise its name. */
679 var_to_string (const struct variable *v)
681 return v->label != NULL ? v->label : v->name;
684 /* Returns V's variable label, or a null pointer if it has none. */
686 var_get_label (const struct variable *v)
691 /* Sets V's variable label to LABEL, stripping off leading and
692 trailing white space and truncating to 255 characters.
693 If LABEL is a null pointer or if LABEL is an empty string
694 (after stripping white space), then V's variable label (if
697 var_set_label (struct variable *v, const char *label)
704 struct substring s = ss_cstr (label);
705 ss_trim (&s, ss_cstr (CC_SPACES));
706 ss_truncate (&s, 255);
707 if (!ss_is_empty (s))
708 v->label = ss_xstrdup (s);
/* Removes V's variable label, if it has one, by passing a null
   label to var_set_label. */
void
var_clear_label (struct variable *v)
{
  const char *no_label = NULL;

  var_set_label (v, no_label);
}
719 /* Returns true if V has a variable V,
722 var_has_label (const struct variable *v)
724 return v->label != NULL;
727 /* Returns true if M is a valid variable measurement level,
730 measure_is_valid (enum measure m)
732 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
735 /* Returns V's measurement level. */
737 var_get_measure (const struct variable *v)
742 /* Sets V's measurement level to MEASURE. */
744 var_set_measure (struct variable *v, enum measure measure)
746 assert (measure_is_valid (measure));
747 v->measure = measure;
750 /* Returns V's display width, which applies only to GUIs. */
752 var_get_display_width (const struct variable *v)
754 return v->display_width;
757 /* Sets V's display width to DISPLAY_WIDTH. */
759 var_set_display_width (struct variable *v, int display_width)
761 v->display_width = display_width;
764 /* Returns true if A is a valid alignment,
767 alignment_is_valid (enum alignment a)
769 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
772 /* Returns V's display alignment, which applies only to GUIs. */
774 var_get_alignment (const struct variable *v)
779 /* Sets V's display alignment to ALIGNMENT. */
781 var_set_alignment (struct variable *v, enum alignment alignment)
783 assert (alignment_is_valid (alignment));
784 v->alignment = alignment;
787 /* Whether variables' values should be preserved from case to
790 /* Returns true if variable V's value should be left from case to
791 case, instead of being reset to 0, system-missing, or blanks. */
793 var_get_leave (const struct variable *v)
798 /* Sets V's leave setting to LEAVE. */
800 var_set_leave (struct variable *v, bool leave)
802 assert (leave || !var_must_leave (v));
806 /* Returns true if V must be left from case to case,
807 false if it can be set either way. */
809 var_must_leave (const struct variable *v)
811 return dict_class_from_id (v->name) == DC_SCRATCH;
814 /* Returns V's short name, if it has one, or a null pointer
817 Short names are used only for system and portable file input
818 and output. They are upper-case only, not necessarily unique,
819 and limited to SHORT_NAME_LEN characters (plus a null
820 terminator). Any variable may have no short name, indicated
821 by returning a null pointer. */
823 var_get_short_name (const struct variable *v)
825 return v->short_name[0] != '\0' ? v->short_name : NULL;
828 /* Sets V's short_name to SHORT_NAME, truncating it to
829 SHORT_NAME_LEN characters and converting it to uppercase in
830 the process. Specifying a null pointer for SHORT_NAME clears
831 the variable's short name. */
833 var_set_short_name (struct variable *v, const char *short_name)
836 assert (short_name == NULL || var_is_plausible_name (short_name, false));
838 if (short_name != NULL)
840 str_copy_trunc (v->short_name, sizeof v->short_name, short_name);
841 str_uppercase (v->short_name);
844 v->short_name[0] = '\0';
847 /* Clears V's short name. */
849 var_clear_short_name (struct variable *v)
853 v->short_name[0] = '\0';
856 /* Relationship with dictionary. */
858 /* Returns V's index within its dictionary, the value
859 for which "dict_get_var (dict, index)" will return V.
860 V must be in a dictionary. */
862 var_get_dict_index (const struct variable *v)
864 assert (v->vardict.dict_index != -1);
865 return v->vardict.dict_index;
868 /* Returns V's index within the case represented by its
869 dictionary, that is, the value for which "case_data_idx (case,
870 index)" will return the data for V in that case.
871 V must be in a dictionary. */
873 var_get_case_index (const struct variable *v)
875 assert (v->vardict.case_index != -1);
876 return v->vardict.case_index;
879 /* Returns V's auxiliary data, or a null pointer if none has been
882 var_get_aux (const struct variable *v)
887 /* Assign auxiliary data AUX to variable V, which must not
888 already have auxiliary data. Before V's auxiliary data is
889 cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
890 may be appropriate for use as AUX_DTOR.) */
892 var_attach_aux (struct variable *v,
893 void *aux, void (*aux_dtor) (struct variable *))
895 assert (v->aux == NULL);
896 assert (aux != NULL);
898 v->aux_dtor = aux_dtor;
902 /* Remove auxiliary data, if any, from V, and return it, without
903 calling any associated destructor. */
905 var_detach_aux (struct variable *v)
908 assert (aux != NULL);
913 /* Clears auxiliary data, if any, from V, and calls any
914 associated destructor. */
916 var_clear_aux (struct variable *v)
921 if (v->aux_dtor != NULL)
/* This function is appropriate for use as an auxiliary data
   destructor (passed as AUX_DTOR to var_attach_aux()) for the
   case where the auxiliary data should be passed to free(). */
931 var_dtor_free (struct variable *v)
936 /* Observed categorical values. */
938 /* Returns V's observed categorical values,
939 which V must have. */
941 var_get_obs_vals (const struct variable *v)
943 assert (v->obs_vals != NULL);
947 /* Sets V's observed categorical values to CAT_VALS. */
949 var_set_obs_vals (struct variable *v, struct cat_vals *cat_vals)
951 cat_stored_values_destroy (v->obs_vals);
952 v->obs_vals = cat_vals;
955 /* Returns true if V has observed categorical values,
958 var_has_obs_vals (const struct variable *v)
960 return v->obs_vals != NULL;
963 /* Returns the dictionary class corresponding to a variable named
966 dict_class_from_id (const char *name)
979 /* Returns the name of dictionary class DICT_CLASS. */
981 dict_class_to_name (enum dict_class dict_class)
986 return _("ordinary");
996 /* Returns V's vardict structure. */
997 const struct vardict_info *
998 var_get_vardict (const struct variable *v)
1000 assert (var_has_vardict (v));
1004 /* Sets V's vardict data to VARDICT. */
1006 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
1008 assert (vardict->dict_index >= 0);
1009 assert (vardict->case_index >= 0);
1010 v->vardict = *vardict;
1013 /* Returns true if V has vardict data. */
1015 var_has_vardict (const struct variable *v)
1017 return v->vardict.dict_index != -1;
1020 /* Clears V's vardict data. */
1022 var_clear_vardict (struct variable *v)
1024 v->vardict.dict_index = v->vardict.case_index = -1;