1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
24 #include "cat-routines.h"
27 #include "dictionary.h"
29 #include "identifier.h"
30 #include "missing-values.h"
32 #include "value-labels.h"
35 #include <libpspp/alloc.h>
36 #include <libpspp/assertion.h>
37 #include <libpspp/compiler.h>
38 #include <libpspp/hash.h>
39 #include <libpspp/message.h>
40 #include <libpspp/misc.h>
41 #include <libpspp/str.h>
46 #define _(msgid) gettext (msgid)
51 /* Dictionary information. */
52 char name[LONG_NAME_LEN + 1]; /* Variable name. Mixed case. */
53 int width; /* 0 for numeric, otherwise string width. */
54 struct missing_values miss; /* User-missing values. */
55 struct fmt_spec print; /* Default format for PRINT. */
56 struct fmt_spec write; /* Default format for WRITE. */
57 struct val_labs *val_labs; /* Value labels, or a null pointer if none. */
58 char *label; /* Variable label, or a null pointer if none. */
60 /* GUI information. */
61 enum measure measure; /* Nominal, ordinal, or continuous. */
62 int display_width; /* Width of data editor column. */
63 enum alignment alignment; /* Alignment of data in GUI. */
65 /* Case information. */
66 bool leave; /* Leave value from case to case? */
68 /* Data for use by containing dictionary. */
69 struct vardict_info vardict; /* dict_index is -1 while the variable has no vardict data. */
71 /* Short name, used only for system and portable file input
72 and output. Upper case only. There is no index for short
73 names. Short names are not necessarily unique. Any
74 variable may have no short name, indicated by an empty string. */
76 char short_name[SHORT_NAME_LEN + 1];
78 /* Each command may use these fields as needed. */
80 void (*aux_dtor) (struct variable *); /* Destructor for auxiliary data, or a null pointer; see var_attach_aux. */
82 /* Observed values of a categorical variable. Procedures need
83 vectors with binary entries, so any variable of type ALPHA will
84 have its values stored here. Null if there are none. */
85 struct cat_vals *obs_vals;
88 /* Returns true if VAR_TYPE is a valid variable type. */
90 var_type_is_valid (enum var_type var_type)
92 return var_type == VAR_NUMERIC || var_type == VAR_STRING;
95 /* Returns the variable type for the given width. */
97 var_type_from_width (int width)
99 return width != 0 ? VAR_STRING : VAR_NUMERIC;
102 /* Creates and returns a new variable with the given NAME and
103 WIDTH and other fields initialized to default values. WIDTH
104 must be between 0 (numeric) and MAX_STRING. The variable is not
105 added to a dictionary; for that, use dict_create_var instead. */
107 var_create (const char *name, int width)
111 assert (width >= 0 && width <= MAX_STRING);
113 v = xmalloc (sizeof *v);
114 v->vardict.dict_index = v->vardict.case_index = -1; /* No dictionary yet; var_set_name asserts this. */
115 var_set_name (v, name); /* Must precede var_must_leave, which examines v->name. */
117 mv_init (&v->miss, width);
118 v->leave = var_must_leave (v);
119 if (var_is_numeric (v))
121 v->print = fmt_for_output (FMT_F, 8, 2);
122 v->alignment = ALIGN_RIGHT;
123 v->display_width = 8;
124 v->measure = MEASURE_SCALE;
128 v->print = fmt_for_output (FMT_A, var_get_width (v), 0);
129 v->alignment = ALIGN_LEFT;
130 v->display_width = 8;
131 v->measure = MEASURE_NOMINAL;
136 var_clear_short_name (v);
/* Creates and returns a copy of OLD_VAR.  The copy starts out as
   a fresh variable with OLD_VAR's name and width, and then
   receives OLD_VAR's missing values, print and write formats,
   value labels, variable label, measurement level, display
   width, alignment, and leave setting.

   Not carried over:

   - The short name, because there is no reason to give a new
     variable with potentially a new name the same short name.

   - Dictionary membership.  Use dict_clone_var, instead, to add
     the clone to OLD_VAR's dictionary.

   - Auxiliary data and obs_vals. */
struct variable *
var_clone (const struct variable *old_var)
{
  const char *name = var_get_name (old_var);
  int width = var_get_width (old_var);
  struct variable *copy = var_create (name, width);

  var_set_missing_values (copy, var_get_missing_values (old_var));
  var_set_print_format (copy, var_get_print_format (old_var));
  var_set_write_format (copy, var_get_write_format (old_var));
  var_set_value_labels (copy, var_get_value_labels (old_var));
  var_set_label (copy, var_get_label (old_var));
  var_set_measure (copy, var_get_measure (old_var));
  var_set_display_width (copy, var_get_display_width (old_var));
  var_set_alignment (copy, var_get_alignment (old_var));
  var_set_leave (copy, var_get_leave (old_var));

  return copy;
}
174 /* Destroys variable V, including its observed categorical values
175 and value labels. V must not belong to a dictionary (this is
176 asserted). If it does, use dict_delete_var instead. */
178 var_destroy (struct variable *v)
182 assert (!var_has_vardict (v));
183 cat_stored_values_destroy (v->obs_vals);
185 val_labs_destroy (v->val_labs);
191 /* Variable names. */
193 /* Returns variable V's name. */
195 var_get_name (const struct variable *v)
200 /* Sets V's name to NAME.
201 Do not use this function for a variable in a dictionary. Use
202 dict_rename_var instead. */
204 var_set_name (struct variable *v, const char *name)
206 assert (v->vardict.dict_index == -1);
207 assert (var_is_plausible_name (name, false));
209 str_copy_trunc (v->name, sizeof v->name, name);
212 /* Returns true if NAME is an acceptable name for a variable,
213 false otherwise. If ISSUE_ERROR is true, issues an
214 explanatory error message on failure. NAME must not be null. */
216 var_is_valid_name (const char *name, bool issue_error)
221 assert (name != NULL);
223 /* Note that strlen returns number of BYTES, not the number of CHARACTERS. */
225 length = strlen (name);
227 plausible = var_is_plausible_name(name, issue_error);
233 if (!lex_is_id1 (name[0]))
236 msg (SE, _("Character `%c' (in %s) may not appear "
237 "as the first character in a variable name."),
243 for (i = 0; i < length; i++) /* Starts at 0, so the first byte is tested by lex_is_idn too. */
245 if (!lex_is_idn (name[i]))
248 msg (SE, _("Character `%c' (in %s) may not appear in "
258 /* Returns true if NAME is a plausible name for a variable,
259 false otherwise. If ISSUE_ERROR is true, issues an
260 explanatory error message on failure. NAME must not be null.
261 This function makes no use of LC_CTYPE. */
264 var_is_plausible_name (const char *name, bool issue_error)
268 assert (name != NULL);
270 /* Note that strlen returns number of BYTES, not the number of CHARACTERS. */
272 length = strlen (name);
276 msg (SE, _("Variable name cannot be empty string."));
279 else if (length > LONG_NAME_LEN)
282 msg (SE, _("Variable name %s exceeds %d-character limit."),
283 name, (int) LONG_NAME_LEN);
287 if (lex_id_to_token (ss_cstr (name)) != T_ID) /* Anything but T_ID is reported as a reserved word. */
290 msg (SE, _("`%s' may not be used as a variable name because it "
291 "is a reserved word."), name);
298 /* A hsh_compare_func that orders variables A and B by their
301 compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED)
303 const struct variable *a = a_;
304 const struct variable *b = b_;
306 return strcasecmp (a->name, b->name);
309 /* A hsh_hash_func that hashes variable V based on its name, using hsh_hash_case_string. AUX is ignored. */
311 hash_var_by_name (const void *v_, const void *aux UNUSED)
313 const struct variable *v = v_;
315 return hsh_hash_case_string (v->name);
318 /* A hsh_compare_func that orders pointers to variables A and B
321 compare_var_ptrs_by_name (const void *a_, const void *b_,
322 const void *aux UNUSED)
324 struct variable *const *a = a_;
325 struct variable *const *b = b_;
327 return strcasecmp (var_get_name (*a), var_get_name (*b));
330 /* A hsh_hash_func that hashes pointer to variable V based on its name, using hsh_hash_case_string. AUX is ignored. */
333 hash_var_ptr_by_name (const void *v_, const void *aux UNUSED)
335 struct variable *const *v = v_;
337 return hsh_hash_case_string (var_get_name (*v));
340 /* Returns the type of variable V. */
342 var_get_type (const struct variable *v)
344 return var_type_from_width (v->width);
347 /* Returns the width of variable V: 0 for a numeric variable, otherwise the string width. */
349 var_get_width (const struct variable *v)
354 /* Sets the width of V to NEW_WIDTH, updating V's missing values, value labels, and print and write formats to match. */
356 var_set_width (struct variable *v, int new_width)
358 enum var_type new_type = var_type_from_width (new_width);
360 if (mv_is_resizable (&v->miss, new_width))
361 mv_resize (&v->miss, new_width);
363 mv_init (&v->miss, new_width);
365 if (v->val_labs != NULL)
367 if (val_labs_can_set_width (v->val_labs, new_width))
368 val_labs_set_width (v->val_labs, new_width);
371 val_labs_destroy (v->val_labs);
376 if (var_get_type (v) != new_type) /* v->width still holds the old width here. */
378 v->print = (new_type == VAR_NUMERIC
379 ? fmt_for_output (FMT_F, 8, 2)
380 : fmt_for_output (FMT_A, new_width, 0));
383 else if (new_type == VAR_STRING)
385 v->print.w = v->print.type == FMT_AHEX ? new_width * 2 : new_width; /* An AHEX format is twice as wide as the string. */
386 v->write.w = v->write.type == FMT_AHEX ? new_width * 2 : new_width;
389 v->width = new_width;
392 /* Returns true if variable V is numeric, false otherwise. */
394 var_is_numeric (const struct variable *v)
396 return var_get_type (v) == VAR_NUMERIC;
399 /* Returns true if variable V is a string variable, false
402 var_is_alpha (const struct variable *v)
404 return var_get_type (v) == VAR_STRING;
407 /* Returns true if variable V is a short string variable, false
410 var_is_short_string (const struct variable *v)
412 return v->width > 0 && v->width <= MAX_SHORT_STRING;
415 /* Returns true if variable V is a long string variable, false
418 var_is_long_string (const struct variable *v)
420 return v->width > MAX_SHORT_STRING;
423 /* Returns the number of "union value"s needed to store a value of variable V: 1 for a numeric variable, otherwise V's width divided by MAX_SHORT_STRING, rounded up. */
426 var_get_value_cnt (const struct variable *v)
428 return v->width == 0 ? 1 : DIV_RND_UP (v->width, MAX_SHORT_STRING);
431 /* Returns variable V's user-missing values. */
432 const struct missing_values *
433 var_get_missing_values (const struct variable *v)
438 /* Sets variable V's missing values to a copy of MISS, which must
439 be of V's width or at least resizable to V's width (asserted).
440 If MISS is null, then V's missing values, if any, are cleared. */
443 var_set_missing_values (struct variable *v, const struct missing_values *miss)
447 assert (mv_is_resizable (miss, v->width));
448 mv_copy (&v->miss, miss);
449 mv_resize (&v->miss, v->width);
452 mv_init (&v->miss, v->width);
/* Removes all user-missing values from variable V, by passing a
   null set of missing values to var_set_missing_values. */
void
var_clear_missing_values (struct variable *v)
{
  const struct missing_values *none = NULL;

  var_set_missing_values (v, none);
}
462 /* Returns true if V has any user-missing values,
465 var_has_missing_values (const struct variable *v)
467 return !mv_is_empty (&v->miss);
470 /* Returns true if VALUE is system missing or user-missing value
471 for V, false otherwise. */
473 var_is_value_missing (const struct variable *v, const union value *value)
475 return mv_is_value_missing (&v->miss, value);
478 /* Returns true if D is system missing or a missing value in V,
480 V must be a numeric variable. */
482 var_is_num_missing (const struct variable *v, double d)
484 return mv_is_num_missing (&v->miss, d);
487 /* Returns true if S[] is a missing value for V, false otherwise.
488 S[] must contain exactly as many characters as V's width.
489 V must be a string variable. */
491 var_is_str_missing (const struct variable *v, const char s[])
493 return mv_is_str_missing (&v->miss, s);
496 /* Returns true if VALUE is a missing value for V, false
499 var_is_value_user_missing (const struct variable *v, const union value *value)
501 return mv_is_value_user_missing (&v->miss, value);
504 /* Returns true if D is a user-missing value for V, false
505 otherwise. V must be a numeric variable. */
507 var_is_num_user_missing (const struct variable *v, double d)
509 return mv_is_num_user_missing (&v->miss, d);
512 /* Returns true if S[] is a missing value for V, false otherwise.
513 V must be a string variable.
514 S[] must contain exactly as many characters as V's width. */
516 var_is_str_user_missing (const struct variable *v, const char s[])
518 return mv_is_str_user_missing (&v->miss, s);
521 /* Returns true if V is a numeric variable and VALUE is the
522 system missing value. */
524 var_is_value_system_missing (const struct variable *v,
525 const union value *value)
527 return mv_is_value_system_missing (&v->miss, value);
530 /* Returns variable V's set of value labels,
531 which is a null pointer if V has none. */
532 const struct val_labs *
533 var_get_value_labels (const struct variable *v)
538 /* Returns true if variable V has at least one value label. */
540 var_has_value_labels (const struct variable *v)
542 return val_labs_count (v->val_labs) > 0;
545 /* Replaces variable V's value labels by a copy of VLS,
546 which must have a width equal to V's width or one that can be
547 changed to V's width (asserted). V's previous labels are destroyed.
548 If VLS is null, then V's value labels, if any, are removed. */
550 var_set_value_labels (struct variable *v, const struct val_labs *vls)
552 val_labs_destroy (v->val_labs);
557 assert (val_labs_can_set_width (vls, v->width));
558 v->val_labs = val_labs_copy (vls);
559 val_labs_set_width (v->val_labs, v->width);
563 /* Makes sure that V has a set of value labels, by creating one of
564 V's width if it has none. V must not be a long string variable. */
566 alloc_value_labels (struct variable *v)
568 assert (!var_is_long_string (v));
569 if (v->val_labs == NULL)
570 v->val_labs = val_labs_create (v->width);
573 /* Attempts to add a value label with the given VALUE and LABEL
574 to V. Returns true if successful, false if VALUE has an
576 V must not be a long string variable. */
578 var_add_value_label (struct variable *v,
579 const union value *value, const char *label)
581 alloc_value_labels (v);
582 return val_labs_add (v->val_labs, *value, label);
585 /* Adds or replaces a value label with the given VALUE and LABEL
587 V must not be a long string variable. */
589 var_replace_value_label (struct variable *v,
590 const union value *value, const char *label)
592 alloc_value_labels (v);
593 val_labs_replace (v->val_labs, *value, label);
/* Removes all of V's value labels, if it has any, by passing a
   null set of labels to var_set_value_labels. */
void
var_clear_value_labels (struct variable *v)
{
  const struct val_labs *none = NULL;

  var_set_value_labels (v, none);
}
603 /* Returns the label associated with VALUE for variable V,
604 or a null pointer if none. */
606 var_lookup_value_label (const struct variable *v, const union value *value)
608 return val_labs_find (v->val_labs, *value);
611 /* Get a string representing VALUE for variable V.
612 That is, if VALUE has a label, return that label,
613 otherwise format VALUE and return the formatted string. */
615 var_get_value_name (const struct variable *v, const union value *value)
617 const char *name = var_lookup_value_label (v, value);
620 static char buf[MAX_STRING + 1];
621 data_out (value, &v->print, buf);
622 buf[v->print.w] = '\0';
628 /* Print and write formats. */
630 /* Returns V's print format specification, the default format for PRINT. */
631 const struct fmt_spec *
632 var_get_print_format (const struct variable *v)
637 /* Sets V's print format specification to PRINT, which must be a
638 valid format specification for outputting a variable of V's width (asserted). */
641 var_set_print_format (struct variable *v, const struct fmt_spec *print)
643 assert (fmt_check_width_compat (print, v->width));
647 /* Returns V's write format specification, the default format for WRITE. */
648 const struct fmt_spec *
649 var_get_write_format (const struct variable *v)
654 /* Sets V's write format specification to WRITE, which must be a
655 valid format specification for outputting a variable of V's width (asserted). */
658 var_set_write_format (struct variable *v, const struct fmt_spec *write)
660 assert (fmt_check_width_compat (write, v->width));
/* Makes FORMAT both the print format and the write format of
   V.  FORMAT must be a valid format specification for
   outputting a variable of V's width. */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  /* Print format first, then write format. */
  var_set_print_format (v, format);
  var_set_write_format (v, format);
}
674 /* Return a string representing this variable, in the form most
675 appropriate from a human factors perspective, that is, its
676 variable label if it has one, otherwise its name. */
678 var_to_string (const struct variable *v)
680 return v->label != NULL ? v->label : v->name;
683 /* Returns V's variable label, or a null pointer if it has none. */
685 var_get_label (const struct variable *v)
690 /* Sets V's variable label to a copy of LABEL, stripping off leading
691 and trailing white space and truncating to 255 characters.
692 If LABEL is a null pointer or if LABEL is an empty string
693 (after stripping white space), then V's variable label (if any) is removed. */
696 var_set_label (struct variable *v, const char *label)
703 struct substring s = ss_cstr (label);
704 ss_trim (&s, ss_cstr (CC_SPACES));
705 ss_truncate (&s, 255);
706 if (!ss_is_empty (s))
707 v->label = ss_xstrdup (s);
/* Removes V's variable label, if it has one, by passing a null
   label to var_set_label. */
void
var_clear_label (struct variable *v)
{
  const char *none = NULL;

  var_set_label (v, none);
}
718 /* Returns true if V has a variable V,
721 var_has_label (const struct variable *v)
723 return v->label != NULL;
726 /* Returns true if M is a valid variable measurement level,
729 measure_is_valid (enum measure m)
731 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
734 /* Returns V's measurement level (nominal, ordinal, or continuous). */
736 var_get_measure (const struct variable *v)
741 /* Sets V's measurement level to MEASURE. */
743 var_set_measure (struct variable *v, enum measure measure)
745 assert (measure_is_valid (measure));
746 v->measure = measure;
749 /* Returns V's display width, which applies only to GUIs. */
751 var_get_display_width (const struct variable *v)
753 return v->display_width;
756 /* Sets V's display width to DISPLAY_WIDTH. */
758 var_set_display_width (struct variable *v, int display_width)
760 v->display_width = display_width;
763 /* Returns true if A is a valid alignment,
766 alignment_is_valid (enum alignment a)
768 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
771 /* Returns V's display alignment, which applies only to GUIs. */
773 var_get_alignment (const struct variable *v)
778 /* Sets V's display alignment to ALIGNMENT. */
780 var_set_alignment (struct variable *v, enum alignment alignment)
782 assert (alignment_is_valid (alignment));
783 v->alignment = alignment;
786 /* Whether variables' values should be preserved from case to case. */
789 /* Returns true if variable V's value should be left from case to
790 case, instead of being reset to 0, system-missing, or blanks. */
792 var_get_leave (const struct variable *v)
797 /* Sets V's leave setting to LEAVE. LEAVE must be true if var_must_leave (V) is true (asserted). */
799 var_set_leave (struct variable *v, bool leave)
801 assert (leave || !var_must_leave (v));
805 /* Returns true if V must be left from case to case,
806 false if it can be set either way. */
808 var_must_leave (const struct variable *v)
810 return dict_class_from_id (v->name) == DC_SCRATCH;
813 /* Returns V's short name, if it has one, or a null pointer
816 Short names are used only for system and portable file input
817 and output. They are upper-case only, not necessarily unique,
818 and limited to SHORT_NAME_LEN characters (plus a null
819 terminator). Any variable may have no short name, indicated
820 by returning a null pointer. */
822 var_get_short_name (const struct variable *v)
824 return v->short_name[0] != '\0' ? v->short_name : NULL;
827 /* Sets V's short_name to SHORT_NAME, truncating it to
828 SHORT_NAME_LEN characters and converting it to uppercase in
829 the process. Specifying a null pointer for SHORT_NAME clears
830 the variable's short name. A non-null SHORT_NAME must be a plausible variable name. */
832 var_set_short_name (struct variable *v, const char *short_name)
835 assert (short_name == NULL || var_is_plausible_name (short_name, false));
837 if (short_name != NULL)
839 str_copy_trunc (v->short_name, sizeof v->short_name, short_name);
840 str_uppercase (v->short_name);
843 v->short_name[0] = '\0'; /* An empty string means "no short name". */
846 /* Clears V's short name, by making it the empty string. */
848 var_clear_short_name (struct variable *v)
852 v->short_name[0] = '\0';
855 /* Relationship with dictionary. */
857 /* Returns V's index within its dictionary, the value
858 for which "dict_get_var (dict, index)" will return V.
859 V must be in a dictionary (asserted). */
861 var_get_dict_index (const struct variable *v)
863 assert (v->vardict.dict_index != -1);
864 return v->vardict.dict_index;
867 /* Returns V's index within the case represented by its
868 dictionary, that is, the value for which "case_data_idx (case,
869 index)" will return the data for V in that case.
870 V must be in a dictionary (asserted). */
872 var_get_case_index (const struct variable *v)
874 assert (v->vardict.case_index != -1);
875 return v->vardict.case_index;
878 /* Returns V's auxiliary data, or a null pointer if none has been attached. */
881 var_get_aux (const struct variable *v)
886 /* Assigns auxiliary data AUX, which must not be null, to variable V,
887 which must not already have auxiliary data. Before V's auxiliary data
888 is cleared, AUX_DTOR(V) will be called. (var_dtor_free, below,
889 may be appropriate for use as AUX_DTOR.) */
891 var_attach_aux (struct variable *v,
892 void *aux, void (*aux_dtor) (struct variable *))
894 assert (v->aux == NULL);
895 assert (aux != NULL);
897 v->aux_dtor = aux_dtor;
901 /* Removes auxiliary data from V, and returns it, without
902 calling any associated destructor. NOTE(review): the assertion below suggests that V must actually have auxiliary data -- confirm. */
904 var_detach_aux (struct variable *v)
907 assert (aux != NULL);
912 /* Clears auxiliary data, if any, from V, and calls any
913 associated destructor (V's aux_dtor, when it is non-null). */
915 var_clear_aux (struct variable *v)
920 if (v->aux_dtor != NULL)
926 /* This function is appropriate for use as an auxiliary data
927 destructor (passed as AUX_DTOR to var_attach_aux()) for the
928 case where the auxiliary data should be passed to free(). */
930 var_dtor_free (struct variable *v)
935 /* Observed categorical values. */
937 /* Returns V's observed categorical values,
938 which V must have (asserted). */
940 var_get_obs_vals (const struct variable *v)
942 assert (v->obs_vals != NULL);
946 /* Sets V's observed categorical values to CAT_VALS. */
948 var_set_obs_vals (struct variable *v, struct cat_vals *cat_vals)
950 cat_stored_values_destroy (v->obs_vals);
951 v->obs_vals = cat_vals;
954 /* Returns true if V has observed categorical values,
957 var_has_obs_vals (const struct variable *v)
959 return v->obs_vals != NULL;
962 /* Returns the dictionary class corresponding to a variable named NAME. */
965 dict_class_from_id (const char *name)
978 /* Returns the name of dictionary class DICT_CLASS, as a string passed through gettext. */
980 dict_class_to_name (enum dict_class dict_class)
985 return _("ordinary");
995 /* Returns V's vardict structure. V must have vardict data (asserted; see var_has_vardict). */
996 const struct vardict_info *
997 var_get_vardict (const struct variable *v)
999 assert (var_has_vardict (v));
1003 /* Sets V's vardict data to VARDICT. */
1005 var_set_vardict (struct variable *v, const struct vardict_info *vardict)
1007 assert (vardict->dict_index >= 0);
1008 assert (vardict->case_index >= 0);
1009 v->vardict = *vardict;
1012 /* Returns true if V has vardict data. */
1014 var_has_vardict (const struct variable *v)
1016 return v->vardict.dict_index != -1;
1019 /* Clears V's vardict data. */
1021 var_clear_vardict (struct variable *v)
1023 v->vardict.dict_index = v->vardict.case_index = -1;