1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 02110-1301, USA. */
22 #include <libpspp/assertion.h>
23 #include <libpspp/message.h>
25 #include <libpspp/alloc.h>
26 #include <libpspp/compiler.h>
27 #include "dictionary.h"
28 #include <libpspp/hash.h>
29 #include "identifier.h"
30 #include <libpspp/misc.h>
31 #include <libpspp/str.h>
32 #include "value-labels.h"
/* Shorthand for marking a user-visible message string and
   translating it through gettext(). */
#define _(msgid) gettext (msgid)
39 /* Returns true if VAR_TYPE is a valid variable type. */
41 var_type_is_valid (enum var_type var_type)
43 return var_type == NUMERIC || var_type == ALPHA;
46 /* Returns an adjective describing the given variable TYPE,
47 suitable for use in phrases like "numeric variable". */
49 var_type_adj (enum var_type type)
51 return type == NUMERIC ? _("numeric") : _("string");
54 /* Returns a noun describing a value of the given variable TYPE,
55 suitable for use in phrases like "a number". */
57 var_type_noun (enum var_type type)
59 return type == NUMERIC ? _("number") : _("string");
62 /* Returns true if M is a valid variable measurement level,
65 measure_is_valid (enum measure m)
67 return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE;
70 /* Returns true if A is a valid alignment,
73 alignment_is_valid (enum alignment a)
75 return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE;
78 /* Assign auxiliary data AUX to variable V, which must not
79 already have auxiliary data. Before V's auxiliary data is
80 cleared, AUX_DTOR(V) will be called. */
/* NOTE(review): this listing has lost the return-type line, the
   braces, and the statements between the assertion and the
   AUX_DTOR assignment (presumably the one that stores AUX in
   V->aux) as well as anything after it -- confirm against the
   upstream file before building. */
82 var_attach_aux (struct variable *v,
83 void *aux, void (*aux_dtor) (struct variable *))
85 assert (v->aux == NULL);
88 v->aux_dtor = aux_dtor;
92 /* Remove auxiliary data, if any, from V, and returns it, without
93 calling any associated destructor. */
/* NOTE(review): only the parameter list survives in this
   listing; the return type and the whole body are missing and
   must be restored from the upstream file. */
95 var_detach_aux (struct variable *v)
103 /* Clears auxiliary data, if any, from V, and calls any
104 associated destructor. */
/* NOTE(review): the return type, braces and most statements are
   missing from this listing; only the test for a non-null
   destructor is visible.  Restore from the upstream file. */
106 var_clear_aux (struct variable *v)
111 if (v->aux_dtor != NULL)
117 /* This function is appropriate for use an auxiliary data
118 destructor (passed as AUX_DTOR to var_attach_aux()) for the
119 case where the auxiliary data should be passed to free(). */
/* NOTE(review): the return type and body are not visible in this
   listing; presumably the body frees V's auxiliary data --
   confirm against the upstream file. */
121 var_dtor_free (struct variable *v)
126 /* Duplicate a value.
127 The caller is responsible for freeing the returned value
130 value_dup (const union value *val, int width)
132 size_t bytes = MAX(width, sizeof *val);
134 union value *v = xmalloc (bytes);
135 memcpy (v, val, bytes);
141 /* Compares A and B, which both have the given WIDTH, and returns
142 a strcmp()-type result. */
144 compare_values (const union value *a, const union value *b, int width)
147 return a->f < b->f ? -1 : a->f > b->f;
149 return memcmp (a->s, b->s, MIN(MAX_SHORT_STRING, width));
152 /* Create a hash of v */
154 hash_value(const union value *v, int width)
159 id_hash = hsh_hash_double (v->f);
161 id_hash = hsh_hash_bytes (v->s, MIN(MAX_SHORT_STRING, width));
166 /* Return variable V's name. */
168 var_get_name (const struct variable *v)
173 /* Sets V's name to NAME. */
175 var_set_name (struct variable *v, const char *name)
177 assert (name[0] != '\0');
178 assert (lex_id_to_token (ss_cstr (name)) == T_ID);
180 str_copy_trunc (v->name, sizeof v->name, name);
183 /* Returns true if NAME is an acceptable name for a variable,
184 false otherwise. If ISSUE_ERROR is true, issues an
185 explanatory error message on failure. */
/* NOTE(review): this listing has lost the return type, the local
   declarations, the braces, the tail of the second message and
   the return statements.  The visible part checks plausibility
   with var_is_plausible_name(), then tests the first byte with
   lex_is_id1() and every byte with lex_is_idn().  Restore the
   missing lines from the upstream file before building. */
187 var_is_valid_name (const char *name, bool issue_error)
192 assert (name != NULL);
194 /* Note that strlen returns number of BYTES, not the number of
   characters. */
196 length = strlen (name);
198 plausible = var_is_plausible_name(name, issue_error);
204 if (!lex_is_id1 (name[0]))
207 msg (SE, _("Character `%c' (in %s), may not appear "
208 "as the first character in a variable name."),
214 for (i = 0; i < length; i++)
216 if (!lex_is_idn (name[i]))
219 msg (SE, _("Character `%c' (in %s) may not appear in "
230 Returns true if NAME is an plausible name for a variable,
231 false otherwise. If ISSUE_ERROR is true, issues an
232 explanatory error message on failure.
233 This function makes no use of LC_CTYPE.
236 var_is_plausible_name (const char *name, bool issue_error)
240 assert (name != NULL);
242 /* Note that strlen returns number of BYTES, not the number of
244 length = strlen (name);
248 msg (SE, _("Variable name cannot be empty string."));
251 else if (length > LONG_NAME_LEN)
254 msg (SE, _("Variable name %s exceeds %d-character limit."),
255 name, (int) LONG_NAME_LEN);
259 if (lex_id_to_token (ss_cstr (name)) != T_ID)
262 msg (SE, _("`%s' may not be used as a variable name because it "
263 "is a reserved word."), name);
270 /* A hsh_compare_func that orders variables A and B by their
273 compare_var_names (const void *a_, const void *b_, const void *aux UNUSED)
275 const struct variable *a = a_;
276 const struct variable *b = b_;
278 return strcasecmp (var_get_name (a), var_get_name (b));
281 /* A hsh_hash_func that hashes variable V based on its name. */
283 hash_var_name (const void *v_, const void *aux UNUSED)
285 const struct variable *v = v_;
287 return hsh_hash_case_string (var_get_name (v));
290 /* A hsh_compare_func that orders pointers to variables A and B
293 compare_var_ptr_names (const void *a_, const void *b_, const void *aux UNUSED)
295 struct variable *const *a = a_;
296 struct variable *const *b = b_;
298 return strcasecmp (var_get_name (*a), var_get_name (*b));
301 /* A hsh_hash_func that hashes pointer to variable V based on its
304 hash_var_ptr_name (const void *v_, const void *aux UNUSED)
306 struct variable *const *v = v_;
308 return hsh_hash_case_string (var_get_name (*v));
311 /* Returns the type of a variable with the given WIDTH. */
313 width_to_type (int width)
315 return width == 0 ? NUMERIC : ALPHA;
318 /* Returns the type of variable V. */
320 var_get_type (const struct variable *v)
322 return width_to_type (v->width);
325 /* Returns the width of variable V. */
327 var_get_width (const struct variable *v)
332 /* Sets the width of V to WIDTH. */
/* NOTE(review): the return type, the braces, the `else' lines and
   possibly other short statements are missing from this
   listing; restore them from the upstream file.  The visible
   steps are:
   - V's missing values are resized to NEW_WIDTH when
     mv_is_resizable() allows it, and otherwise reinitialized.
   - V's value labels, if any, are resized when
     val_labs_can_set_width() allows it, and otherwise destroyed.
   - If the change alters V's type, the print format is reset to
     F8.2 (numeric) or A<NEW_WIDTH> (string); if V stays a string,
     only the format widths are adjusted, AHEX using two
     characters per byte.
   - Finally the new width is stored in V. */
334 var_set_width (struct variable *v, int new_width)
336 enum var_type new_type = width_to_type (new_width);
338 if (mv_is_resizable (&v->miss, new_width))
339 mv_resize (&v->miss, new_width);
341 mv_init (&v->miss, new_width);
343 if (v->val_labs != NULL)
345 if (val_labs_can_set_width (v->val_labs, new_width))
346 val_labs_set_width (v->val_labs, new_width);
349 val_labs_destroy (v->val_labs);
354 if (var_get_type (v) != new_type)
356 v->print = (new_type == NUMERIC
357 ? fmt_for_output (FMT_F, 8, 2)
358 : fmt_for_output (FMT_A, new_width, 0));
361 else if (new_type == ALPHA)
363 v->print.w = v->print.type == FMT_AHEX ? new_width * 2 : new_width;
364 v->write.w = v->write.type == FMT_AHEX ? new_width * 2 : new_width;
367 v->width = new_width;
370 /* Returns true if variable V is numeric, false otherwise. */
372 var_is_numeric (const struct variable *v)
374 return var_get_type (v) == NUMERIC;
377 /* Returns true if variable V is a string variable, false
380 var_is_alpha (const struct variable *v)
382 return var_get_type (v) == ALPHA;
385 /* Returns true if variable V is a short string variable, false
388 var_is_short_string (const struct variable *v)
390 return v->width > 0 && v->width <= MAX_SHORT_STRING;
393 /* Returns true if variable V is a long string variable, false
396 var_is_long_string (const struct variable *v)
398 return v->width > MAX_SHORT_STRING;
401 /* Returns true if variable V is a very long string variable,
404 var_is_very_long_string (const struct variable *v)
406 return v->width > MAX_LONG_STRING;
409 /* Returns variable V's missing values. */
410 const struct missing_values *
411 var_get_missing_values (const struct variable *v)
416 /* Sets variable V's missing values to MISS, which must be of the
419 var_set_missing_values (struct variable *v, const struct missing_values *miss)
423 assert (v->width == mv_get_width (miss));
424 mv_copy (&v->miss, miss);
427 mv_init (&v->miss, v->width);
/* Sets variable V to have no user-missing values. */
void
var_clear_missing_values (struct variable *v)
{
  var_set_missing_values (v, NULL);
}
437 /* Returns true if V has any user-missing values,
440 var_has_missing_values (const struct variable *v)
442 return !mv_is_empty (&v->miss);
445 /* Returns true if VALUE is system missing or user-missing value
446 for V, false otherwise. */
448 var_is_value_missing (const struct variable *v, const union value *value)
450 return mv_is_value_missing (&v->miss, value);
453 /* Returns true if D is system missing or a missing value in V,
455 V must be a numeric variable. */
457 var_is_num_missing (const struct variable *v, double d)
459 return mv_is_num_missing (&v->miss, d);
462 /* Returns true if S[] is a missing value for V, false otherwise.
463 S[] must contain exactly as many characters as V's width.
464 V must be a string variable. */
466 var_is_str_missing (const struct variable *v, const char s[])
468 return mv_is_str_missing (&v->miss, s);
471 /* Returns true if VALUE is a missing value for V, false
474 var_is_value_user_missing (const struct variable *v, const union value *value)
476 return mv_is_value_user_missing (&v->miss, value);
479 /* Returns true if D is a user-missing value for V, false
480 otherwise. V must be a numeric variable. */
482 var_is_num_user_missing (const struct variable *v, double d)
484 return mv_is_num_user_missing (&v->miss, d);
487 /* Returns true if S[] is a missing value for V, false otherwise.
488 V must be a string variable.
489 S[] must contain exactly as many characters as V's width. */
491 var_is_str_user_missing (const struct variable *v, const char s[])
493 return mv_is_str_user_missing (&v->miss, s);
496 /* Returns true if V is a numeric variable and VALUE is the
497 system missing value. */
499 var_is_value_system_missing (const struct variable *v,
500 const union value *value)
502 return mv_is_value_system_missing (&v->miss, value);
505 /* Print and write formats. */
507 /* Returns V's print format specification. */
508 const struct fmt_spec *
509 var_get_print_format (const struct variable *v)
514 /* Sets V's print format specification to PRINT, which must be a
515 valid format specification for outputting a variable of V's
518 var_set_print_format (struct variable *v, const struct fmt_spec *print)
520 assert (fmt_check_width_compat (print, v->width));
524 /* Returns V's write format specification. */
525 const struct fmt_spec *
526 var_get_write_format (const struct variable *v)
531 /* Sets V's write format specification to WRITE, which must be a
532 valid format specification for outputting a variable of V's
535 var_set_write_format (struct variable *v, const struct fmt_spec *write)
537 assert (fmt_check_width_compat (write, v->width));
/* Sets V's print and write format specifications to FORMAT,
   which must be a valid format specification for outputting a
   variable of V's width. */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
  var_set_print_format (v, format);
  var_set_write_format (v, format);
}
551 /* Returns V's variable label, or a null pointer if it has none. */
553 var_get_label (const struct variable *v)
558 /* Sets V's variable label to LABEL, stripping off leading and
559 trailing white space and truncating to 255 characters.
560 If LABEL is a null pointer or if LABEL is an empty string
561 (after stripping white space), then V's variable label (if
any) is removed. */
/* NOTE(review): the return type, the braces and the statements
   that precede the trimming code (presumably discarding the old
   label and testing LABEL for a null pointer) are missing from
   this listing -- restore them from the upstream file.  In the
   visible part the trimmed, truncated text is duplicated into
   V->label only when it is nonempty. */
564 var_set_label (struct variable *v, const char *label)
571 struct substring s = ss_cstr (label);
572 ss_trim (&s, ss_cstr (CC_SPACES));
573 ss_truncate (&s, 255);
574 if (!ss_is_empty (s))
575 v->label = ss_xstrdup (s);
/* Removes any variable label from V. */
void
var_clear_label (struct variable *v)
{
  var_set_label (v, NULL);
}
586 /* Returns true if V has a variable V,
589 var_has_label (const struct variable *v)
591 return v->label != NULL;
594 /* Returns V's measurement level. */
596 var_get_measure (const struct variable *v)
601 /* Sets V's measurement level to MEASURE. */
603 var_set_measure (struct variable *v, enum measure measure)
605 assert (measure_is_valid (measure));
606 v->measure = measure;
609 /* Returns V's display width, which applies only to GUIs. */
611 var_get_display_width (const struct variable *v)
613 return v->display_width;
616 /* Sets V's display width to DISPLAY_WIDTH. */
618 var_set_display_width (struct variable *v, int display_width)
620 v->display_width = display_width;
623 /* Returns V's display alignment, which applies only to GUIs. */
625 var_get_alignment (const struct variable *v)
630 /* Sets V's display alignment to ALIGNMENT. */
632 var_set_alignment (struct variable *v, enum alignment alignment)
634 assert (alignment_is_valid (alignment));
635 v->alignment = alignment;
638 /* Returns the number of "union value"s need to store a value of
641 var_get_value_cnt (const struct variable *v)
643 return v->width == 0 ? 1 : DIV_RND_UP (v->width, MAX_SHORT_STRING);
646 /* Return whether variable V's values should be preserved from
case to case. */
/* NOTE(review): the end of the comment above was reconstructed,
   and the return type and body of this definition are not
   visible in this listing -- restore them from the upstream
   file. */
649 var_get_leave (const struct variable *v)
654 /* Returns V's short name, if it has one, or a null pointer
657 Short names are used only for system and portable file input
658 and output. They are upper-case only, not necessarily unique,
659 and limited to SHORT_NAME_LEN characters (plus a null
660 terminator). Any variable may have no short name, indicated
661 by returning a null pointer. */
663 var_get_short_name (const struct variable *v)
665 return v->short_name[0] != '\0' ? v->short_name : NULL;
668 /* Sets V's short_name to SHORT_NAME, truncating it to
669 SHORT_NAME_LEN characters and converting it to uppercase in
670 the process. Specifying a null pointer for SHORT_NAME clears
671 the variable's short name. */
673 var_set_short_name (struct variable *v, const char *short_name)
676 assert (short_name == NULL || var_is_plausible_name (short_name, false));
678 if (short_name != NULL)
680 str_copy_trunc (v->short_name, sizeof v->short_name, short_name);
681 str_uppercase (v->short_name);
684 v->short_name[0] = '\0';
687 /* Clears V's short name. */
689 var_clear_short_name (struct variable *v)
693 v->short_name[0] = '\0';
696 /* Sets V's short name to BASE, followed by a suffix of the form
697 _A, _B, _C, ..., _AA, _AB, etc. according to the value of
698 SUFFIX_NUMBER. Truncates BASE as necessary to fit. */
/* NOTE(review): the return type, the rest of the parameter list,
   several local declarations, the braces and a number of short
   statements are missing from this listing; restore them from
   the upstream file.  The visible code builds the suffix
   backward from the end of SUFFIX[], one letter per base-26
   digit, then copies BASE into SHORT_NAME[] and writes the
   suffix at an offset chosen so that the result fits in
   SHORT_NAME_LEN characters. */
700 var_set_short_name_suffix (struct variable *v, const char *base,
703 char suffix[SHORT_NAME_LEN + 1];
704 char short_name[SHORT_NAME_LEN + 1];
709 assert (suffix_number >= 0);
712 var_set_short_name (v, base);
714 /* Compose suffix. */
715 start = end = suffix + sizeof suffix - 1;
719 *--start = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[suffix_number % 26];
720 if (start <= suffix + 1)
721 msg (SE, _("Variable suffix too large."));
724 while (suffix_number > 0);
727 /* Append suffix to V's short name. */
728 str_copy_trunc (short_name, sizeof short_name, base);
730 if (len + strlen (short_name) > SHORT_NAME_LEN)
731 ofs = SHORT_NAME_LEN - len;
733 ofs = strlen (short_name);
734 strcpy (short_name + ofs, start);
737 var_set_short_name (v, short_name);
741 /* Returns the dictionary class corresponding to a variable named
NAME. */
/* NOTE(review): the return type and everything after the
   assertion are missing from this listing; restore them from
   the upstream file. */
744 dict_class_from_id (const char *name)
746 assert (name != NULL);
759 /* Returns the name of dictionary class DICT_CLASS. */
/* NOTE(review): only one return statement of this definition
   is visible in this listing; the return type, the dispatch on
   DICT_CLASS and the other cases are missing and must be
   restored from the upstream file. */
761 dict_class_to_name (enum dict_class dict_class)
766 return _("ordinary");
776 /* Return the number of bytes used when writing case_data for a variable
of WIDTH. */
/* NOTE(review): the return type, the braces, the condition that
   guards the first return (presumably a test for a WIDTH of 0)
   and the final `else' are missing from this listing; restore
   them from the upstream file.  In the visible code, widths up
   to MAX_LONG_STRING are rounded up to a multiple of
   MAX_SHORT_STRING; wider values are counted as chunks of
   EFFECTIVE_LONG_STRING_LENGTH, each occupying
   MAX_LONG_STRING + 1 bytes, plus the remainder, and the total
   is rounded up the same way. */
779 width_to_bytes(int width)
784 return MAX_SHORT_STRING ;
785 else if (width <= MAX_LONG_STRING)
786 return ROUND_UP (width, MAX_SHORT_STRING);
789 int chunks = width / EFFECTIVE_LONG_STRING_LENGTH ;
790 int remainder = width % EFFECTIVE_LONG_STRING_LENGTH ;
791 int bytes = remainder + (chunks * (MAX_LONG_STRING + 1) );
792 return ROUND_UP (bytes, MAX_SHORT_STRING);