X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fvariable.c;h=a60adcafcd8d25422f7be32d8f870059c2c028f3;hb=f15c854d8500105766b2f5666bb62b983ff24f88;hp=b43fce7e41d6f844ff74c35c183225af1827962b;hpb=ade89522516fe8cc47299d8da6e28e129d96de94;p=pspp-builds.git diff --git a/src/data/variable.c b/src/data/variable.c index b43fce7e..a60adcaf 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -1,210 +1,245 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . 
*/ #include #include "variable.h" -#include -#include + #include -#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include #include -#include "dictionary.h" #include -#include "identifier.h" -#include +#include #include -#include "value-labels.h" #include "minmax.h" +#include "xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) -/* Returns true if VAR_TYPE is a valid variable type. */ -bool -var_type_is_valid (enum var_type var_type) -{ - return var_type == NUMERIC || var_type == ALPHA; -} +/* A variable. */ +struct variable + { + /* Dictionary information. */ + char name[VAR_NAME_LEN + 1]; /* Variable name. Mixed case. */ + int width; /* 0 for numeric, otherwise string width. */ + struct missing_values miss; /* Missing values. */ + struct fmt_spec print; /* Default format for PRINT. */ + struct fmt_spec write; /* Default format for WRITE. */ + struct val_labs *val_labs; /* Value labels. */ + char *label; /* Variable label. */ + + /* GUI information. */ + enum measure measure; /* Nominal, ordinal, or continuous. */ + int display_width; /* Width of data editor column. */ + enum alignment alignment; /* Alignment of data in GUI. */ + + /* Case information. */ + bool leave; /* Leave value from case to case? */ + + /* Data for use by containing dictionary. */ + struct vardict_info vardict; + + /* Used only for system and portable file input and output. + See short-names.h. */ + char **short_names; + size_t short_name_cnt; + + /* Each command may use these fields as needed. */ + void *aux; + void (*aux_dtor) (struct variable *); + + /* Values of a categorical variable. Procedures need + vectors with binary entries, so any variable of type ALPHA will + have its values stored here. */ + struct cat_vals *obs_vals; + + /* Custom attributes. */ + struct attrset attributes; + }; + +/* Creates and returns a new variable with the given NAME and + WIDTH and other fields initialized to default values. 
The + variable is not added to a dictionary; for that, use + dict_create_var instead. */ +struct variable * +var_create (const char *name, int width) +{ + struct variable *v; + enum val_type type; + + assert (width >= 0 && width <= MAX_STRING); + + v = xmalloc (sizeof *v); + v->vardict.dict_index = v->vardict.case_index = -1; + var_set_name (v, name); + v->width = width; + mv_init (&v->miss, width); + v->leave = var_must_leave (v); + type = val_type_from_width (width); + v->alignment = var_default_alignment (type); + v->measure = var_default_measure (type); + v->display_width = var_default_display_width (width); + v->print = v->write = var_default_formats (width); + v->val_labs = NULL; + v->label = NULL; + v->short_names = NULL; + v->short_name_cnt = 0; + v->aux = NULL; + v->aux_dtor = NULL; + v->obs_vals = NULL; + attrset_init (&v->attributes); -/* Returns an adjective describing the given variable TYPE, - suitable for use in phrases like "numeric variable". */ -const char * -var_type_adj (enum var_type type) -{ - return type == NUMERIC ? _("numeric") : _("string"); + return v; } -/* Returns a noun describing a value of the given variable TYPE, - suitable for use in phrases like "a number". */ -const char * -var_type_noun (enum var_type type) -{ - return type == NUMERIC ? _("number") : _("string"); -} - -/* Returns true if M is a valid variable measurement level, - false otherwise. */ -bool -measure_is_valid (enum measure m) -{ - return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE; -} +/* Creates and returns a clone of OLD_VAR. Most properties of + the new variable are copied from OLD_VAR, except: -/* Returns true if A is a valid alignment, - false otherwise. */ -bool -alignment_is_valid (enum alignment a) -{ - return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE; -} - -/* Assign auxiliary data AUX to variable V, which must not - already have auxiliary data. Before V's auxiliary data is - cleared, AUX_DTOR(V) will be called. 
*/ -void * -var_attach_aux (struct variable *v, - void *aux, void (*aux_dtor) (struct variable *)) -{ - assert (v->aux == NULL); - assert (aux != NULL); - v->aux = aux; - v->aux_dtor = aux_dtor; - return aux; -} + - The variable's short name is not copied, because there is + no reason to give a new variable with potentially a new + name the same short name. -/* Remove auxiliary data, if any, from V, and returns it, without - calling any associated destructor. */ -void * -var_detach_aux (struct variable *v) -{ - void *aux = v->aux; - assert (aux != NULL); - v->aux = NULL; - return aux; -} + - The new variable is not added to OLD_VAR's dictionary by + default. Use dict_clone_var, instead, to do that. -/* Clears auxiliary data, if any, from V, and calls any - associated destructor. */ -void -var_clear_aux (struct variable *v) + - Auxiliary data and obs_vals are not copied. */ +struct variable * +var_clone (const struct variable *old_var) { - assert (v != NULL); - if (v->aux != NULL) - { - if (v->aux_dtor != NULL) - v->aux_dtor (v); - v->aux = NULL; - } -} + struct variable *new_var = var_create (var_get_name (old_var), + var_get_width (old_var)); -/* This function is appropriate for use an auxiliary data - destructor (passed as AUX_DTOR to var_attach_aux()) for the - case where the auxiliary data should be passed to free(). 
*/ -void -var_dtor_free (struct variable *v) -{ - free (v->aux); + var_set_missing_values (new_var, var_get_missing_values (old_var)); + var_set_print_format (new_var, var_get_print_format (old_var)); + var_set_write_format (new_var, var_get_write_format (old_var)); + var_set_value_labels (new_var, var_get_value_labels (old_var)); + var_set_label (new_var, var_get_label (old_var)); + var_set_measure (new_var, var_get_measure (old_var)); + var_set_display_width (new_var, var_get_display_width (old_var)); + var_set_alignment (new_var, var_get_alignment (old_var)); + var_set_leave (new_var, var_get_leave (old_var)); + var_set_attributes (new_var, var_get_attributes (old_var)); + + return new_var; } -/* Duplicate a value. - The caller is responsible for freeing the returned value -*/ -union value * -value_dup (const union value *val, int width) +/* Create a variable of the specified WIDTH to be used for + internal calculations only. The variable is assigned a unique + dictionary index and a case index of CASE_IDX. */ +struct variable * +var_create_internal (int case_idx, int width) { - size_t bytes = MAX(width, sizeof *val); - - union value *v = xmalloc (bytes); - memcpy (v, val, bytes); - return v; -} + struct variable *v = var_create ("$internal", width); + struct vardict_info vdi; + static int counter = INT_MAX / 2; + vdi.dict = NULL; + vdi.case_index = case_idx; + vdi.dict_index = counter++; + if (counter == INT_MAX) + counter = INT_MAX / 2; + var_set_vardict (v, &vdi); -/* Compares A and B, which both have the given WIDTH, and returns - a strcmp()-type result. */ -int -compare_values (const union value *a, const union value *b, int width) -{ - if (width == 0) - return a->f < b->f ? -1 : a->f > b->f; - else - return memcmp (a->s, b->s, MIN(MAX_SHORT_STRING, width)); + return v; } -/* Create a hash of v */ -unsigned -hash_value(const union value *v, int width) +/* Destroys variable V. + V must not belong to a dictionary. If it does, use + dict_delete_var instead. 
*/ +void +var_destroy (struct variable *v) { - unsigned id_hash; - - if ( 0 == width ) - id_hash = hsh_hash_double (v->f); - else - id_hash = hsh_hash_bytes (v->s, MIN(MAX_SHORT_STRING, width)); - - return id_hash; + if (v != NULL) + { + if (var_has_vardict (v)) + { + const struct vardict_info *vdi = var_get_vardict (v); + assert (vdi->dict == NULL); + } + mv_destroy (&v->miss); + cat_stored_values_destroy (v->obs_vals); + var_clear_short_names (v); + var_clear_aux (v); + val_labs_destroy (v->val_labs); + var_clear_label (v); + free (v); + } } +/* Variable names. */ + /* Return variable V's name. */ const char * -var_get_name (const struct variable *v) +var_get_name (const struct variable *v) { return v->name; } -/* Sets V's name to NAME. */ +/* Sets V's name to NAME. + Do not use this function for a variable in a dictionary. Use + dict_rename_var instead. */ void -var_set_name (struct variable *v, const char *name) +var_set_name (struct variable *v, const char *name) { - assert (name[0] != '\0'); - assert (lex_id_to_token (ss_cstr (name)) == T_ID); + assert (v->vardict.dict_index == -1); + assert (var_is_plausible_name (name, false)); str_copy_trunc (v->name, sizeof v->name, name); + dict_var_changed (v); } /* Returns true if NAME is an acceptable name for a variable, false otherwise. If ISSUE_ERROR is true, issues an explanatory error message on failure. */ bool -var_is_valid_name (const char *name, bool issue_error) +var_is_valid_name (const char *name, bool issue_error) { bool plausible; size_t length, i; - + assert (name != NULL); - /* Note that strlen returns number of BYTES, not the number of + /* Note that strlen returns number of BYTES, not the number of CHARACTERS */ length = strlen (name); plausible = var_is_plausible_name(name, issue_error); - if ( ! plausible ) + if ( ! 
plausible ) return false; if (!lex_is_id1 (name[0])) { if (issue_error) - msg (SE, _("Character `%c' (in %s), may not appear " + msg (SE, _("Character `%c' (in %s) may not appear " "as the first character in a variable name."), name[0], name); return false; @@ -213,7 +248,7 @@ var_is_valid_name (const char *name, bool issue_error) for (i = 0; i < length; i++) { - if (!lex_is_idn (name[i])) + if (!lex_is_idn (name[i])) { if (issue_error) msg (SE, _("Character `%c' (in %s) may not appear in " @@ -226,37 +261,36 @@ var_is_valid_name (const char *name, bool issue_error) return true; } -/* - Returns true if NAME is an plausible name for a variable, +/* Returns true if NAME is an plausible name for a variable, false otherwise. If ISSUE_ERROR is true, issues an - explanatory error message on failure. + explanatory error message on failure. This function makes no use of LC_CTYPE. */ bool -var_is_plausible_name (const char *name, bool issue_error) +var_is_plausible_name (const char *name, bool issue_error) { size_t length; - + assert (name != NULL); - /* Note that strlen returns number of BYTES, not the number of + /* Note that strlen returns number of BYTES, not the number of CHARACTERS */ length = strlen (name); - if (length < 1) + if (length < 1) { if (issue_error) msg (SE, _("Variable name cannot be empty string.")); return false; } - else if (length > LONG_NAME_LEN) + else if (length > VAR_NAME_LEN) { if (issue_error) msg (SE, _("Variable name %s exceeds %d-character limit."), - name, (int) LONG_NAME_LEN); + name, (int) VAR_NAME_LEN); return false; } - if (lex_id_to_token (ss_cstr (name)) != T_ID) + if (lex_id_to_token (ss_cstr (name)) != T_ID) { if (issue_error) msg (SE, _("`%s' may not be used as a variable name because it " @@ -267,30 +301,38 @@ var_is_plausible_name (const char *name, bool issue_error) return true; } +/* Returns VAR's dictionary class. 
*/ +enum dict_class +var_get_dict_class (const struct variable *var) +{ + return dict_class_from_id (var->name); +} + /* A hsh_compare_func that orders variables A and B by their names. */ int -compare_var_names (const void *a_, const void *b_, const void *aux UNUSED) +compare_vars_by_name (const void *a_, const void *b_, const void *aux UNUSED) { const struct variable *a = a_; const struct variable *b = b_; - return strcasecmp (var_get_name (a), var_get_name (b)); + return strcasecmp (a->name, b->name); } /* A hsh_hash_func that hashes variable V based on its name. */ unsigned -hash_var_name (const void *v_, const void *aux UNUSED) +hash_var_by_name (const void *v_, const void *aux UNUSED) { const struct variable *v = v_; - return hsh_hash_case_string (var_get_name (v)); + return hash_case_string (v->name, 0); } /* A hsh_compare_func that orders pointers to variables A and B by their names. */ int -compare_var_ptr_names (const void *a_, const void *b_, const void *aux UNUSED) +compare_var_ptrs_by_name (const void *a_, const void *b_, + const void *aux UNUSED) { struct variable *const *a = a_; struct variable *const *b = b_; @@ -298,138 +340,126 @@ compare_var_ptr_names (const void *a_, const void *b_, const void *aux UNUSED) return strcasecmp (var_get_name (*a), var_get_name (*b)); } +/* A hsh_compare_func that orders pointers to variables A and B + by their dictionary indexes. */ +int +compare_var_ptrs_by_dict_index (const void *a_, const void *b_, + const void *aux UNUSED) +{ + struct variable *const *a = a_; + struct variable *const *b = b_; + size_t a_index = var_get_dict_index (*a); + size_t b_index = var_get_dict_index (*b); + + return a_index < b_index ? -1 : a_index > b_index; +} + /* A hsh_hash_func that hashes pointer to variable V based on its name. 
*/ unsigned -hash_var_ptr_name (const void *v_, const void *aux UNUSED) +hash_var_ptr_by_name (const void *v_, const void *aux UNUSED) { struct variable *const *v = v_; - return hsh_hash_case_string (var_get_name (*v)); + return hash_case_string (var_get_name (*v), 0); } -/* Returns the type of a variable with the given WIDTH. */ -static enum var_type -width_to_type (int width) -{ - return width == 0 ? NUMERIC : ALPHA; -} - /* Returns the type of variable V. */ -enum var_type -var_get_type (const struct variable *v) +enum val_type +var_get_type (const struct variable *v) { - return width_to_type (v->width); + return val_type_from_width (v->width); } /* Returns the width of variable V. */ int -var_get_width (const struct variable *v) +var_get_width (const struct variable *v) { return v->width; } -/* Sets the width of V to WIDTH. */ +/* Changes the width of V to NEW_WIDTH. + This function should be used cautiously. */ void -var_set_width (struct variable *v, int new_width) +var_set_width (struct variable *v, int new_width) { - enum var_type new_type = width_to_type (new_width); - + const int old_width = v->width; + + if (old_width == new_width) + return; + if (mv_is_resizable (&v->miss, new_width)) mv_resize (&v->miss, new_width); else - mv_init (&v->miss, new_width); + { + mv_destroy (&v->miss); + mv_init (&v->miss, new_width); + } - if (v->val_labs != NULL) + if (v->val_labs != NULL) { if (val_labs_can_set_width (v->val_labs, new_width)) val_labs_set_width (v->val_labs, new_width); - else + else { val_labs_destroy (v->val_labs); v->val_labs = NULL; } } - - if (var_get_type (v) != new_type) - { - v->print = (new_type == NUMERIC - ? fmt_for_output (FMT_F, 8, 2) - : fmt_for_output (FMT_A, new_width, 0)); - v->write = v->print; - } - else if (new_type == ALPHA) - { - v->print.w = v->print.type == FMT_AHEX ? new_width * 2 : new_width; - v->write.w = v->write.type == FMT_AHEX ? 
new_width * 2 : new_width; - } + + fmt_resize (&v->print, new_width); + fmt_resize (&v->write, new_width); v->width = new_width; + dict_var_resized (v, old_width); + dict_var_changed (v); } /* Returns true if variable V is numeric, false otherwise. */ bool -var_is_numeric (const struct variable *v) +var_is_numeric (const struct variable *v) { - return var_get_type (v) == NUMERIC; + return var_get_type (v) == VAL_NUMERIC; } /* Returns true if variable V is a string variable, false otherwise. */ bool -var_is_alpha (const struct variable *v) -{ - return var_get_type (v) == ALPHA; -} - -/* Returns true if variable V is a short string variable, false - otherwise. */ -bool -var_is_short_string (const struct variable *v) -{ - return v->width > 0 && v->width <= MAX_SHORT_STRING; -} - -/* Returns true if variable V is a long string variable, false - otherwise. */ -bool -var_is_long_string (const struct variable *v) +var_is_alpha (const struct variable *v) { - return v->width > MAX_SHORT_STRING; + return var_get_type (v) == VAL_STRING; } - -/* Returns true if variable V is a very long string variable, - false otherwise. */ -bool -var_is_very_long_string (const struct variable *v) -{ - return v->width > MAX_LONG_STRING; -} - + /* Returns variable V's missing values. */ const struct missing_values * -var_get_missing_values (const struct variable *v) +var_get_missing_values (const struct variable *v) { return &v->miss; } -/* Sets variable V's missing values to MISS, which must be of the - correct width. */ +/* Sets variable V's missing values to MISS, which must be of V's + width or at least resizable to V's width. + If MISS is null, then V's missing values, if any, are + cleared. 
*/ void var_set_missing_values (struct variable *v, const struct missing_values *miss) { - if (miss != NULL) + if (miss != NULL) { - assert (v->width == mv_get_width (miss)); + assert (mv_is_resizable (miss, v->width)); + mv_destroy (&v->miss); mv_copy (&v->miss, miss); + mv_resize (&v->miss, v->width); } else - mv_init (&v->miss, v->width); + mv_clear (&v->miss); + + dict_var_changed (v); } /* Sets variable V to have no user-missing values. */ void -var_clear_missing_values (struct variable *v) +var_clear_missing_values (struct variable *v) { var_set_missing_values (v, NULL); } @@ -437,120 +467,214 @@ var_clear_missing_values (struct variable *v) /* Returns true if V has any user-missing values, false otherwise. */ bool -var_has_missing_values (const struct variable *v) +var_has_missing_values (const struct variable *v) { return !mv_is_empty (&v->miss); } -/* Returns true if VALUE is system missing or user-missing value - for V, false otherwise. */ +/* Returns true if VALUE is in the given CLASS of missing values + in V, false otherwise. */ bool -var_is_value_missing (const struct variable *v, const union value *value) +var_is_value_missing (const struct variable *v, const union value *value, + enum mv_class class) { - return mv_is_value_missing (&v->miss, value); + return mv_is_value_missing (&v->miss, value, class); } -/* Returns true if D is system missing or a missing value in V, - false otherwise. +/* Returns true if D is in the given CLASS of missing values in + V, false otherwise. V must be a numeric variable. */ bool -var_is_num_missing (const struct variable *v, double d) +var_is_num_missing (const struct variable *v, double d, enum mv_class class) { - return mv_is_num_missing (&v->miss, d); + return mv_is_num_missing (&v->miss, d, class); } /* Returns true if S[] is a missing value for V, false otherwise. S[] must contain exactly as many characters as V's width. V must be a string variable. 
*/ bool -var_is_str_missing (const struct variable *v, const char s[]) +var_is_str_missing (const struct variable *v, const char s[], + enum mv_class class) +{ + return mv_is_str_missing (&v->miss, s, class); +} + +/* Returns variable V's value labels, + possibly a null pointer if it has none. */ +const struct val_labs * +var_get_value_labels (const struct variable *v) { - return mv_is_str_missing (&v->miss, s); + return v->val_labs; } -/* Returns true if VALUE is a missing value for V, false - otherwise. */ +/* Returns true if variable V has at least one value label. */ bool -var_is_value_user_missing (const struct variable *v, const union value *value) +var_has_value_labels (const struct variable *v) { - return mv_is_value_user_missing (&v->miss, value); + return val_labs_count (v->val_labs) > 0; } -/* Returns true if D is a user-missing value for V, false - otherwise. V must be a numeric variable. */ -bool -var_is_num_user_missing (const struct variable *v, double d) +/* Sets variable V's value labels to a copy of VLS, + which must have a width equal to V's width or one that can be + changed to V's width. + If VLS is null, then V's value labels, if any, are removed. */ +void +var_set_value_labels (struct variable *v, const struct val_labs *vls) { - return mv_is_num_user_missing (&v->miss, d); + val_labs_destroy (v->val_labs); + v->val_labs = NULL; + + if (vls != NULL) + { + assert (val_labs_can_set_width (vls, v->width)); + v->val_labs = val_labs_clone (vls); + val_labs_set_width (v->val_labs, v->width); + dict_var_changed (v); + } } -/* Returns true if S[] is a missing value for V, false otherwise. - V must be a string variable. - S[] must contain exactly as many characters as V's width. */ -bool -var_is_str_user_missing (const struct variable *v, const char s[]) +/* Makes sure that V has a set of value labels, + by assigning one to it if necessary. 
*/ +static void +alloc_value_labels (struct variable *v) { - return mv_is_str_user_missing (&v->miss, s); + if (v->val_labs == NULL) + v->val_labs = val_labs_create (v->width); } -/* Returns true if V is a numeric variable and VALUE is the - system missing value. */ +/* Attempts to add a value label with the given VALUE and LABEL + to V. Returns true if successful, false if VALUE has an + existing label or if V is a long string variable. */ bool -var_is_value_system_missing (const struct variable *v, - const union value *value) +var_add_value_label (struct variable *v, + const union value *value, const char *label) +{ + alloc_value_labels (v); + return val_labs_add (v->val_labs, value, label); +} + +/* Adds or replaces a value label with the given VALUE and LABEL + to V. + Has no effect if V is a long string variable. */ +void +var_replace_value_label (struct variable *v, + const union value *value, const char *label) +{ + alloc_value_labels (v); + val_labs_replace (v->val_labs, value, label); +} + +/* Removes V's value labels, if any. */ +void +var_clear_value_labels (struct variable *v) +{ + var_set_value_labels (v, NULL); +} + +/* Returns the label associated with VALUE for variable V, + or a null pointer if none. */ +const char * +var_lookup_value_label (const struct variable *v, const union value *value) +{ + return val_labs_find (v->val_labs, value); +} + +/* Append STR with a string representing VALUE for variable V. + That is, if VALUE has a label, append that label, + otherwise format VALUE and append the formatted string. + STR must be a pointer to an initialised struct string. 
+*/ +void +var_append_value_name (const struct variable *v, const union value *value, + struct string *str) { - return mv_is_value_system_missing (&v->miss, value); + const char *name = var_lookup_value_label (v, value); + if (name == NULL) + { + char *s = data_out (value, &v->print); + ds_put_cstr (str, s); + free (s); + } + else + ds_put_cstr (str, name); } /* Print and write formats. */ /* Returns V's print format specification. */ const struct fmt_spec * -var_get_print_format (const struct variable *v) +var_get_print_format (const struct variable *v) { return &v->print; } /* Sets V's print format specification to PRINT, which must be a - valid format specification for outputting a variable of V's - width. */ + valid format specification for a variable of V's width + (ordinarily an output format, but input formats are not + rejected). */ void -var_set_print_format (struct variable *v, const struct fmt_spec *print) +var_set_print_format (struct variable *v, const struct fmt_spec *print) { assert (fmt_check_width_compat (print, v->width)); v->print = *print; + dict_var_changed (v); } /* Returns V's write format specification. */ const struct fmt_spec * -var_get_write_format (const struct variable *v) +var_get_write_format (const struct variable *v) { return &v->write; } /* Sets V's write format specification to WRITE, which must be a - valid format specification for outputting a variable of V's - width. */ + valid format specification for a variable of V's width + (ordinarily an output format, but input formats are not + rejected). */ void -var_set_write_format (struct variable *v, const struct fmt_spec *write) +var_set_write_format (struct variable *v, const struct fmt_spec *write) { assert (fmt_check_width_compat (write, v->width)); v->write = *write; + dict_var_changed (v); } /* Sets V's print and write format specifications to FORMAT, - which must be a valid format specification for outputting a - variable of V's width. 
*/ + which must be a valid format specification for a variable of + V's width (ordinarily an output format, but input formats are + not rejected). */ void -var_set_both_formats (struct variable *v, const struct fmt_spec *format) +var_set_both_formats (struct variable *v, const struct fmt_spec *format) { var_set_print_format (v, format); var_set_write_format (v, format); } + +/* Returns the default print and write format for a variable of + the given TYPE, as set by var_create. The return value can be + used to reset a variable's print and write formats to the + default. */ +struct fmt_spec +var_default_formats (int width) +{ + return (width == 0 + ? fmt_for_output (FMT_F, 8, 2) + : fmt_for_output (FMT_A, width, 0)); +} +/* Return a string representing this variable, in the form most + appropriate from a human factors perspective, that is, its + variable label if it has one, otherwise its name. */ +const char * +var_to_string (const struct variable *v) +{ + return v->label != NULL ? v->label : v->name; +} + /* Returns V's variable label, or a null pointer if it has none. */ const char * -var_get_label (const struct variable *v) +var_get_label (const struct variable *v) { return v->label; } @@ -561,24 +685,25 @@ var_get_label (const struct variable *v) (after stripping white space), then V's variable label (if any) is removed. */ void -var_set_label (struct variable *v, const char *label) +var_set_label (struct variable *v, const char *label) { free (v->label); v->label = NULL; - if (label != NULL) + if (label != NULL) { struct substring s = ss_cstr (label); ss_trim (&s, ss_cstr (CC_SPACES)); ss_truncate (&s, 255); - if (!ss_is_empty (s)) + if (!ss_is_empty (s)) v->label = ss_xstrdup (s); } + dict_var_changed (v); } /* Removes any variable label from V. 
*/ void -var_clear_label (struct variable *v) +var_clear_label (struct variable *v) { var_set_label (v, NULL); } @@ -586,211 +711,374 @@ var_clear_label (struct variable *v) /* Returns true if V has a variable V, false otherwise. */ bool -var_has_label (const struct variable *v) +var_has_label (const struct variable *v) { return v->label != NULL; } +/* Returns true if M is a valid variable measurement level, + false otherwise. */ +bool +measure_is_valid (enum measure m) +{ + return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == MEASURE_SCALE; +} + /* Returns V's measurement level. */ enum measure -var_get_measure (const struct variable *v) +var_get_measure (const struct variable *v) { return v->measure; } /* Sets V's measurement level to MEASURE. */ void -var_set_measure (struct variable *v, enum measure measure) +var_set_measure (struct variable *v, enum measure measure) { assert (measure_is_valid (measure)); v->measure = measure; + dict_var_changed (v); } +/* Returns the default measurement level for a variable of the + given TYPE, as set by var_create. The return value can be + used to reset a variable's measurement level to the + default. */ +enum measure +var_default_measure (enum val_type type) +{ + return type == VAL_NUMERIC ? MEASURE_SCALE : MEASURE_NOMINAL; +} + /* Returns V's display width, which applies only to GUIs. */ int -var_get_display_width (const struct variable *v) +var_get_display_width (const struct variable *v) { return v->display_width; } /* Sets V's display width to DISPLAY_WIDTH. */ void -var_set_display_width (struct variable *v, int display_width) +var_set_display_width (struct variable *v, int new_width) +{ + int old_width = v->display_width; + + v->display_width = new_width; + + if ( old_width != new_width) + dict_var_display_width_changed (v); + + dict_var_changed (v); +} + +/* Returns the default display width for a variable of the given + WIDTH, as set by var_create. 
The return value can be used to + reset a variable's display width to the default. */ +int +var_default_display_width (int width) +{ + return width == 0 ? 8 : MIN (width, 32); +} + +/* Returns true if A is a valid alignment, + false otherwise. */ +bool +alignment_is_valid (enum alignment a) { - v->display_width = display_width; + return a == ALIGN_LEFT || a == ALIGN_RIGHT || a == ALIGN_CENTRE; } /* Returns V's display alignment, which applies only to GUIs. */ enum alignment -var_get_alignment (const struct variable *v) +var_get_alignment (const struct variable *v) { return v->alignment; } /* Sets V's display alignment to ALIGNMENT. */ void -var_set_alignment (struct variable *v, enum alignment alignment) +var_set_alignment (struct variable *v, enum alignment alignment) { assert (alignment_is_valid (alignment)); v->alignment = alignment; + dict_var_changed (v); } - -/* Returns the number of "union value"s need to store a value of - variable V. */ -size_t -var_get_value_cnt (const struct variable *v) + +/* Returns the default display alignment for a variable of the + given TYPE, as set by var_create. The return value can be + used to reset a variable's display alignment to the default. */ +enum alignment +var_default_alignment (enum val_type type) { - return v->width == 0 ? 1 : DIV_RND_UP (v->width, MAX_SHORT_STRING); + return type == VAL_NUMERIC ? ALIGN_RIGHT : ALIGN_LEFT; } + +/* Whether variables' values should be preserved from case to + case. */ -/* Return whether variable V's values should be preserved from - case to case. */ +/* Returns true if variable V's value should be left from case to + case, instead of being reset to system-missing or blanks. */ bool -var_get_leave (const struct variable *v) +var_get_leave (const struct variable *v) { return v->leave; } + +/* Sets V's leave setting to LEAVE. 
*/ +void +var_set_leave (struct variable *v, bool leave) +{ + assert (leave || !var_must_leave (v)); + v->leave = leave; + dict_var_changed (v); +} + +/* Returns true if V must be left from case to case, + false if it can be set either way. */ +bool +var_must_leave (const struct variable *v) +{ + return var_get_dict_class (v) == DC_SCRATCH; +} -/* Returns V's short name, if it has one, or a null pointer - otherwise. +/* Returns the number of short names stored in VAR. Short names are used only for system and portable file input and output. They are upper-case only, not necessarily unique, and limited to SHORT_NAME_LEN characters (plus a null - terminator). Any variable may have no short name, indicated - by returning a null pointer. */ + terminator). Ordinarily a variable has at most one short + name, but very long string variables (longer than 255 bytes) + may have more. A variable might not have any short name at + all if it hasn't been saved to or read from a system or + portable file. */ +size_t +var_get_short_name_cnt (const struct variable *var) +{ + return var->short_name_cnt; +} + +/* Returns VAR's short name with the given IDX, if it has one + with that index, or a null pointer otherwise. Short names may + be sparse: even if IDX is less than the number of short names + in VAR, this function may return a null pointer. */ const char * -var_get_short_name (const struct variable *v) +var_get_short_name (const struct variable *var, size_t idx) { - return v->short_name[0] != '\0' ? v->short_name : NULL; + return idx < var->short_name_cnt ? var->short_names[idx] : NULL; } -/* Sets V's short_name to SHORT_NAME, truncating it to - SHORT_NAME_LEN characters and converting it to uppercase in - the process. Specifying a null pointer for SHORT_NAME clears - the variable's short name. */ +/* Sets VAR's short name with the given IDX to SHORT_NAME, + truncating it to SHORT_NAME_LEN characters and converting it + to uppercase in the process. 
Specifying a null pointer for + SHORT_NAME clears the specified short name. */ void -var_set_short_name (struct variable *v, const char *short_name) +var_set_short_name (struct variable *var, size_t idx, const char *short_name) { - assert (v != NULL); + assert (var != NULL); assert (short_name == NULL || var_is_plausible_name (short_name, false)); + /* Clear old short name numbered IDX, if any. */ + if (idx < var->short_name_cnt) + { + free (var->short_names[idx]); + var->short_names[idx] = NULL; + } + + /* Install new short name for IDX. */ if (short_name != NULL) { - str_copy_trunc (v->short_name, sizeof v->short_name, short_name); - str_uppercase (v->short_name); + if (idx >= var->short_name_cnt) + { + size_t old_cnt = var->short_name_cnt; + size_t i; + var->short_name_cnt = MAX (idx * 2, 1); + var->short_names = xnrealloc (var->short_names, var->short_name_cnt, + sizeof *var->short_names); + for (i = old_cnt; i < var->short_name_cnt; i++) + var->short_names[i] = NULL; + } + var->short_names[idx] = xstrndup (short_name, MAX_SHORT_STRING); + str_uppercase (var->short_names[idx]); } - else - v->short_name[0] = '\0'; + + dict_var_changed (var); } -/* Clears V's short name. */ +/* Clears V's short names. */ void -var_clear_short_name (struct variable *v) +var_clear_short_names (struct variable *v) { - assert (v != NULL); + size_t i; - v->short_name[0] = '\0'; + for (i = 0; i < v->short_name_cnt; i++) + free (v->short_names[i]); + free (v->short_names); + v->short_names = NULL; + v->short_name_cnt = 0; } + +/* Relationship with dictionary. */ -/* Sets V's short name to BASE, followed by a suffix of the form - _A, _B, _C, ..., _AA, _AB, etc. according to the value of - SUFFIX_NUMBER. Truncates BASE as necessary to fit. */ -void -var_set_short_name_suffix (struct variable *v, const char *base, - int suffix_number) +/* Returns V's index within its dictionary, the value + for which "dict_get_var (dict, index)" will return V. + V must be in a dictionary. 
*/ +size_t +var_get_dict_index (const struct variable *v) { - char suffix[SHORT_NAME_LEN + 1]; - char short_name[SHORT_NAME_LEN + 1]; - char *start, *end; - int len, ofs; + assert (v->vardict.dict_index != -1); + return v->vardict.dict_index; +} - assert (v != NULL); - assert (suffix_number >= 0); +/* Returns V's index within the case represented by its + dictionary, that is, the value for which "case_data_idx (case, + index)" will return the data for V in that case. + V must be in a dictionary. */ +size_t +var_get_case_index (const struct variable *v) +{ + assert (v->vardict.case_index != -1); + return v->vardict.case_index; +} + +/* Returns V's auxiliary data, or a null pointer if none has been + attached. */ +void * +var_get_aux (const struct variable *v) +{ + return v->aux; +} - /* Set base name. */ - var_set_short_name (v, base); +/* Assign auxiliary data AUX to variable V, which must not + already have auxiliary data. Before V's auxiliary data is + cleared, AUX_DTOR(V) will be called. (var_dtor_free, below, + may be appropriate for use as AUX_DTOR.) */ +void * +var_attach_aux (const struct variable *v_, + void *aux, void (*aux_dtor) (struct variable *)) +{ + struct variable *v = (struct variable *) v_ ; /* cast away const */ + assert (v->aux == NULL); + assert (aux != NULL); + v->aux = aux; + v->aux_dtor = aux_dtor; + return aux; +} + +/* Removes V's auxiliary data, which must be present, and + returns it without calling any associated destructor. */ +void * +var_detach_aux (struct variable *v) +{ + void *aux = v->aux; + assert (aux != NULL); + v->aux = NULL; + return aux; +} - /* Compose suffix. */ - start = end = suffix + sizeof suffix - 1; - *end = '\0'; - do +/* Clears auxiliary data, if any, from V, and calls any + associated destructor.
*/ +void +var_clear_aux (struct variable *v) +{ + assert (v != NULL); + if (v->aux != NULL) { - *--start = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[suffix_number % 26]; - if (start <= suffix + 1) - msg (SE, _("Variable suffix too large.")); - suffix_number /= 26; + if (v->aux_dtor != NULL) + v->aux_dtor (v); + v->aux = NULL; } - while (suffix_number > 0); - *--start = '_'; - - /* Append suffix to V's short name. */ - str_copy_trunc (short_name, sizeof short_name, base); - len = end - start; - if (len + strlen (short_name) > SHORT_NAME_LEN) - ofs = SHORT_NAME_LEN - len; - else - ofs = strlen (short_name); - strcpy (short_name + ofs, start); +} - /* Set name. */ - var_set_short_name (v, short_name); +/* This function is appropriate for use as an auxiliary data + destructor (passed as AUX_DTOR to var_attach_aux()) for the + case where the auxiliary data should be passed to free(). */ +void +var_dtor_free (struct variable *v) +{ + free (v->aux); } + +/* Observed categorical values. */ +/* Returns V's observed categorical values, + which V must have. */ +struct cat_vals * +var_get_obs_vals (const struct variable *v) +{ + assert (v->obs_vals != NULL); + return v->obs_vals; +} -/* Returns the dictionary class corresponding to a variable named - NAME. */ -enum dict_class -dict_class_from_id (const char *name) +/* Sets V's observed categorical values to CAT_VALS. + V becomes the owner of CAT_VALS. */ +void +var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals) { - assert (name != NULL); + struct variable *v = (struct variable *) v_ ; /* cast away const */ + cat_stored_values_destroy (v->obs_vals); + v->obs_vals = cat_vals; +} - switch (name[0]) - { - default: - return DC_ORDINARY; - case '$': - return DC_SYSTEM; - case '#': - return DC_SCRATCH; - } +/* Returns true if V has observed categorical values, + false otherwise. */ +bool +var_has_obs_vals (const struct variable *v) +{ + return v->obs_vals != NULL; +} + +/* Returns variable V's attribute set.
The caller may examine or + modify the attribute set, but must not destroy it. Destroying + V, or calling var_set_attributes() on V, will also destroy its + attribute set. */ +struct attrset * +var_get_attributes (const struct variable *v) +{ + return (struct attrset *) &v->attributes; } -/* Returns the name of dictionary class DICT_CLASS. */ -const char * -dict_class_to_name (enum dict_class dict_class) +/* Replaces variable V's attribute set with a copy of ATTRS. */ +void +var_set_attributes (struct variable *v, const struct attrset *attrs) { - switch (dict_class) - { - case DC_ORDINARY: - return _("ordinary"); - case DC_SYSTEM: - return _("system"); - case DC_SCRATCH: - return _("scratch"); - default: - NOT_REACHED (); - } + attrset_destroy (&v->attributes); + attrset_clone (&v->attributes, attrs); } -/* Return the number of bytes used when writing case_data for a variable - of WIDTH */ -int -width_to_bytes(int width) +/* Returns true if V has any custom attributes, false if it has none. */ +bool +var_has_attributes (const struct variable *v) { - assert (width >= 0); + return attrset_count (&v->attributes) > 0; +} + +/* Returns V's vardict structure. */ +const struct vardict_info * +var_get_vardict (const struct variable *v) +{ + assert (var_has_vardict (v)); + return &v->vardict; +} - if ( width == 0 ) - return MAX_SHORT_STRING ; - else if (width <= MAX_LONG_STRING) - return ROUND_UP (width, MAX_SHORT_STRING); - else - { - int chunks = width / EFFECTIVE_LONG_STRING_LENGTH ; - int remainder = width % EFFECTIVE_LONG_STRING_LENGTH ; - int bytes = remainder + (chunks * (MAX_LONG_STRING + 1) ); - return ROUND_UP (bytes, MAX_SHORT_STRING); - } +/* Sets V's vardict data to VARDICT. */ +void +var_set_vardict (struct variable *v, const struct vardict_info *vardict) +{ + assert (vardict->dict_index >= 0); + assert (vardict->case_index >= 0); + v->vardict = *vardict; } +/* Returns true if V has vardict data.
*/ +bool +var_has_vardict (const struct variable *v) +{ + return v->vardict.dict_index != -1; +} +/* Clears V's vardict data. */ +void +var_clear_vardict (struct variable *v) +{ + v->vardict.dict_index = v->vardict.case_index = -1; +}