X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fvariable.c;h=029e3f49dd74739f6c6a3f57b98f7300bdbc7f4b;hb=9ade26c8349b4434008c46cf09bc7473ec743972;hp=ccbe65dc0ed9757be75091637bb346b276182d84;hpb=ddd7c113f3e50c8d87f6a677856799d05a1f40c7;p=pspp-builds.git diff --git a/src/data/variable.c b/src/data/variable.c index ccbe65dc..029e3f49 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,28 +15,29 @@ along with this program. If not, see . */ #include -#include "variable.h" + +#include "data/variable.h" #include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "xalloc.h" +#include "data/attributes.h" +#include "data/data-out.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/identifier.h" +#include "data/missing-values.h" +#include "data/value-labels.h" +#include "data/vardict.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/hash-functions.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -45,7 +46,7 @@ struct variable { /* Dictionary information. */ - char name[VAR_NAME_LEN + 1]; /* Variable name. Mixed case. */ + char *name; /* Variable name. Mixed case. */ int width; /* 0 for numeric, otherwise string width. */ struct missing_values miss; /* Missing values. */ struct fmt_spec print; /* Default format for PRINT. */ @@ -62,7 +63,7 @@ struct variable bool leave; /* Leave value from case to case? */ /* Data for use by containing dictionary. */ - struct vardict_info vardict; + struct vardict_info *vardict; /* Used only for system and portable file input and output. See short-names.h. */ @@ -73,11 +74,6 @@ struct variable void *aux; void (*aux_dtor) (struct variable *); - /* Values of a categorical variable. Procedures need - vectors with binary entries, so any variable of type ALPHA will - have its values stored here. */ - struct cat_vals *obs_vals; - /* Custom attributes. */ struct attrset attributes; }; @@ -95,7 +91,8 @@ var_create (const char *name, int width) assert (width >= 0 && width <= MAX_STRING); v = xmalloc (sizeof *v); - v->vardict.dict_index = v->vardict.case_index = -1; + v->vardict = NULL; + v->name = NULL; var_set_name (v, name); v->width = width; mv_init (&v->miss, width); @@ -111,7 +108,6 @@ var_create (const char *name, int width) v->short_name_cnt = 0; v->aux = NULL; v->aux_dtor = NULL; - v->obs_vals = NULL; attrset_init (&v->attributes); return v; @@ -126,8 +122,7 @@ var_create (const char *name, int width) - The new variable is not added to OLD_VAR's dictionary by default. Use dict_clone_var, instead, to do that. - - - Auxiliary data and obs_vals are not copied. */ +*/ struct variable * var_clone (const struct variable *old_var) { @@ -138,7 +133,7 @@ var_clone (const struct variable *old_var) var_set_print_format (new_var, var_get_print_format (old_var)); var_set_write_format (new_var, var_get_write_format (old_var)); var_set_value_labels (new_var, var_get_value_labels (old_var)); - var_set_label (new_var, var_get_label (old_var)); + var_set_label (new_var, var_get_label (old_var), NULL, false); var_set_measure (new_var, var_get_measure (old_var)); var_set_display_width (new_var, var_get_display_width (old_var)); var_set_alignment (new_var, var_get_alignment (old_var)); @@ -148,23 +143,6 @@ var_clone (const struct variable *old_var) return new_var; } -/* Create a variable to be used for internal calculations only */ -struct variable * -var_create_internal (int case_idx) -{ - struct variable *v = var_create ("$internal", 0); - - struct vardict_info vdi; - - vdi.dict = NULL; - vdi.dict_index = 0; - vdi.case_index = case_idx; - - var_set_vardict (v, &vdi); - - return v; -} - /* Destroys variable V. V must not belong to a dictionary. If it does, use dict_delete_var instead. */ @@ -173,128 +151,40 @@ var_destroy (struct variable *v) { if (v != NULL) { - if (var_has_vardict (v)) - { - const struct vardict_info *vdi = var_get_vardict (v); - assert (vdi->dict == NULL); - } - cat_stored_values_destroy (v->obs_vals); + assert (!var_has_vardict (v)); + mv_destroy (&v->miss); var_clear_short_names (v); var_clear_aux (v); val_labs_destroy (v->val_labs); var_clear_label (v); + free (v->name); free (v); } } /* Variable names. */ -/* Return variable V's name. */ +/* Return variable V's name, as a UTF-8 encoded string. */ const char * var_get_name (const struct variable *v) { return v->name; } -/* Sets V's name to NAME. +/* Sets V's name to NAME, a UTF-8 encoded string. Do not use this function for a variable in a dictionary. Use dict_rename_var instead. */ void var_set_name (struct variable *v, const char *name) { - assert (v->vardict.dict_index == -1); - assert (var_is_plausible_name (name, false)); + assert (!var_has_vardict (v)); + assert (id_is_plausible (name, false)); - str_copy_trunc (v->name, sizeof v->name, name); + free (v->name); + v->name = xstrdup (name); dict_var_changed (v); } -/* Returns true if NAME is an acceptable name for a variable, - false otherwise. If ISSUE_ERROR is true, issues an - explanatory error message on failure. */ -bool -var_is_valid_name (const char *name, bool issue_error) -{ - bool plausible; - size_t length, i; - - assert (name != NULL); - - /* Note that strlen returns number of BYTES, not the number of - CHARACTERS */ - length = strlen (name); - - plausible = var_is_plausible_name(name, issue_error); - - if ( ! plausible ) - return false; - - - if (!lex_is_id1 (name[0])) - { - if (issue_error) - msg (SE, _("Character `%c' (in %s) may not appear " - "as the first character in a variable name."), - name[0], name); - return false; - } - - - for (i = 0; i < length; i++) - { - if (!lex_is_idn (name[i])) - { - if (issue_error) - msg (SE, _("Character `%c' (in %s) may not appear in " - "a variable name."), - name[i], name); - return false; - } - } - - return true; -} - -/* Returns true if NAME is an plausible name for a variable, - false otherwise. If ISSUE_ERROR is true, issues an - explanatory error message on failure. - This function makes no use of LC_CTYPE. -*/ -bool -var_is_plausible_name (const char *name, bool issue_error) -{ - size_t length; - - assert (name != NULL); - - /* Note that strlen returns number of BYTES, not the number of - CHARACTERS */ - length = strlen (name); - if (length < 1) - { - if (issue_error) - msg (SE, _("Variable name cannot be empty string.")); - return false; - } - else if (length > VAR_NAME_LEN) - { - if (issue_error) - msg (SE, _("Variable name %s exceeds %d-character limit."), - name, (int) VAR_NAME_LEN); - return false; - } - - if (lex_id_to_token (ss_cstr (name)) != T_ID) - { - if (issue_error) - msg (SE, _("`%s' may not be used as a variable name because it " - "is a reserved word."), name); - return false; - } - - return true; -} - /* Returns VAR's dictionary class. */ enum dict_class var_get_dict_class (const struct variable *var) @@ -319,7 +209,7 @@ hash_var_by_name (const void *v_, const void *aux UNUSED) { const struct variable *v = v_; - return hsh_hash_case_string (v->name); + return hash_case_string (v->name, 0); } /* A hsh_compare_func that orders pointers to variables A and B @@ -355,7 +245,7 @@ hash_var_ptr_by_name (const void *v_, const void *aux UNUSED) { struct variable *const *v = v_; - return hsh_hash_case_string (var_get_name (*v)); + return hash_case_string (var_get_name (*v), 0); } /* Returns the type of variable V. */ @@ -379,10 +269,16 @@ var_set_width (struct variable *v, int new_width) { const int old_width = v->width; + if (old_width == new_width) + return; + if (mv_is_resizable (&v->miss, new_width)) mv_resize (&v->miss, new_width); else - mv_init (&v->miss, new_width); + { + mv_destroy (&v->miss); + mv_init (&v->miss, new_width); + } if (v->val_labs != NULL) { @@ -399,15 +295,7 @@ var_set_width (struct variable *v, int new_width) fmt_resize (&v->write, new_width); v->width = new_width; - - { - const int old_val_count = value_cnt_from_width (old_width); - const int new_val_count = value_cnt_from_width (new_width); - - if ( old_val_count != new_val_count) - dict_var_resized (v, new_val_count - old_val_count); - } - + dict_var_resized (v, old_width); dict_var_changed (v); } @@ -425,30 +313,6 @@ var_is_alpha (const struct variable *v) { return var_get_type (v) == VAL_STRING; } - -/* Returns true if variable V is a short string variable, false - otherwise. */ -bool -var_is_short_string (const struct variable *v) -{ - return v->width > 0 && v->width <= MAX_SHORT_STRING; -} - -/* Returns true if variable V is a long string variable, false - otherwise. */ -bool -var_is_long_string (const struct variable *v) -{ - return v->width > MAX_SHORT_STRING; -} - -/* Returns the number of "union value"s need to store a value of - variable V. */ -size_t -var_get_value_cnt (const struct variable *v) -{ - return value_cnt_from_width (v->width); -} /* Returns variable V's missing values. */ const struct missing_values * @@ -467,11 +331,12 @@ var_set_missing_values (struct variable *v, const struct missing_values *miss) if (miss != NULL) { assert (mv_is_resizable (miss, v->width)); + mv_destroy (&v->miss); mv_copy (&v->miss, miss); mv_resize (&v->miss, v->width); } else - mv_init (&v->miss, v->width); + mv_clear (&v->miss); dict_var_changed (v); } @@ -513,7 +378,7 @@ var_is_num_missing (const struct variable *v, double d, enum mv_class class) S[] must contain exactly as many characters as V's width. V must be a string variable. */ bool -var_is_str_missing (const struct variable *v, const char s[], +var_is_str_missing (const struct variable *v, const uint8_t s[], enum mv_class class) { return mv_is_str_missing (&v->miss, s, class); @@ -558,31 +423,30 @@ var_set_value_labels (struct variable *v, const struct val_labs *vls) static void alloc_value_labels (struct variable *v) { - assert (!var_is_long_string (v)); if (v->val_labs == NULL) v->val_labs = val_labs_create (v->width); } /* Attempts to add a value label with the given VALUE and LABEL - to V. Returns true if successful, false if VALUE has an - existing label or if V is a long string variable. */ + to V. Returns true if successful, false otherwise (probably + due to an existing label). */ bool var_add_value_label (struct variable *v, const union value *value, const char *label) { alloc_value_labels (v); - return val_labs_add (v->val_labs, *value, label); + return val_labs_add (v->val_labs, value, label); } /* Adds or replaces a value label with the given VALUE and LABEL to V. - Has no effect if V is a long string variable. */ +*/ void var_replace_value_label (struct variable *v, const union value *value, const char *label) { alloc_value_labels (v); - val_labs_replace (v->val_labs, *value, label); + val_labs_replace (v->val_labs, value, label); } /* Removes V's value labels, if any. */ @@ -597,7 +461,7 @@ var_clear_value_labels (struct variable *v) const char * var_lookup_value_label (const struct variable *v, const union value *value) { - return val_labs_find (v->val_labs, *value); + return val_labs_find (v->val_labs, value); } /* Append STR with a string representing VALUE for variable V. @@ -612,8 +476,9 @@ var_append_value_name (const struct variable *v, const union value *value, const char *name = var_lookup_value_label (v, value); if (name == NULL) { - char *s = ds_put_uninit (str, v->print.w); - data_out (value, &v->print, s); + char *s = data_out (value, var_get_encoding (v), &v->print); + ds_put_cstr (str, s); + free (s); } else ds_put_cstr (str, name); @@ -698,33 +563,61 @@ var_get_label (const struct variable *v) return v->label; } -/* Sets V's variable label to LABEL, stripping off leading and - trailing white space and truncating to 255 characters. - If LABEL is a null pointer or if LABEL is an empty string - (after stripping white space), then V's variable label (if - any) is removed. */ -void -var_set_label (struct variable *v, const char *label) +/* Sets V's variable label to UTF-8 encoded string LABEL, stripping off leading + and trailing white space. If LABEL is a null pointer or if LABEL is an + empty string (after stripping white space), then V's variable label (if any) + is removed. + + Variable labels are limited to 255 bytes in the dictionary encoding, which + should be specified as DICT_ENCODING. If LABEL fits within this limit, this + function returns true. Otherwise, the variable label is set to a truncated + value, this function returns false and, if ISSUE_WARNING is true, issues a + warning. */ +bool +var_set_label (struct variable *v, const char *label, + const char *dict_encoding, bool issue_warning) { + bool truncated = false; + free (v->label); v->label = NULL; if (label != NULL) { struct substring s = ss_cstr (label); + size_t trunc_len; + + if (dict_encoding != NULL) + { + enum { MAX_LABEL_LEN = 255 }; + + trunc_len = utf8_encoding_trunc_len (label, dict_encoding, + MAX_LABEL_LEN); + if (ss_length (s) > trunc_len) + { + if (issue_warning) + msg (SW, _("Truncating variable label for variable `%s' to %d " + "bytes."), var_get_name (v), MAX_LABEL_LEN); + ss_truncate (&s, trunc_len); + truncated = true; + } + } + ss_trim (&s, ss_cstr (CC_SPACES)); - ss_truncate (&s, 255); if (!ss_is_empty (s)) v->label = ss_xstrdup (s); - dict_var_changed (v); } + + dict_var_changed (v); + + return truncated; } /* Removes any variable label from V. */ void var_clear_label (struct variable *v) { - var_set_label (v, NULL); + var_set_label (v, NULL, NULL, false); } /* Returns true if V has a variable V, @@ -893,8 +786,7 @@ var_get_short_name (const struct variable *var, size_t idx) void var_set_short_name (struct variable *var, size_t idx, const char *short_name) { - assert (var != NULL); - assert (short_name == NULL || var_is_plausible_name (short_name, false)); + assert (short_name == NULL || id_is_plausible (short_name, false)); /* Clear old short name numbered IDX, if any. */ if (idx < var->short_name_cnt) @@ -944,8 +836,8 @@ var_clear_short_names (struct variable *v) size_t var_get_dict_index (const struct variable *v) { - assert (v->vardict.dict_index != -1); - return v->vardict.dict_index; + assert (var_has_vardict (v)); + return vardict_get_dict_index (v->vardict); } /* Returns V's index within the case represented by its @@ -955,8 +847,8 @@ var_get_dict_index (const struct variable *v) size_t var_get_case_index (const struct variable *v) { - assert (v->vardict.case_index != -1); - return v->vardict.case_index; + assert (var_has_vardict (v)); + return vardict_get_case_index (v->vardict); } /* Returns V's auxiliary data, or a null pointer if none has been @@ -975,7 +867,7 @@ void * var_attach_aux (const struct variable *v_, void *aux, void (*aux_dtor) (struct variable *)) { - struct variable *v = (struct variable *) v_ ; /* cast away const */ + struct variable *v = CONST_CAST (struct variable *, v_); assert (v->aux == NULL); assert (aux != NULL); v->aux = aux; @@ -999,7 +891,6 @@ var_detach_aux (struct variable *v) void var_clear_aux (struct variable *v) { - assert (v != NULL); if (v->aux != NULL) { if (v->aux_dtor != NULL) @@ -1017,35 +908,6 @@ var_dtor_free (struct variable *v) free (v->aux); } -/* Observed categorical values. */ - -/* Returns V's observed categorical values, - which V must have. */ -struct cat_vals * -var_get_obs_vals (const struct variable *v) -{ - assert (v->obs_vals != NULL); - return v->obs_vals; -} - -/* Sets V's observed categorical values to CAT_VALS. - V becomes the owner of CAT_VALS. */ -void -var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals) -{ - struct variable *v = (struct variable *) v_ ; /* cast away const */ - cat_stored_values_destroy (v->obs_vals); - v->obs_vals = cat_vals; -} - -/* Returns true if V has observed categorical values, - false otherwise. */ -bool -var_has_obs_vals (const struct variable *v) -{ - return v->obs_vals != NULL; -} - /* Returns variable V's attribute set. The caller may examine or modify the attribute set, but must not destroy it. Destroying V, or calling var_set_attributes() on V, will also destroy its @@ -1053,7 +915,7 @@ var_has_obs_vals (const struct variable *v) struct attrset * var_get_attributes (const struct variable *v) { - return (struct attrset *) &v->attributes; + return CONST_CAST (struct attrset *, &v->attributes); } /* Replaces variable V's attributes set by a copy of ATTRS. */ @@ -1071,33 +933,41 @@ var_has_attributes (const struct variable *v) return attrset_count (&v->attributes) > 0; } +/* Returns the encoding of values of variable VAR. (This is actually a + property of the dictionary.) Returns null if no specific encoding has been + set. */ +const char * +var_get_encoding (const struct variable *var) +{ + return (var_has_vardict (var) + ? dict_get_encoding (vardict_get_dictionary (var->vardict)) + : NULL); +} + /* Returns V's vardict structure. */ -const struct vardict_info * +struct vardict_info * var_get_vardict (const struct variable *v) { - assert (var_has_vardict (v)); - return &v->vardict; + return CONST_CAST (struct vardict_info *, v->vardict); } /* Sets V's vardict data to VARDICT. */ void -var_set_vardict (struct variable *v, const struct vardict_info *vardict) +var_set_vardict (struct variable *v, struct vardict_info *vardict) { - assert (vardict->dict_index >= 0); - assert (vardict->case_index >= 0); - v->vardict = *vardict; + v->vardict = vardict; } /* Returns true if V has vardict data. */ bool var_has_vardict (const struct variable *v) { - return v->vardict.dict_index != -1; + return v->vardict != NULL; } /* Clears V's vardict data. */ void var_clear_vardict (struct variable *v) { - v->vardict.dict_index = v->vardict.case_index = -1; + v->vardict = NULL; }