/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include "variable.h"
-#include <stdlib.h>
-
-#include "category.h"
-#include "data-out.h"
-#include "format.h"
-#include "dictionary.h"
-#include "identifier.h"
-#include "missing-values.h"
-#include "value-labels.h"
-#include "vardict.h"
+#include "data/variable.h"
-#include <libpspp/misc.h>
-#include <libpspp/assertion.h>
-#include <libpspp/compiler.h>
-#include <libpspp/hash.h>
-#include <libpspp/message.h>
-#include <libpspp/str.h>
+#include <stdlib.h>
-#include "xalloc.h"
+#include "data/attributes.h"
+#include "data/data-out.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/identifier.h"
+#include "data/missing-values.h"
+#include "data/value-labels.h"
+#include "data/vardict.h"
+#include "libpspp/assertion.h"
+#include "libpspp/compiler.h"
+#include "libpspp/hash-functions.h"
+#include "libpspp/i18n.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "libpspp/str.h"
+
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
struct variable
{
/* Dictionary information. */
- char name[VAR_NAME_LEN + 1]; /* Variable name. Mixed case. */
+ char *name; /* Variable name. Mixed case. */
int width; /* 0 for numeric, otherwise string width. */
struct missing_values miss; /* Missing values. */
struct fmt_spec print; /* Default format for PRINT. */
bool leave; /* Leave value from case to case? */
/* Data for use by containing dictionary. */
- struct vardict_info vardict;
+ struct vardict_info *vardict;
/* Used only for system and portable file input and output.
See short-names.h. */
void *aux;
void (*aux_dtor) (struct variable *);
- /* Values of a categorical variable. Procedures need
- vectors with binary entries, so any variable of type ALPHA will
- have its values stored here. */
- struct cat_vals *obs_vals;
+ /* Custom attributes. */
+ struct attrset attributes;
};
\f
/* Creates and returns a new variable with the given NAME and
var_create (const char *name, int width)
{
struct variable *v;
+ enum val_type type;
assert (width >= 0 && width <= MAX_STRING);
v = xmalloc (sizeof *v);
- v->vardict.dict_index = v->vardict.case_index = -1;
+ v->vardict = NULL;
+ v->name = NULL;
var_set_name (v, name);
v->width = width;
mv_init (&v->miss, width);
v->leave = var_must_leave (v);
- if (var_is_numeric (v))
- {
- v->print = fmt_for_output (FMT_F, 8, 2);
- v->alignment = ALIGN_RIGHT;
- v->measure = MEASURE_SCALE;
- }
- else
- {
- v->print = fmt_for_output (FMT_A, var_get_width (v), 0);
- v->alignment = ALIGN_LEFT;
- v->measure = MEASURE_NOMINAL;
- }
+ type = val_type_from_width (width);
+ v->alignment = var_default_alignment (type);
+ v->measure = var_default_measure (type);
v->display_width = var_default_display_width (width);
- v->write = v->print;
+ v->print = v->write = var_default_formats (width);
v->val_labs = NULL;
v->label = NULL;
v->short_names = NULL;
v->short_name_cnt = 0;
v->aux = NULL;
v->aux_dtor = NULL;
- v->obs_vals = NULL;
+ attrset_init (&v->attributes);
return v;
}
- The new variable is not added to OLD_VAR's dictionary by
default. Use dict_clone_var, instead, to do that.
-
- - Auxiliary data and obs_vals are not copied. */
+*/
struct variable *
var_clone (const struct variable *old_var)
{
var_set_print_format (new_var, var_get_print_format (old_var));
var_set_write_format (new_var, var_get_write_format (old_var));
var_set_value_labels (new_var, var_get_value_labels (old_var));
- var_set_label (new_var, var_get_label (old_var));
+ var_set_label (new_var, var_get_label (old_var), NULL, false);
var_set_measure (new_var, var_get_measure (old_var));
var_set_display_width (new_var, var_get_display_width (old_var));
var_set_alignment (new_var, var_get_alignment (old_var));
var_set_leave (new_var, var_get_leave (old_var));
+ var_set_attributes (new_var, var_get_attributes (old_var));
return new_var;
}
if (v != NULL)
{
assert (!var_has_vardict (v));
- cat_stored_values_destroy (v->obs_vals);
+ mv_destroy (&v->miss);
var_clear_short_names (v);
var_clear_aux (v);
val_labs_destroy (v->val_labs);
var_clear_label (v);
+ free (v->name);
free (v);
}
}
\f
/* Variable names. */
-/* Return variable V's name. */
+/* Returns variable V's name, as a UTF-8 encoded string.  The
+   returned pointer is V's own storage, not a copy. */
const char *
var_get_name (const struct variable *v)
{
return v->name;
}
-/* Sets V's name to NAME.
+/* Sets V's name to a copy of NAME, a UTF-8 encoded string.  NAME
+   may alias V's current name (e.g. var_get_name (v)).
Do not use this function for a variable in a dictionary. Use
dict_rename_var instead. */
void
var_set_name (struct variable *v, const char *name)
{
- assert (v->vardict.dict_index == -1);
- assert (var_is_plausible_name (name, false));
+  char *new_name;
+
+  assert (!var_has_vardict (v));
+  assert (id_is_plausible (name, false));
- str_copy_trunc (v->name, sizeof v->name, name);
+  /* Duplicate NAME before releasing the old name, so that NAME is
+     never read after the storage it may point into is freed. */
+  new_name = xstrdup (name);
+  free (v->name);
+  v->name = new_name;
dict_var_changed (v);
}
-/* Returns true if NAME is an acceptable name for a variable,
- false otherwise. If ISSUE_ERROR is true, issues an
- explanatory error message on failure. */
-bool
-var_is_valid_name (const char *name, bool issue_error)
-{
- bool plausible;
- size_t length, i;
-
- assert (name != NULL);
-
- /* Note that strlen returns number of BYTES, not the number of
- CHARACTERS */
- length = strlen (name);
-
- plausible = var_is_plausible_name(name, issue_error);
-
- if ( ! plausible )
- return false;
-
-
- if (!lex_is_id1 (name[0]))
- {
- if (issue_error)
- msg (SE, _("Character `%c' (in %s) may not appear "
- "as the first character in a variable name."),
- name[0], name);
- return false;
- }
-
-
- for (i = 0; i < length; i++)
- {
- if (!lex_is_idn (name[i]))
- {
- if (issue_error)
- msg (SE, _("Character `%c' (in %s) may not appear in "
- "a variable name."),
- name[i], name);
- return false;
- }
- }
-
- return true;
-}
-
-/* Returns true if NAME is an plausible name for a variable,
- false otherwise. If ISSUE_ERROR is true, issues an
- explanatory error message on failure.
- This function makes no use of LC_CTYPE.
-*/
-bool
-var_is_plausible_name (const char *name, bool issue_error)
-{
- size_t length;
-
- assert (name != NULL);
-
- /* Note that strlen returns number of BYTES, not the number of
- CHARACTERS */
- length = strlen (name);
- if (length < 1)
- {
- if (issue_error)
- msg (SE, _("Variable name cannot be empty string."));
- return false;
- }
- else if (length > VAR_NAME_LEN)
- {
- if (issue_error)
- msg (SE, _("Variable name %s exceeds %d-character limit."),
- name, (int) VAR_NAME_LEN);
- return false;
- }
-
- if (lex_id_to_token (ss_cstr (name)) != T_ID)
- {
- if (issue_error)
- msg (SE, _("`%s' may not be used as a variable name because it "
- "is a reserved word."), name);
- return false;
- }
-
- return true;
-}
-
/* Returns VAR's dictionary class. */
enum dict_class
var_get_dict_class (const struct variable *var)
{
const struct variable *v = v_;
- return hsh_hash_case_string (v->name);
+ return hash_case_string (v->name, 0);
}
/* A hsh_compare_func that orders pointers to variables A and B
return strcasecmp (var_get_name (*a), var_get_name (*b));
}
+/* A hsh_compare_func that compares the variables pointed to by
+   A_ and B_ by their dictionary indexes.  Returns -1 if A's
+   index is lower than B's, 1 if it is higher, and 0 if the two
+   indexes are equal.  AUX is unused. */
+int
+compare_var_ptrs_by_dict_index (const void *a_, const void *b_,
+                                const void *aux UNUSED)
+{
+  struct variable *const *lhs = a_;
+  struct variable *const *rhs = b_;
+  const size_t lhs_index = var_get_dict_index (*lhs);
+  const size_t rhs_index = var_get_dict_index (*rhs);
+
+  if (lhs_index < rhs_index)
+    return -1;
+  return lhs_index > rhs_index;
+}
+
/* A hsh_hash_func that hashes pointer to variable V based on its
name. */
unsigned
{
struct variable *const *v = v_;
- return hsh_hash_case_string (var_get_name (*v));
+ return hash_case_string (var_get_name (*v), 0);
}
\f
/* Returns the type of variable V. */
{
const int old_width = v->width;
+ if (old_width == new_width)
+ return;
+
if (mv_is_resizable (&v->miss, new_width))
mv_resize (&v->miss, new_width);
else
- mv_init (&v->miss, new_width);
+ {
+ mv_destroy (&v->miss);
+ mv_init (&v->miss, new_width);
+ }
if (v->val_labs != NULL)
{
fmt_resize (&v->write, new_width);
v->width = new_width;
-
- {
- const int old_val_count = value_cnt_from_width (old_width);
- const int new_val_count = value_cnt_from_width (new_width);
-
- if ( old_val_count != new_val_count)
- dict_var_resized (v, new_val_count - old_val_count);
- }
-
+ dict_var_resized (v, old_width);
dict_var_changed (v);
}
{
return var_get_type (v) == VAL_STRING;
}
-
-/* Returns true if variable V is a short string variable, false
- otherwise. */
-bool
-var_is_short_string (const struct variable *v)
-{
- return v->width > 0 && v->width <= MAX_SHORT_STRING;
-}
-
-/* Returns true if variable V is a long string variable, false
- otherwise. */
-bool
-var_is_long_string (const struct variable *v)
-{
- return v->width > MAX_SHORT_STRING;
-}
-
-/* Returns the number of "union value"s need to store a value of
- variable V. */
-size_t
-var_get_value_cnt (const struct variable *v)
-{
- return value_cnt_from_width (v->width);
-}
\f
/* Returns variable V's missing values. */
const struct missing_values *
if (miss != NULL)
{
assert (mv_is_resizable (miss, v->width));
+ mv_destroy (&v->miss);
mv_copy (&v->miss, miss);
mv_resize (&v->miss, v->width);
}
else
- mv_init (&v->miss, v->width);
+ mv_clear (&v->miss);
dict_var_changed (v);
}
S[] must contain exactly as many characters as V's width.
V must be a string variable. */
bool
-var_is_str_missing (const struct variable *v, const char s[],
+var_is_str_missing (const struct variable *v, const uint8_t s[],
enum mv_class class)
{
return mv_is_str_missing (&v->miss, s, class);
static void
alloc_value_labels (struct variable *v)
{
- assert (!var_is_long_string (v));
if (v->val_labs == NULL)
v->val_labs = val_labs_create (v->width);
}
/* Attempts to add a value label with the given VALUE and LABEL
- to V. Returns true if successful, false if VALUE has an
- existing label or if V is a long string variable. */
+ to V. Returns true if successful, false otherwise (probably
+ due to an existing label). */
bool
var_add_value_label (struct variable *v,
const union value *value, const char *label)
{
alloc_value_labels (v);
- return val_labs_add (v->val_labs, *value, label);
+ return val_labs_add (v->val_labs, value, label);
}
/* Adds or replaces a value label with the given VALUE and LABEL
to V.
- Has no effect if V is a long string variable. */
+*/
void
var_replace_value_label (struct variable *v,
const union value *value, const char *label)
{
alloc_value_labels (v);
- val_labs_replace (v->val_labs, *value, label);
+ val_labs_replace (v->val_labs, value, label);
}
/* Removes V's value labels, if any. */
const char *
var_lookup_value_label (const struct variable *v, const union value *value)
{
- return val_labs_find (v->val_labs, *value);
+ return val_labs_find (v->val_labs, value);
}
/* Append STR with a string representing VALUE for variable V.
const char *name = var_lookup_value_label (v, value);
if (name == NULL)
{
- char *s = ds_put_uninit (str, v->print.w);
- data_out (value, &v->print, s);
+ char *s = data_out (value, var_get_encoding (v), &v->print);
+ ds_put_cstr (str, s);
+ free (s);
}
else
ds_put_cstr (str, name);
}
\f
-\f
/* Print and write formats. */
/* Returns V's print format specification. */
}
/* Sets V's print format specification to PRINT, which must be a
- valid format specification for outputting a variable of V's
- width. */
+ valid format specification for a variable of V's width
+ (ordinarily an output format, but input formats are not
+ rejected). */
void
var_set_print_format (struct variable *v, const struct fmt_spec *print)
{
}
/* Sets V's write format specification to WRITE, which must be a
- valid format specification for outputting a variable of V's
- width. */
+ valid format specification for a variable of V's width
+ (ordinarily an output format, but input formats are not
+ rejected). */
void
var_set_write_format (struct variable *v, const struct fmt_spec *write)
{
}
/* Sets V's print and write format specifications to FORMAT,
- which must be a valid format specification for outputting a
- variable of V's width. */
+ which must be a valid format specification for a variable of
+ V's width (ordinarily an output format, but input formats are
+ not rejected). */
void
var_set_both_formats (struct variable *v, const struct fmt_spec *format)
{
var_set_print_format (v, format);
var_set_write_format (v, format);
}
+
+/* Returns the default print and write format for a variable of
+   the given WIDTH (0 for numeric, otherwise the string width):
+   fmt_for_output (FMT_F, 8, 2) for a numeric variable, or
+   fmt_for_output (FMT_A, WIDTH, 0) for a string variable.  This
+   is the format that var_create assigns, so the return value can
+   be used to reset a variable's print and write formats to the
+   default. */
+struct fmt_spec
+var_default_formats (int width)
+{
+  if (width != 0)
+    return fmt_for_output (FMT_A, width, 0);
+  else
+    return fmt_for_output (FMT_F, 8, 2);
+}
\f
/* Return a string representing this variable, in the form most
appropriate from a human factors perspective, that is, its
return v->label;
}
-/* Sets V's variable label to LABEL, stripping off leading and
- trailing white space and truncating to 255 characters.
- If LABEL is a null pointer or if LABEL is an empty string
- (after stripping white space), then V's variable label (if
- any) is removed. */
-void
-var_set_label (struct variable *v, const char *label)
+/* Sets V's variable label to UTF-8 encoded string LABEL, stripping off leading
+ and trailing white space. If LABEL is a null pointer or if LABEL is an
+ empty string (after stripping white space), then V's variable label (if any)
+ is removed.
+
+ Variable labels are limited to 255 bytes in the dictionary encoding, which
+ should be specified as DICT_ENCODING. If DICT_ENCODING is null, no length
+ limit is enforced. If LABEL fits within the limit, this function returns
+ false. Otherwise, the variable label is set to a truncated value, this
+ function returns true and, if ISSUE_WARNING is true, issues a warning. */
+bool
+var_set_label (struct variable *v, const char *label,
+ const char *dict_encoding, bool issue_warning)
{
+ bool truncated = false;
+
free (v->label);
v->label = NULL;
if (label != NULL)
{
struct substring s = ss_cstr (label);
+ size_t trunc_len;
+
+ if (dict_encoding != NULL)
+ {
+ enum { MAX_LABEL_LEN = 255 };
+
+ trunc_len = utf8_encoding_trunc_len (label, dict_encoding,
+ MAX_LABEL_LEN);
+ if (ss_length (s) > trunc_len)
+ {
+ if (issue_warning)
+ msg (SW, _("Truncating variable label for variable `%s' to %d "
+ "bytes."), var_get_name (v), MAX_LABEL_LEN);
+ ss_truncate (&s, trunc_len);
+ truncated = true;
+ }
+ }
+
ss_trim (&s, ss_cstr (CC_SPACES));
- ss_truncate (&s, 255);
if (!ss_is_empty (s))
v->label = ss_xstrdup (s);
- dict_var_changed (v);
}
+
+ dict_var_changed (v);
+
+ return truncated;
}
/* Removes any variable label from V. */
void
var_clear_label (struct variable *v)
{
- var_set_label (v, NULL);
+ var_set_label (v, NULL, NULL, false);
}
/* Returns true if V has a variable label,
v->measure = measure;
dict_var_changed (v);
}
+
+/* Returns the measurement level that var_create assigns to a new
+   variable of the given TYPE: MEASURE_SCALE for VAL_NUMERIC,
+   MEASURE_NOMINAL for any other type.  The return value can be
+   used to reset a variable's measurement level to the default. */
+enum measure
+var_default_measure (enum val_type type)
+{
+  if (type == VAL_NUMERIC)
+    return MEASURE_SCALE;
+  return MEASURE_NOMINAL;
+}
\f
/* Returns V's display width, which applies only to GUIs. */
int
/* Sets V's display width to NEW_WIDTH.  Calls
   dict_var_display_width_changed only if the width actually
   differs from the previous one; dict_var_changed is called in
   either case. */
void
-var_set_display_width (struct variable *v, int display_width)
+var_set_display_width (struct variable *v, int new_width)
{
- v->display_width = display_width;
+ int old_width = v->display_width;
+
+ v->display_width = new_width;
+
+ if ( old_width != new_width)
+ dict_var_display_width_changed (v);
+
dict_var_changed (v);
}
v->alignment = alignment;
dict_var_changed (v);
}
+
+/* Returns the display alignment that var_create assigns to a new
+   variable of the given TYPE: ALIGN_RIGHT for VAL_NUMERIC,
+   ALIGN_LEFT for any other type.  The return value can be used
+   to reset a variable's display alignment to the default. */
+enum alignment
+var_default_alignment (enum val_type type)
+{
+  if (type == VAL_NUMERIC)
+    return ALIGN_RIGHT;
+  return ALIGN_LEFT;
+}
\f
/* Whether variables' values should be preserved from case to
case. */
void
var_set_short_name (struct variable *var, size_t idx, const char *short_name)
{
- assert (var != NULL);
- assert (short_name == NULL || var_is_plausible_name (short_name, false));
+ assert (short_name == NULL || id_is_plausible (short_name, false));
/* Clear old short name numbered IDX, if any. */
if (idx < var->short_name_cnt)
size_t
var_get_dict_index (const struct variable *v)
{
- assert (v->vardict.dict_index != -1);
- return v->vardict.dict_index;
+ assert (var_has_vardict (v));
+ return vardict_get_dict_index (v->vardict);
}
/* Returns V's index within the case represented by its
size_t
var_get_case_index (const struct variable *v)
{
- assert (v->vardict.case_index != -1);
- return v->vardict.case_index;
+ assert (var_has_vardict (v));
+ return vardict_get_case_index (v->vardict);
}
\f
/* Returns V's auxiliary data, or a null pointer if none has been
var_attach_aux (const struct variable *v_,
void *aux, void (*aux_dtor) (struct variable *))
{
- struct variable *v = (struct variable *) v_ ; /* cast away const */
+ struct variable *v = CONST_CAST (struct variable *, v_);
assert (v->aux == NULL);
assert (aux != NULL);
v->aux = aux;
void
var_clear_aux (struct variable *v)
{
- assert (v != NULL);
if (v->aux != NULL)
{
if (v->aux_dtor != NULL)
free (v->aux);
}
\f
-/* Observed categorical values. */
-
-/* Returns V's observed categorical values,
- which V must have. */
-struct cat_vals *
-var_get_obs_vals (const struct variable *v)
+/* Returns variable V's attribute set. The caller may examine or
+ modify the attribute set, but must not destroy it. Destroying
+ V, or calling var_set_attributes() on V, will also destroy its
+ attribute set. */
+struct attrset *
+var_get_attributes (const struct variable *v)
{
- assert (v->obs_vals != NULL);
- return v->obs_vals;
+ return CONST_CAST (struct attrset *, &v->attributes);
}
-/* Sets V's observed categorical values to CAT_VALS.
- V becomes the owner of CAT_VALS. */
+/* Replaces variable V's attribute set by a copy of ATTRS.  If
+   ATTRS is V's own attribute set (as returned by
+   var_get_attributes (v)), V is left unchanged. */
void
-var_set_obs_vals (const struct variable *v_, struct cat_vals *cat_vals)
+var_set_attributes (struct variable *v, const struct attrset *attrs)
{
- struct variable *v = (struct variable *) v_ ; /* cast away const */
- cat_stored_values_destroy (v->obs_vals);
- v->obs_vals = cat_vals;
+  /* V's set is destroyed before ATTRS is cloned, so cloning from
+     V's own set would read an already-destroyed object. */
+  if (attrs == &v->attributes)
+    return;
+
+  attrset_destroy (&v->attributes);
+  attrset_clone (&v->attributes, attrs);
}
-/* Returns true if V has observed categorical values,
- false otherwise. */
+/* Returns true if V has any custom attributes, false if it has none. */
bool
-var_has_obs_vals (const struct variable *v)
+var_has_attributes (const struct variable *v)
+{
+ return attrset_count (&v->attributes) > 0;
+}
+\f
+/* Returns the encoding of values of variable VAR. (This is actually a
+ property of the dictionary.) Returns null if no specific encoding has been
+ set. */
+const char *
+var_get_encoding (const struct variable *var)
{
- return v->obs_vals != NULL;
+ return (var_has_vardict (var)
+ ? dict_get_encoding (vardict_get_dictionary (var->vardict))
+ : NULL);
}
\f
/* Returns V's vardict structure. */
-const struct vardict_info *
+struct vardict_info *
var_get_vardict (const struct variable *v)
{
- assert (var_has_vardict (v));
- return &v->vardict;
+ return CONST_CAST (struct vardict_info *, v->vardict);
}
/* Sets V's vardict data to VARDICT. */
void
-var_set_vardict (struct variable *v, const struct vardict_info *vardict)
+var_set_vardict (struct variable *v, struct vardict_info *vardict)
{
- assert (vardict->dict_index >= 0);
- assert (vardict->case_index >= 0);
- v->vardict = *vardict;
+ v->vardict = vardict;
}
/* Returns true if V has vardict data. */
bool
var_has_vardict (const struct variable *v)
{
- return v->vardict.dict_index != -1;
+ return v->vardict != NULL;
}
/* Clears V's vardict data. */
void
var_clear_vardict (struct variable *v)
{
- v->vardict.dict_index = v->vardict.case_index = -1;
+ v->vardict = NULL;
}