+2007-04-22 John Darrington <john@darrington.wattle.id.au>
+
+ * Deleted existing category.h and moved cat-routines.h into
+ category.h. Encapsulated struct cat_vals better.
+
2007-04-19 John Darrington <john@darrington.wattle.id.au>
* sys-file-reader.c: When reading a system file which has no
src/data/case.h \
src/data/category.c \
src/data/category.h \
- src/data/cat-routines.h \
src/data/data-in.c \
src/data/data-in.h \
src/data/data-out.c \
+++ /dev/null
-/* PSPP - Binary encodings for categorical variables.
- Copyright (C) 2005 Free Software Foundation, Inc.
- Written by Jason H Stover <jason@sakla.net>.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
-
-/*
- Functions and data structures to recode categorical variables into
- vectors and sub-rows of matrices.
-
- To fit many types of statistical models, it is necessary
- to change each value of a categorical variable to a vector with binary
- entries. These vectors are then stored as sub-rows within a matrix
- during model-fitting. We need functions and data strucutres to,
- e.g., map a value, say 'a', of a variable named 'cat_var', to a
- vector, say (0 1 0 0 0), and vice versa. We also need to be able
- to map the vector back to the value 'a', and if the vector is a
- sub-row of a matrix, we need to know which sub-row corresponds to
- the variable 'cat_var'.
-
- */
-
-#ifndef CAT_ROUTINES_H
-#define CAT_ROUTINES_H
-#define CAT_VALUE_NOT_FOUND -2
-#include <stdbool.h>
-#include "category.h"
-
-size_t cat_value_find (const struct variable *, const union value *);
-
-union value *cat_subscript_to_value (const size_t, struct variable *);
-
-void cat_stored_values_create (const struct variable *);
-
-void cat_value_update (const struct variable *, const union value *);
-
-void cat_create_value_matrix (const struct variable *);
-
-void cat_stored_values_destroy (struct cat_vals *);
-#endif
#include <libpspp/alloc.h>
#include <libpspp/message.h>
-#include "cat-routines.h"
+#include "category.h"
#include "value.h"
#include "variable.h"
+#define CAT_VALUE_NOT_FOUND -2
+
#define N_INITIAL_CATEGORIES 1
+/*
+ This structure contains the observed values of a
+ categorical variable.
+
+ The full definition is given here; category.h only
+ forward-declares struct cat_vals.
+ */
+struct cat_vals
+{
+ union value *vals; /* Array holding the stored values. */
+ size_t n_categories; /* Number of entries of VALS in use.
+ Set to 0 by cat_stored_values_create;
+ cat_subscript_to_value only accepts
+ subscripts below this count.
+ */
+ size_t n_allocated_categories; /* Number of entries allocated
+ for VALS.  Set to
+ N_INITIAL_CATEGORIES by
+ cat_stored_values_create and
+ checked by
+ cat_stored_values_destroy
+ before VALS is freed.
+ */
+};
+
void
cat_stored_values_create (const struct variable *v)
{
if (!var_has_obs_vals (v))
{
struct cat_vals *obs_vals = xmalloc (sizeof *obs_vals);
+
obs_vals->n_categories = 0;
obs_vals->n_allocated_categories = N_INITIAL_CATEGORIES;
obs_vals->vals = xnmalloc (N_INITIAL_CATEGORIES, sizeof *obs_vals->vals);
void
cat_stored_values_destroy (struct cat_vals *obs_vals)
{
- if (obs_vals != NULL)
+ if (obs_vals != NULL)
{
if (obs_vals->n_allocated_categories > 0)
free (obs_vals->vals);
}
}
-union value *
-cat_subscript_to_value (const size_t s, struct variable *v)
+const union value *
+cat_subscript_to_value (const size_t s, const struct variable *v)
{
struct cat_vals *obs_vals = var_get_obs_vals (v);
return s < obs_vals->n_categories ? obs_vals->vals + s : NULL;
*/
-#ifndef CAT_H
-#define CAT_H
-#define CAT_VALUE_NOT_FOUND -2
-#include <stdbool.h>
+#ifndef CATEGORY_H
+#define CATEGORY_H
+
#include <stddef.h>
-union value;
+struct cat_vals;
struct variable ;
+union value;
+
+void cat_stored_values_create (const struct variable *);
+void cat_stored_values_destroy (struct cat_vals *);
+
+size_t cat_value_find (const struct variable *, const union value *);
+
+const union value *cat_subscript_to_value (const size_t,
+ const struct variable *);
+
+
+void cat_value_update (const struct variable *, const union value *);
-/*
- This structure contains the observed values of a
- categorical variable.
- */
-struct cat_vals
-{
- union value *vals;
- size_t n_categories;
- size_t n_allocated_categories; /* This is used only during
- initialization to keep
- track of the number of
- values stored.
- */
-};
/*
Return the number of categories of a categorical variable.
#include <ctype.h>
#include "case.h"
-#include "cat-routines.h"
#include "category.h"
#include "settings.h"
#include "value-labels.h"
struct variable *var = dict_get_var (*dict, i);
char short_name [SHORT_NAME_LEN + 1];
char long_name [SHORT_NAME_LEN + 1];
- char *s = short_name;
- char *d = long_name;
strcpy (short_name, var_get_name (var));
#include <stdlib.h>
-#include "cat-routines.h"
+
+#include "category.h"
#include "data-out.h"
#include "format.h"
#include "dictionary.h"
#include "regression-export.h"
#include <data/case.h>
#include <data/casefile.h>
-#include <data/cat-routines.h>
#include <data/category.h>
#include <data/dictionary.h>
#include <data/missing-values.h>
pspp_linreg_cache *model;
union value *output = NULL;
const union value **vals = NULL;
- struct variable **vars = NULL;
+ const struct variable **vars = NULL;
assert (trns != NULL);
model = trns->c;
union value *output = NULL;
const union value **vals = NULL;
const union value *obs = NULL;
- struct variable **vars = NULL;
+ const struct variable **vars = NULL;
assert (trns != NULL);
model = trns->c;
for (j = 0; j < n_categories; j++)
{
- union value *val = cat_subscript_to_value (j, varlist[i]);
+ const union value *val = cat_subscript_to_value (j, varlist[i]);
fprintf (fp, "%s.values[%d] = \"%s\";\n\t",
var_get_name (varlist[i]), j,
var_get_value_name (varlist[i], val));
j++;
if (var_is_alpha (v_variables[i]))
{
- /* Make a place to hold the binary vectors
+ /* Make a place to hold the binary vectors
corresponding to this variable's values. */
cat_stored_values_create (v_variables[i]);
}
if (n_data > 0)
{
Y = gsl_vector_alloc (n_data);
-
X =
design_matrix_create (n_indep, (const struct variable **) indep_vars,
n_data);
*/
c[i]->v_info = xnmalloc (c[i]->n_vars, sizeof (*c[i]->v_info));
assert (c[i]->v_info != NULL);
- c[i]->v_info->v =
- (const struct variable *) design_matrix_col_to_var (X, i);
+ c[i]->v_info->v = design_matrix_col_to_var (X, i);
if (var_is_alpha (c[i]->v_info->v))
{
assert (k <= i);
k = i - k;
c[i]->v_info->val =
- cat_subscript_to_value (k, (struct variable *) c[i]->v_info->v);
+ cat_subscript_to_value (k, c[i]->v_info->v);
}
}
}
#define DM_COLUMN_NOT_FOUND -1
#define DM_INDEX_NOT_FOUND -3
-/*
- Which element of a vector is equal to the value x?
- */
-static size_t
-cat_which_element_eq (const gsl_vector * vec, double x)
-{
- size_t i;
-
- for (i = 0; i < vec->size; i++)
- {
- if (fabs (gsl_vector_get (vec, i) - x) < GSL_DBL_EPSILON)
- {
- return i;
- }
- }
- return CAT_VALUE_NOT_FOUND;
-}
-static int
-cat_is_zero_vector (const gsl_vector * vec)
-{
- size_t i;
-
- for (i = 0; i < vec->size; i++)
- {
- if (gsl_vector_get (vec, i) != 0.0)
- {
- return 0;
- }
- }
- return 1;
-}
-
-/*
- Return the value of v corresponding to the vector vec.
- */
-union value *
-cat_vector_to_value (const gsl_vector * vec, struct variable *v)
-{
- size_t i;
-
- i = cat_which_element_eq (vec, 1.0);
- if (i != CAT_VALUE_NOT_FOUND)
- {
- return cat_subscript_to_value (i + 1, v);
- }
- if (cat_is_zero_vector (vec))
- {
- return cat_subscript_to_value (0, v);
- }
- return NULL;
-}
struct design_matrix *
design_matrix_create (int n_variables,
}
else if (var_is_alpha (v))
{
- struct cat_vals *obs_vals = var_get_obs_vals (v);
+ size_t n_categories = cat_get_n_categories (v);
(dm->vars + i)->last_column =
- (dm->vars + i)->first_column + obs_vals->n_categories - 2;
- n_cols += obs_vals->n_categories - 1;
+ (dm->vars + i)->first_column + n_categories - 2;
+ n_cols += n_categories - 1;
}
}
dm->m = gsl_matrix_calloc (n_data, n_cols);
Return the index of the variable for the
given column.
*/
-struct variable *
+const struct variable *
design_matrix_col_to_var (const struct design_matrix *dm, size_t col)
{
size_t i;
{
v = dm->vars[i];
if (v.first_column <= col && col <= v.last_column)
- return (struct variable *) v.v;
+ return v.v;
}
return NULL;
}
gsl_matrix_set (dm->m, row, col, entry);
}
}
+
void
design_matrix_set_numeric (struct design_matrix *dm, size_t row,
const struct variable *var, const union value *val)
#include <gsl/gsl_matrix.h>
#include <stdbool.h>
#include <data/category.h>
-#include <data/cat-routines.h>
+
struct design_matrix_var
{
size_t first_column; /* First column for this variable in
size_t last_column;
const struct variable *v;
};
+
struct design_matrix
{
gsl_matrix *m;
*/
size_t n_vars;
};
-union value *cat_vector_to_value (const gsl_vector *, struct variable *);
+
struct design_matrix *design_matrix_create (int, const struct variable *[],
const size_t);
const union value *);
void design_matrix_set_numeric (struct design_matrix *, size_t,
- const struct variable *, const union value *);
+ const struct variable *,
+ const union value *);
size_t design_matrix_var_to_column (const struct design_matrix *,
const struct variable *);
-struct variable *design_matrix_col_to_var (const struct design_matrix *,
+const struct variable *design_matrix_col_to_var (const struct design_matrix *,
size_t);
#endif
The return value is the number of distinct variables found.
*/
int
-pspp_linreg_get_vars (const void *c_, struct variable **v)
+pspp_linreg_get_vars (const void *c_, const struct variable **v)
{
const pspp_linreg_cache *c = c_;
struct pspp_coeff *coef = NULL;
/*
Start at c->coeff[1] to avoid the intercept.
*/
- v[result] = (struct variable *) pspp_coeff_get_var (c->coeff[1], 0);
+ v[result] = pspp_coeff_get_var (c->coeff[1], 0);
result = (v[result] == NULL) ? 0 : 1;
for (coef = c->coeff[2]; coef < c->coeff[c->n_coeffs]; coef++)
}
if (i < 0 && result < c->n_coeffs)
{
- v[result] = (struct variable *) tmp;
+ v[result] = tmp;
result++;
}
}
/*
Returns pointers to the variables used in the model.
*/
- int (*get_vars) (const void *, struct variable **);
+ int (*get_vars) (const void *, const struct variable **);
struct variable *resid;
struct variable *pred;
/*
All variables used in the model.
*/
-int pspp_linreg_get_vars (const void *, struct variable **);
+int pspp_linreg_get_vars (const void *, const struct variable **);
#endif