From 65e4b2dda3939a9c35913a79b7e79c3120713a6c Mon Sep 17 00:00:00 2001
From: Jason Stover
Date: Sun, 1 Jan 2006 00:03:51 +0000
Subject: [PATCH] Categorical dependent variables for EXPORTed models

Declare one pspp_reg_categorical_variable per distinct categorical
variable in the exported program, collect them in varlist[], and let
the exported pspp_reg_get_value_vector() look a variable up by name.
---
 src/regression.q        | 66 +++++++++++++++++++++++++++++++++--------
 src/regression_export.h | 23 +++++++++++----
 2 files changed, 70 insertions(+), 19 deletions(-)

diff --git a/src/regression.q b/src/regression.q
index 74befc2e..1fe9cdec 100644
--- a/src/regression.q
+++ b/src/regression.q
@@ -38,6 +38,7 @@
 #include "missing-values.h"
 #include "regression_export.h"
 #include "tab.h"
+#include "value-labels.h"
 #include "var.h"
 #include "vfm.h"
 
@@ -472,42 +473,81 @@ subcommand_statistics (int *keywords, pspp_linreg_cache * c)
   statistics_keyword_output (reg_stats_tol, keywords[tol], c);
   statistics_keyword_output (reg_stats_selection, keywords[selection], c);
 }
+/*
+  Return 1 if V is already among the first N_VARS entries of VARLIST,
+  0 otherwise.  Variables are compared by their index member.
+*/
+static
+int reg_inserted (struct variable *v, struct variable **varlist, int n_vars)
+{
+  int i;
+  for (i = 0; i < n_vars; i++)
+    {
+      if (v->index == varlist[i]->index)
+        {
+          return 1;
+        }
+    }
+  return 0;
+}
+/*
+  Write to FP the categorical-encoding part of the exported program:
+  reg_export_categorical_encode_1, then one pspp_reg_categorical_variable
+  (name, n_vals, values) per distinct ALPHA variable among C's
+  coefficients, then reg_export_categorical_encode_2.
+*/
 static void
 reg_print_categorical_encoding (FILE *fp, pspp_linreg_cache *c)
 {
   int i;
   size_t j;
+  int n_vars = 0;
+  struct variable **varlist;
   struct pspp_linreg_coeff coeff;
   union value *val;
 
   fprintf (fp, "%s", reg_export_categorical_encode_1);
 
+  varlist = xnmalloc (c->n_indeps, sizeof (*varlist));
   for (i = 1; i < c->n_indeps; i++) /* c->coeff[0] is the intercept. */
     {
       coeff = c->coeff[i];
       if (coeff.v->type == ALPHA)
         {
-          fprintf (fp, "struct pspp_reg_categorical_variable %s;\n\t", coeff.v->name);
+          if (!reg_inserted (coeff.v, varlist, n_vars))
+            {
+              fprintf (fp, "struct pspp_reg_categorical_variable %s;\n\t", coeff.v->name);
+              varlist[n_vars] = coeff.v;
+              n_vars++;
+            }
         }
     }
-  for (i = 1; i < c->n_indeps; i++)
+  fprintf (fp, "int n_vars = %d;\n\t", n_vars);
+  fprintf (fp, "struct pspp_reg_categorical_variable *varlist[%d] = {", n_vars);
+  /* Print the separator before every entry except the first, so that
+     no entry of VARLIST is read when the list is empty. */
+  for (i = 0; i < n_vars; i++)
+    {
+      fprintf (fp, "%s&%s", i > 0 ? ",\n\t\t" : "", varlist[i]->name);
+    }
+  fprintf (fp, "};\n\t");
+
+  for (i = 0; i < n_vars; i++)
     {
       coeff = c->coeff[i];
-      if (coeff.v->type == ALPHA)
+      fprintf (fp, "%s.name = \"%s\";\n\t", varlist[i]->name, varlist[i]->name);
+      fprintf (fp, "%s.n_vals = %d;\n\t", varlist[i]->name, varlist[i]->obs_vals->n_categories);
+
+      for (j = 0; j < varlist[i]->obs_vals->n_categories; j++)
         {
-          fprintf (fp, "%s.name = \"%s\";\n\t", coeff.v->name, coeff.v->name);
-          fprintf (fp, "%s.n_vals = %d;\n\t", coeff.v->name, coeff.v->obs_vals->n_categories);
-          fprintf (fp, "%s.values = {", coeff.v->name);
-          for (j = 0; j < coeff.v->obs_vals->n_categories - 1; j++)
-            {
-              val = cat_subscript_to_value ( (const size_t) j, coeff.v);
-              fprintf (fp, "\"%s\",\n\t\t", val->s);
-            }
-          val = cat_subscript_to_value ( (const size_t) j, coeff.v);
-          fprintf (fp, "\"%s\"};\n\n\t", val->s);
+          val = cat_subscript_to_value ( (const size_t) j, varlist[i]);
+          fprintf (fp, "%s.values[%lu] = \"%s\";\n\t", varlist[i]->name,
+                   (unsigned long) j, value_to_string (val, varlist[i]));
         }
     }
+
+  free (varlist);
   fprintf (fp, "%s", reg_export_categorical_encode_2);
 }
 
 static void
diff --git a/src/regression_export.h b/src/regression_export.h
index 8798027a..a9583d7b 100644
--- a/src/regression_export.h
+++ b/src/regression_export.h
@@ -127,10 +127,21 @@ const char reg_export_prediction_interval_3[] = " + pspp_reg_variance (var_vals,
   a vector which does not match its categorical value in the model.
  */
 const char reg_export_categorical_encode_1[] = "struct pspp_reg_categorical_variable\n"
-"{\n\tchar * name;\n\tsize_t n_vals;\n\tchar *[] values;\n};\n\n"
-"static\ndouble * get_value_vector (char *v)\n{\n\tdouble *result;\n\t";
-
-const char reg_export_categorical_encode_2[] = "; i++)\n\t{\n\t\tif (strcmp (v, values[i]) == 0)"
-"\n\t\t{\n\t\t\tresult[i] = 1.0;\n\t\t}\n\t}\n\treturn result;\n}\n";
-
+"{\n\tchar * name;\n\tsize_t n_vals;\n\tchar *values[1024];\n};\n\n"
+"/*\n This function returns the binary vector which corresponds to the value\n"
+" of the categorical variable stored in 'value'. The name of the variable is\n"
+" stored in the 'var' argument. Notice the values stored in the\n"
+" pspp_reg_categorical_variable structures all end with a space character.\n"
+" That means the values of the categorical variables you pass to any function\n"
+" in this program should also end with a space character.\n*/\n"
+"static\ndouble * pspp_reg_get_value_vector (char *var, char *value)\n{\n\tdouble *result;\n\t"
+"int i;\n\t";
+
+const char reg_export_categorical_encode_2[] = "int v_index = 0;\n\t"
+"while (v_index < n_vars && strcmp (var, varlist[v_index]->name) != 0)\n\t{\n\t\t"
+"v_index++;\n\t}\n\tresult = (double *) malloc (varlist[v_index]->n_vals * sizeof (*result));\n\t"
+"for (i = 0; i < varlist[v_index]->n_vals; i++)\n\t{\n\t\t"
+"if (strcmp ( (varlist[v_index]->values)[i], value) == 0)\n\t\t{\n\t\t\t"
+"result[i] = 1.0;\n\t\t}\n\t\telse result[i] = 0.0;\n\t}\n\n\t"
+"return result;\n}\n\n";
 #endif
-- 
2.30.2