statistics_keyword_output (reg_stats_tol, keywords[tol], c);
statistics_keyword_output (reg_stats_selection, keywords[selection], c);
}
+
+/*
+  Write to FP the part of the exported C source that describes the
+  categorical (ALPHA) variables among the coefficients of C.
+
+  The output starts with reg_export_categorical_encode_1.  After that,
+  one "struct pspp_reg_categorical_variable NAME;" declaration is
+  written per categorical variable, followed by assignments to that
+  struct's name, n_vals and values members.  The values are obtained
+  from cat_subscript_to_value () for subscripts 0 .. n_categories - 1.
+
+  NOTE(review): the values are printed with "%s", which assumes val->s
+  is NUL-terminated, and neither names nor values are escaped for use
+  inside a C string literal -- confirm both against the callers.
+ */
+static void
+reg_print_categorical_encoding (FILE *fp, pspp_linreg_cache *c)
+{
+  int i;
+  size_t j;
+  size_t n_cats;
+  const struct pspp_linreg_coeff *coeff;
+  union value *val;
+
+  fprintf (fp, "%s", reg_export_categorical_encode_1);
+
+  /* First pass: declare one struct per categorical variable.
+     NOTE(review): if c->coeff holds n_indeps + 1 entries (intercept
+     plus one per independent variable) this bound skips the last
+     one -- confirm against the definition of pspp_linreg_cache. */
+  for (i = 1; i < c->n_indeps; i++)	/* c->coeff[0] is the intercept. */
+    {
+      coeff = &c->coeff[i];
+      if (coeff->v->type == ALPHA)
+        {
+          fprintf (fp, "struct pspp_reg_categorical_variable %s;\n\t",
+                   coeff->v->name);
+        }
+    }
+
+  /* Second pass: fill in the members of each declared struct. */
+  for (i = 1; i < c->n_indeps; i++)
+    {
+      coeff = &c->coeff[i];
+      if (coeff->v->type != ALPHA)
+        {
+          continue;
+        }
+      n_cats = coeff->v->obs_vals->n_categories;
+
+      fprintf (fp, "%s.name = \"%s\";\n\t", coeff->v->name, coeff->v->name);
+      /* Cast explicitly so the argument always matches the conversion
+         specifier, whatever the width of the category count. */
+      fprintf (fp, "%s.n_vals = %lu;\n\t", coeff->v->name,
+               (unsigned long) n_cats);
+      fprintf (fp, "%s.values = {", coeff->v->name);
+      /* The separator is written before every item but the first, so
+         no "n_cats - 1" is needed; that expression wraps around when
+         there are no categories at all. */
+      for (j = 0; j < n_cats; j++)
+        {
+          val = cat_subscript_to_value (j, coeff->v);
+          fprintf (fp, "%s\"%s\"", j > 0 ? ",\n\t\t" : "", val->s);
+        }
+      fprintf (fp, "};\n\n\t");
+    }
+}
+
static void
reg_print_depvars (FILE *fp, pspp_linreg_cache *c)
{
fprintf (fp, "%s", reg_preamble);
fprintf (fp, "#include <string.h>\n#include <math.h>\n\n");
reg_print_getvar (fp, c);
+ reg_print_categorical_encoding (fp, c);
fprintf (fp, "%s", reg_export_t_quantiles_1);
increment = 0.5 / (double) increment;
for (i = 0; i < n_quantiles - 1; i++)
"\n\tresult *= pspp_reg_t_quantile ((1.0 + p) / 2.0);\n\t"
"result += pspp_reg_estimate (var_vals, var_names);\n\treturn result;\n}\n";
+/*
+  Change categorical values to binary vectors. The routine will use
+  an encoding in which a categorical variable with n values is mapped
+  to a vector with n-1 entries. The value with index 0 is mapped to the
+  zero vector, the value with index 1 is mapped to a vector whose first
+  entry is 1 and all others are 0, etc. For example, if a variable can
+  have 'a', 'b' or 'c' as values, then the value 'a' will be encoded as
+  (0,0), 'b' as (1,0) and 'c' as (0,1). If the design matrix used to
+  create the model used a different encoding, then the function
+  pspp_reg_categorical_encode () will return a vector which does not
+  match its categorical value in the model.
+
+  This first fragment emits the definition of
+  struct pspp_reg_categorical_variable and the opening of
+  get_value_vector (), up to the declaration of its result variable.
+  The values member is emitted as "char **values;" because a
+  declarator of the form "char *[] values;" is not valid C.
+ */
+const char reg_export_categorical_encode_1[] = "struct pspp_reg_categorical_variable\n"
+"{\n\tchar * name;\n\tsize_t n_vals;\n\tchar **values;\n};\n\n"
+"static\ndouble * get_value_vector (char *v)\n{\n\tdouble *result;\n\t";
+
+/*
+  Closing fragment of the emitted value-lookup code: the tail of a
+  "for" header, a body that sets result[i] to 1.0 when V equals
+  values[i], and the final "return result;" with the closing brace.
+  One emitted source line per literal below.
+ */
+const char reg_export_categorical_encode_2[] =
+  "; i++)\n"
+  "\t{\n"
+  "\t\tif (strcmp (v, values[i]) == 0)\n"
+  "\t\t{\n"
+  "\t\t\tresult[i] = 1.0;\n"
+  "\t\t}\n"
+  "\t}\n"
+  "\treturn result;\n"
+  "}\n";
+
#endif