From 65e4b2dda3939a9c35913a79b7e79c3120713a6c Mon Sep 17 00:00:00 2001
From: Jason Stover <jhs@math.gcsu.edu>
Date: Sun, 1 Jan 2006 00:03:51 +0000
Subject: [PATCH] Categorical dependent variables for EXPORTed models

---
 src/regression.q        | 52 ++++++++++++++++++++++++++++++-----------
 src/regression_export.h | 23 +++++++++++++-----
 2 files changed, 56 insertions(+), 19 deletions(-)

diff --git a/src/regression.q b/src/regression.q
index 74befc2e..1fe9cdec 100644
--- a/src/regression.q
+++ b/src/regression.q
@@ -38,6 +38,7 @@
 #include "missing-values.h"
 #include "regression_export.h"
 #include "tab.h"
+#include "value-labels.h"
 #include "var.h"
 #include "vfm.h"
 
@@ -472,42 +473,67 @@ subcommand_statistics (int *keywords, pspp_linreg_cache * c)
   statistics_keyword_output (reg_stats_tol, keywords[tol], c);
   statistics_keyword_output (reg_stats_selection, keywords[selection], c);
 }
+/* Returns 1 if one of the first N_VARS elements of VARLIST has the same
+   index member as V, and 0 otherwise.  */
+static int
+reg_inserted (struct variable *v, struct variable **varlist, int n_vars)
+{
+  int pos = 0;
 
+  while (pos < n_vars)
+    {
+      if (varlist[pos++]->index == v->index)
+	return 1;
+    }
+  return 0;
+}
 static void
 reg_print_categorical_encoding (FILE *fp, pspp_linreg_cache *c)
 {
   int i;
   size_t j;
+  int n_vars = 0;
+  struct variable **varlist;
   struct pspp_linreg_coeff coeff;
   union value *val;
   
   fprintf (fp, "%s", reg_export_categorical_encode_1);
 
+  varlist = xnmalloc (c->n_indeps, sizeof (*varlist));
   for (i = 1; i < c->n_indeps; i++)   /* c->coeff[0] is the intercept. */
     {
       coeff = c->coeff[i];
       if (coeff.v->type == ALPHA)
 	{
-	  fprintf (fp, "struct pspp_reg_categorical_variable %s;\n\t", coeff.v->name);
+	  if (!reg_inserted (coeff.v, varlist, n_vars))
+	  {
+	    fprintf (fp, "struct pspp_reg_categorical_variable %s;\n\t", coeff.v->name);
+	    varlist[n_vars] = coeff.v;
+	    n_vars++;
+	  }
 	}
     }
-  for (i = 1; i < c->n_indeps; i++)
+  fprintf (fp, "int n_vars = %d;\n\t", n_vars);
+  fprintf (fp, "struct pspp_reg_categorical_variable *varlist[%d] = {", n_vars);
+  for (i = 0; i < n_vars - 1; i++)
+    {
+      fprintf (fp, "&%s,\n\t\t", varlist[i]->name);
+    }
+  fprintf (fp, "&%s};\n\t", varlist[i]->name);
+
+  for (i = 0; i < n_vars; i++)
     {
       coeff = c->coeff[i];
-      if (coeff.v->type == ALPHA)
+      fprintf (fp, "%s.name = \"%s\";\n\t", varlist[i]->name, varlist[i]->name);
+      fprintf (fp, "%s.n_vals = %d;\n\t", varlist[i]->name, varlist[i]->obs_vals->n_categories);
+
+      for (j = 0; j < varlist[i]->obs_vals->n_categories; j++)
 	{
-	  fprintf (fp, "%s.name = \"%s\";\n\t", coeff.v->name, coeff.v->name);
-	  fprintf (fp, "%s.n_vals = %d;\n\t", coeff.v->name, coeff.v->obs_vals->n_categories);
-	  fprintf (fp, "%s.values = {", coeff.v->name);
-	  for (j = 0; j < coeff.v->obs_vals->n_categories - 1; j++)
-	    {
-	      val = cat_subscript_to_value ( (const size_t) j, coeff.v);
-	      fprintf (fp, "\"%s\",\n\t\t", val->s);
-	    }
-	  val = cat_subscript_to_value ( (const size_t) j, coeff.v);
-	  fprintf (fp, "\"%s\"};\n\n\t", val->s);
+	  val = cat_subscript_to_value ( (const size_t) j, varlist[i]);
+	  fprintf (fp, "%s.values[%d] = \"%s\";\n\t", varlist[i]->name, j, value_to_string (val, varlist[i]));
 	}
     }
+  fprintf (fp, "%s", reg_export_categorical_encode_2);
 }
 
 static void
diff --git a/src/regression_export.h b/src/regression_export.h
index 8798027a..a9583d7b 100644
--- a/src/regression_export.h
+++ b/src/regression_export.h
@@ -127,10 +127,21 @@ const char reg_export_prediction_interval_3[] = " + pspp_reg_variance (var_vals,
   a vector which does not match its categorical value in the model.
  */
 const char reg_export_categorical_encode_1[] = "struct pspp_reg_categorical_variable\n"
-"{\n\tchar * name;\n\tsize_t n_vals;\n\tchar *[] values;\n};\n\n"
-"static\ndouble * get_value_vector (char *v)\n{\n\tdouble *result;\n\t";
-
-const char reg_export_categorical_encode_2[] = "; i++)\n\t{\n\t\tif (strcmp (v, values[i]) == 0)"
-"\n\t\t{\n\t\t\tresult[i] = 1.0;\n\t\t}\n\t}\n\treturn result;\n}\n";
-
+"{\n\tchar * name;\n\tsize_t n_vals;\n\tchar *values[1024];\n};\n\n"
+"/*\n   This function returns the binary vector which corresponds to the value\n"
+"   of the categorical variable stored in 'value'. The name of the variable is\n"
+"   stored in the 'var' argument. Notice the values stored in the\n"
+"   pspp_reg_categorical_variable structures all end with a space character.\n"
+"   That means the values of the categorical variables you pass to any function\n"
+"   in this program should also end with a space character.\n*/\n"
+"static\ndouble * pspp_reg_get_value_vector (char *var, char *value)\n{\n\tdouble *result;\n\t"
+"int i;\n\t";
+/* Closes the exported function: locates VAR (null if unknown), encodes VALUE.  */
+const char reg_export_categorical_encode_2[] = "int v_index = 0;\n\t"
+"while (v_index < n_vars && strcmp (var, varlist[v_index]->name) != 0)\n\t{\n\t\t"
+"v_index++;\n\t}\n\tif (v_index == n_vars)\n\t{\n\t\treturn 0;\n\t}\n\t"
+"result = (double *) malloc (varlist[v_index]->n_vals * sizeof (*result));\n\t"
+"for (i = 0; i < varlist[v_index]->n_vals; i++)\n\t{\n\t\t"
+"if (strcmp ( (varlist[v_index]->values)[i], value) == 0)\n\t\t{\n\t\t\t"
+"result[i] = 1.0;\n\t\t}\n\t\telse result[i] = 0.0;\n\t}\n\n\treturn result;\n}\n\n";
 #endif
-- 
2.30.2