From 00fd47eccf62f5657bc32655bdddcf3758d882e2 Mon Sep 17 00:00:00 2001
From: John Darrington <john@darrington.wattle.id.au>
Date: Tue, 20 Nov 2012 10:32:14 +0100
Subject: [PATCH] Logistic Regression: Optimise the classification
 calculations.

Instead of calculating the logit of the prediction for each case,
pre-calculate the inverse logit of the threshold before the process
starts.  This should give a small performance gain..
---
 src/language/stats/logistic.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/language/stats/logistic.c b/src/language/stats/logistic.c
index cdd31c268d..4645ee14d2 100644
--- a/src/language/stats/logistic.c
+++ b/src/language/stats/logistic.c
@@ -135,7 +135,8 @@ struct lr_spec
   /* What results should be presented */
   unsigned int print;
 
-  double cut_point;
+  /* Inverse logit of the cut point */
+  double ilogit_cut_point;
 };
 
 
@@ -372,11 +373,9 @@ xt_times_y_pi (const struct lr_spec *cmd,
 	  pred_y += gsl_vector_get (res->beta_hat, v0) * in0;
 	}
 
-      pred_y = 1 / (1.0 + exp(-pred_y));
-      assert (pred_y >= 0);
-      assert (pred_y <= 1);
-
-      if (pred_y <= cmd->cut_point)
+      /* Count the number of cases which would be correctly/incorrectly classified by this
+	 estimated model */
+      if (pred_y <= cmd->ilogit_cut_point)
 	{
 	  if (y == 0)
 	    res->tn += weight;
@@ -735,6 +734,7 @@ cmd_logistic (struct lexer *lexer, struct dataset *ds)
      These may or may not include the categorical predictors */
   const struct variable **pred_vars;
   size_t n_pred_vars;
+  double cp = 0.5;
 
   int v, x;
   struct lr_spec lr;
@@ -747,7 +747,6 @@ cmd_logistic (struct lexer *lexer, struct dataset *ds)
   lr.lcon = 0.0000;
   lr.bcon = 0.001;
   lr.min_epsilon = 0.00000001;
-  lr.cut_point = 0.5;
   lr.constant = true;
   lr.confidence = 95;
   lr.print = PRINT_DEFAULT;
@@ -967,8 +966,9 @@ cmd_logistic (struct lexer *lexer, struct dataset *ds)
 			  lex_error (lexer, NULL);
 			  goto error;
 			}
-		      lr.cut_point = lex_number (lexer);
-		      if (lr.cut_point < 0 || lr.cut_point > 1.0)
+		      cp = lex_number (lexer);
+		      
+		      if (cp < 0 || cp > 1.0)
 			{
 			  msg (ME, _("Cut point value must be in the range [0,1]"));
 			  goto error;
@@ -995,11 +995,13 @@ cmd_logistic (struct lexer *lexer, struct dataset *ds)
 	}
     }
 
+  lr.ilogit_cut_point = - log (1/cp - 1);
+  
+
   /* Copy the predictor variables from the temporary location into the 
      final one, dropping any categorical variables which appear there.
      FIXME: This is O(NxM).
   */
-
   {
   struct variable_node *vn, *next;
   struct hmap allvars;
-- 
2.30.2