pintos-os.org Git - pspp/blob - src/language/stats/logistic.c

   1 /* pspp - a program for statistical analysis.
   2    Copyright (C) 2012 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17
  18 /*
  19    References:
  20    1. "Coding Logistic Regression with Newton-Raphson", James McCaffrey
  21    http://msdn.microsoft.com/en-us/magazine/jj618304.aspx
  22
  23    2. "SPSS Statistical Algorithms" Chapter LOGISTIC REGRESSION Algorithms
  24
  25
  26    The Newton Raphson method finds successive approximations to $\bf b$ where
  27    approximation ${\bf b}_t$ is (hopefully) better than the previous ${\bf b}_{t-1}$.
  28
  29    $ {\bf b}_t = {\bf b}_{t -1} + ({\bf X}^T{\bf W}_{t-1}{\bf X})^{-1}{\bf X}^T({\bf y} - {\bf \pi}_{t-1})$
  30    where:
  31
  32    $\bf X$ is the $n \times p$ design matrix, $n$ being the number of cases,
  33    $p$ the number of parameters, \par
  34    $\bf W$ is the diagonal matrix whose diagonal elements are
  35    $\hat{\pi}_0(1 - \hat{\pi}_0), \, \hat{\pi}_1(1 - \hat{\pi}_2)\dots \hat{\pi}_{n-1}(1 - \hat{\pi}_{n-1})$
  36    \par
  37
  38 */
  39
  40 #include <config.h>
  41
  42 #include <gsl/gsl_blas.h>
  43
  44 #include <gsl/gsl_linalg.h>
  45 #include <gsl/gsl_cdf.h>
  46 #include <gsl/gsl_matrix.h>
  47 #include <gsl/gsl_vector.h>
  48 #include <math.h>
  49
  50 #include "data/case.h"
  51 #include "data/casegrouper.h"
  52 #include "data/casereader.h"
  53 #include "data/dataset.h"
  54 #include "data/dictionary.h"
  55 #include "data/format.h"
  56 #include "data/value.h"
  57 #include "language/command.h"
  58 #include "language/dictionary/split-file.h"
  59 #include "language/lexer/lexer.h"
  60 #include "language/lexer/value-parser.h"
  61 #include "language/lexer/variable-parser.h"
  62 #include "libpspp/assertion.h"
  63 #include "libpspp/ll.h"
  64 #include "libpspp/message.h"
  65 #include "libpspp/misc.h"
  66 #include "math/categoricals.h"
  67 #include "output/tab.h"
  68
  69 #include "gettext.h"
  70 #define _(msgid) gettext (msgid)
  71
  72
  73
  74
  75 #define   PRINT_EACH_STEP  0x01
  76 #define   PRINT_SUMMARY    0x02
  77 #define   PRINT_CORR       0x04
  78 #define   PRINT_ITER       0x08
  79 #define   PRINT_GOODFIT    0x10
  80 #define   PRINT_CI         0x20
  81
  82
  83 #define PRINT_DEFAULT (PRINT_SUMMARY | PRINT_EACH_STEP)
  84
  85 /*
  86   The constant parameters of the procedure.
  87   That is, those which are set by the user.
  88 */
  89 struct lr_spec
  90 {
  91   /* The dependent variable */
  92   const struct variable *dep_var;
  93
  94   size_t n_predictor_vars;
  95   const struct variable **predictor_vars;
  96
  97   /* Which classes of missing vars are to be excluded */
  98   enum mv_class exclude;
  99
 100   /* The weight variable */
 101   const struct variable *wv;
 102
 103   const struct dictionary *dict;
 104
 105   /* True iff the constant (intercept) is to be included in the model */
 106   bool constant;
 107
 108   /* Ths maximum number of iterations */
 109   int max_iter;
 110
 111   /* Other iteration limiting conditions */
 112   double bcon;
 113   double min_epsilon;
 114   double lcon;
 115
 116   /* The confidence interval (in percent) */
 117   int confidence;
 118
 119   /* What results should be presented */
 120   unsigned int print;
 121
 122   double cut_point;
 123 };
 124
 125 /* The results and intermediate result of the procedure.
 126    These are mutated as the procedure runs. Used for
 127    temporary variables etc.
 128 */
 129 struct lr_result
 130 {
 131   bool warn_bad_weight;
 132
 133
 134   /* The two values of the dependent variable. */
 135   union value y0;
 136   union value y1;
 137
 138
 139   /* The sum of caseweights */
 140   double cc;
 141
 142   casenumber n_missing;
 143   casenumber n_nonmissing;
 144 };
 145
 146
 147 /*
 148   Convert INPUT into a dichotomous scalar.  For simple cases, this is a 1:1 mapping
 149   The return value is always either 0 or 1
 150 */
 151 static double
 152 map_dependent_var (const struct lr_spec *cmd, const struct lr_result *res, const union value *input)
 153 {
 154   int width = var_get_width (cmd->dep_var);
 155   if (value_equal (input, &res->y0, width))
 156     return 0;
 157
 158   if (value_equal (input, &res->y1, width))
 159     return 1;
 160
 161   NOT_REACHED ();
 162
 163   return SYSMIS;
 164 }
 165
 166
 167
 168 static void output_depvarmap (const struct lr_spec *cmd, const struct lr_result *);
 169
 170 static void output_variables (const struct lr_spec *cmd,
 171                               const gsl_vector *,
 172                               const gsl_vector *);
 173
 174 static void output_model_summary (const struct lr_result *,
 175                                   double initial_likelihood, double likelihood);
 176
 177 static void case_processing_summary (const struct lr_result *);
 178
 179
 180 /*
 181   Return the probability estimator (that is the estimator of logit(y) )
 182   corresponding to the coefficient estimator beta_hat for case C
 183 */
 184 static double
 185 pi_hat (const struct lr_spec *cmd,
 186         const gsl_vector *beta_hat,
 187         const struct variable **x, size_t n_x,
 188         const struct ccase *c)
 189 {
 190   int v0;
 191   double pi = 0;
 192   for (v0 = 0; v0 < n_x; ++v0)
 193     {
 194       pi += gsl_vector_get (beta_hat, v0) *
 195         case_data (c, x[v0])->f;
 196     }
 197
 198   if (cmd->constant)
 199     pi += gsl_vector_get (beta_hat, beta_hat->size - 1);
 200
 201   pi = 1.0 / (1.0 + exp(-pi));
 202
 203   return pi;
 204 }
 205
 206
 207 /*
 208   Calculates the Hessian matrix X' V  X,
 209   where: X is the n by N_X matrix comprising the n cases in INPUT
 210   V is a diagonal matrix { (pi_hat_0)(1 - pi_hat_0), (pi_hat_1)(1 - pi_hat_1), ... (pi_hat_{N-1})(1 - pi_hat_{N-1})}
 211   (the partial derivative of the predicted values)
 212
 213   If ALL predicted values derivatives are close to zero or one, then CONVERGED
 214   will be set to true.
 215 */
 216 static gsl_matrix *
 217 hessian (const struct lr_spec *cmd,
 218          struct lr_result *res,
 219          struct casereader *input,
 220          const struct variable **x, size_t n_x,
 221          const gsl_vector *beta_hat,
 222          bool *converged
 223          )
 224 {
 225   struct casereader *reader;
 226   struct ccase *c;
 227   gsl_matrix *output = gsl_matrix_calloc (beta_hat->size, beta_hat->size);
 228
 229   double max_w = -DBL_MAX;
 230
 231   for (reader = casereader_clone (input);
 232        (c = casereader_read (reader)) != NULL; case_unref (c))
 233     {
 234       int v0, v1;
 235       double pi = pi_hat (cmd, beta_hat, x, n_x, c);
 236
 237       double weight = dict_get_case_weight (cmd->dict, c, &res->warn_bad_weight);
 238       double w = pi * (1 - pi);
 239       if (w > max_w)
 240         max_w = w;
 241       w *= weight;
 242
 243       for (v0 = 0; v0 < beta_hat->size; ++v0)
 244         {
 245           double in0 = v0 < n_x ? case_data (c, x[v0])->f : 1.0;
 246           for (v1 = 0; v1 < beta_hat->size; ++v1)
 247             {
 248               double in1 = v1 < n_x ? case_data (c, x[v1])->f : 1.0 ;
 249               double *o = gsl_matrix_ptr (output, v0, v1);
 250               *o += in0 * w * in1;
 251             }
 252         }
 253     }
 254   casereader_destroy (reader);
 255
 256
 257   if ( max_w < cmd->min_epsilon)
 258     {
 259       *converged = true;
 260       msg (MN, _("All predicted values are either 1 or 0"));
 261     }
 262
 263   return output;
 264 }
 265
 266
 267 /* Calculates the value  X' (y - pi)
 268    where X is the design model,
 269    y is the vector of observed independent variables
 270    pi is the vector of estimates for y
 271
 272    As a side effect, the likelihood is stored in LIKELIHOOD
 273 */
 274 static gsl_vector *
 275 xt_times_y_pi (const struct lr_spec *cmd,
 276                struct lr_result *res,
 277                struct casereader *input,
 278                const struct variable **x, size_t n_x,
 279                const struct variable *y_var,
 280                const gsl_vector *beta_hat,
 281                double *likelihood)
 282 {
 283   struct casereader *reader;
 284   struct ccase *c;
 285   gsl_vector *output = gsl_vector_calloc (beta_hat->size);
 286
 287   *likelihood = 1.0;
 288   for (reader = casereader_clone (input);
 289        (c = casereader_read (reader)) != NULL; case_unref (c))
 290     {
 291       int v0;
 292       double pi = pi_hat (cmd, beta_hat, x, n_x, c);
 293       double weight = dict_get_case_weight (cmd->dict, c, &res->warn_bad_weight);
 294
 295
 296       double y = map_dependent_var (cmd, res, case_data (c, y_var));
 297
 298       *likelihood *= pow (pi, weight * y) * pow (1 - pi, weight * (1 - y));
 299
 300       for (v0 = 0; v0 < beta_hat->size; ++v0)
 301         {
 302           double in0 = v0 < n_x ? case_data (c, x[v0])->f : 1.0;
 303           double *o = gsl_vector_ptr (output, v0);
 304           *o += in0 * (y - pi) * weight;
 305         }
 306     }
 307
 308   casereader_destroy (reader);
 309
 310   return output;
 311 }
 312
 313
 314 /*
 315    Makes an initial pass though the data, checks that the dependent variable is
 316    dichotomous, and calculates necessary initial values.
 317
 318    Returns an initial value for \hat\beta the vector of estimators of \beta
 319 */
 320 static gsl_vector *
 321 beta_hat_initial (const struct lr_spec *cmd, struct lr_result *res, struct casereader *input)
 322 {
 323   const int width = var_get_width (cmd->dep_var);
 324
 325   struct ccase *c;
 326   struct casereader *reader;
 327   gsl_vector *b0 ;
 328   double sum;
 329   double sumA = 0.0;
 330   double sumB = 0.0;
 331
 332   bool v0set = false;
 333   bool v1set = false;
 334
 335   size_t n_coefficients = cmd->n_predictor_vars;
 336   if (cmd->constant)
 337     n_coefficients++;
 338
 339   b0 = gsl_vector_calloc (n_coefficients);
 340
 341   res->cc = 0;
 342   for (reader = casereader_clone (input);
 343        (c = casereader_read (reader)) != NULL; case_unref (c))
 344     {
 345       int v;
 346       bool missing = false;
 347       double weight = dict_get_case_weight (cmd->dict, c, &res->warn_bad_weight);
 348       const union value *depval = case_data (c, cmd->dep_var);
 349
 350       for (v = 0; v < cmd->n_predictor_vars; ++v)
 351         {
 352           const union value *val = case_data (c, cmd->predictor_vars[v]);
 353           if (var_is_value_missing (cmd->predictor_vars[v], val, cmd->exclude))
 354             {
 355               missing = true;
 356               break;
 357             }
 358         }
 359
 360       if (missing)
 361         {
 362           res->n_missing++;
 363           continue;
 364         }
 365
 366       res->n_nonmissing++;
 367
 368       if (!v0set)
 369         {
 370           value_clone (&res->y0, depval, width);
 371           v0set = true;
 372         }
 373       else if (!v1set)
 374         {
 375           if ( !value_equal (&res->y0, depval, width))
 376             {
 377               value_clone (&res->y1, depval, width);
 378               v1set = true;
 379             }
 380         }
 381       else
 382         {
 383           if (! value_equal (&res->y0, depval, width)
 384               &&
 385               ! value_equal (&res->y1, depval, width)
 386               )
 387             {
 388               msg (ME, _("Dependent variable's values are not dichotomous."));
 389               goto error;
 390             }
 391         }
 392
 393       if (value_equal (&res->y0, depval, width))
 394           sumA += weight;
 395
 396       if (value_equal (&res->y1, depval, width))
 397           sumB += weight;
 398
 399
 400       res->cc += weight;
 401     }
 402   casereader_destroy (reader);
 403
 404   sum = sumB;
 405
 406   /* Ensure that Y0 is less than Y1.  Otherwise the mapping gets
 407      inverted, which is confusing to users */
 408   if (var_is_numeric (cmd->dep_var) && value_compare_3way (&res->y0, &res->y1, width) > 0)
 409     {
 410       union value tmp;
 411       value_clone (&tmp, &res->y0, width);
 412       value_copy (&res->y0, &res->y1, width);
 413       value_copy (&res->y1, &tmp, width);
 414       value_destroy (&tmp, width);
 415       sum = sumA;
 416     }
 417
 418   if ( cmd->constant)
 419     {
 420       double mean = sum / res->cc;
 421       gsl_vector_set (b0, b0->size - 1, log (mean / (1 - mean)));
 422     }
 423
 424   return b0;
 425
 426  error:
 427   casereader_destroy (reader);
 428   return NULL;
 429 }
 430
 431
 432
 433 static bool
 434 run_lr (const struct lr_spec *cmd, struct casereader *input,
 435         const struct dataset *ds UNUSED)
 436 {
 437   int i,j;
 438
 439   gsl_vector *beta_hat;
 440   gsl_vector *se ;
 441
 442   bool converged = false;
 443   double likelihood = -1;
 444   double prev_likelihood = -1;
 445   double initial_likelihood = -1;
 446
 447   struct lr_result work;
 448   work.n_missing = 0;
 449   work.n_nonmissing = 0;
 450   work.warn_bad_weight = true;
 451
 452
 453   /* Get the initial estimates of \beta and their standard errors */
 454   beta_hat = beta_hat_initial (cmd, &work, input);
 455   if (NULL == beta_hat)
 456     return false;
 457
 458   output_depvarmap (cmd, &work);
 459
 460   se = gsl_vector_alloc (beta_hat->size);
 461
 462   case_processing_summary (&work);
 463
 464
 465   input = casereader_create_filter_missing (input,
 466                                             cmd->predictor_vars,
 467                                             cmd->n_predictor_vars,
 468                                             cmd->exclude,
 469                                             NULL,
 470                                             NULL);
 471
 472
 473   /* Start the Newton Raphson iteration process... */
 474   for( i = 0 ; i < cmd->max_iter ; ++i)
 475     {
 476       double min, max;
 477       gsl_matrix *m ;
 478       gsl_vector *v ;
 479
 480       m = hessian (cmd, &work, input,
 481                    cmd->predictor_vars, cmd->n_predictor_vars,
 482                    beta_hat,
 483                    &converged);
 484
 485       gsl_linalg_cholesky_decomp (m);
 486       gsl_linalg_cholesky_invert (m);
 487
 488       v = xt_times_y_pi (cmd, &work, input,
 489                          cmd->predictor_vars, cmd->n_predictor_vars,
 490                          cmd->dep_var,
 491                          beta_hat,
 492                          &likelihood);
 493
 494       {
 495         /* delta = M.v */
 496         gsl_vector *delta = gsl_vector_alloc (v->size);
 497         gsl_blas_dgemv (CblasNoTrans, 1.0, m, v, 0, delta);
 498         gsl_vector_free (v);
 499
 500         for (j = 0; j < se->size; ++j)
 501           {
 502             double *ptr = gsl_vector_ptr (se, j);
 503             *ptr = gsl_matrix_get (m, j, j);
 504           }
 505
 506         gsl_matrix_free (m);
 507
 508         gsl_vector_add (beta_hat, delta);
 509
 510         gsl_vector_minmax (delta, &min, &max);
 511
 512         if ( fabs (min) < cmd->bcon && fabs (max) < cmd->bcon)
 513           {
 514             msg (MN, _("Estimation terminated at iteration number %d because parameter estimates changed by less than %g"),
 515                  i + 1, cmd->bcon);
 516             converged = true;
 517           }
 518
 519         gsl_vector_free (delta);
 520       }
 521
 522       if ( prev_likelihood >= 0)
 523         {
 524           if (-log (likelihood) > -(1.0 - cmd->lcon) * log (prev_likelihood))
 525             {
 526               msg (MN, _("Estimation terminated at iteration number %d because Log Likelihood decreased by less than %g%%"), i + 1, 100 * cmd->lcon);
 527               converged = true;
 528             }
 529         }
 530       if (i == 0)
 531         initial_likelihood = likelihood;
 532       prev_likelihood = likelihood;
 533
 534       if (converged)
 535         break;
 536     }
 537   casereader_destroy (input);
 538   assert (initial_likelihood >= 0);
 539
 540   for (i = 0; i < se->size; ++i)
 541     {
 542       double *ptr = gsl_vector_ptr (se, i);
 543       *ptr = sqrt (*ptr);
 544     }
 545
 546   output_model_summary (&work, initial_likelihood, likelihood);
 547   output_variables (cmd, beta_hat, se);
 548
 549   gsl_vector_free (beta_hat);
 550   gsl_vector_free (se);
 551
 552   return true;
 553 }
 554
 555 /* Parse the LOGISTIC REGRESSION command syntax */
 556 int
 557 cmd_logistic (struct lexer *lexer, struct dataset *ds)
 558 {
 559   struct lr_spec lr;
 560   lr.dict = dataset_dict (ds);
 561   lr.n_predictor_vars = 0;
 562   lr.predictor_vars = NULL;
 563   lr.exclude = MV_ANY;
 564   lr.wv = dict_get_weight (lr.dict);
 565   lr.max_iter = 20;
 566   lr.lcon = 0.0000;
 567   lr.bcon = 0.001;
 568   lr.min_epsilon = 0.00000001;
 569   lr.cut_point = 0.5;
 570   lr.constant = true;
 571   lr.confidence = 95;
 572   lr.print = PRINT_DEFAULT;
 573
 574
 575   if (lex_match_id (lexer, "VARIABLES"))
 576     lex_match (lexer, T_EQUALS);
 577
 578   if (! (lr.dep_var = parse_variable_const (lexer, lr.dict)))
 579     goto error;
 580
 581   lex_force_match (lexer, T_WITH);
 582
 583   if (!parse_variables_const (lexer, lr.dict,
 584                               &lr.predictor_vars, &lr.n_predictor_vars,
 585                               PV_NO_DUPLICATE | PV_NUMERIC))
 586     goto error;
 587
 588
 589   while (lex_token (lexer) != T_ENDCMD)
 590     {
 591       lex_match (lexer, T_SLASH);
 592
 593       if (lex_match_id (lexer, "MISSING"))
 594         {
 595           lex_match (lexer, T_EQUALS);
 596           while (lex_token (lexer) != T_ENDCMD
 597                  && lex_token (lexer) != T_SLASH)
 598             {
 599               if (lex_match_id (lexer, "INCLUDE"))
 600                 {
 601                   lr.exclude = MV_SYSTEM;
 602                 }
 603               else if (lex_match_id (lexer, "EXCLUDE"))
 604                 {
 605                   lr.exclude = MV_ANY;
 606                 }
 607               else
 608                 {
 609                   lex_error (lexer, NULL);
 610                   goto error;
 611                 }
 612             }
 613         }
 614       else if (lex_match_id (lexer, "ORIGIN"))
 615         {
 616           lr.constant = false;
 617         }
 618       else if (lex_match_id (lexer, "NOORIGIN"))
 619         {
 620           lr.constant = true;
 621         }
 622       else if (lex_match_id (lexer, "NOCONST"))
 623         {
 624           lr.constant = false;
 625         }
 626       else if (lex_match_id (lexer, "EXTERNAL"))
 627         {
 628           /* This is for compatibility.  It does nothing */
 629         }
 630       else if (lex_match_id (lexer, "PRINT"))
 631         {
 632           lex_match (lexer, T_EQUALS);
 633           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 634             {
 635               if (lex_match_id (lexer, "DEFAULT"))
 636                 {
 637                   lr.print |= PRINT_DEFAULT;
 638                 }
 639               else if (lex_match_id (lexer, "SUMMARY"))
 640                 {
 641                   lr.print |= PRINT_SUMMARY;
 642                 }
 643 #if 0
 644               else if (lex_match_id (lexer, "CORR"))
 645                 {
 646                   lr.print |= PRINT_CORR;
 647                 }
 648               else if (lex_match_id (lexer, "ITER"))
 649                 {
 650                   lr.print |= PRINT_ITER;
 651                 }
 652               else if (lex_match_id (lexer, "GOODFIT"))
 653                 {
 654                   lr.print |= PRINT_GOODFIT;
 655                 }
 656 #endif
 657               else if (lex_match_id (lexer, "CI"))
 658                 {
 659                   lr.print |= PRINT_CI;
 660                   if (lex_force_match (lexer, T_LPAREN))
 661                     {
 662                       if (! lex_force_int (lexer))
 663                         {
 664                           lex_error (lexer, NULL);
 665                           goto error;
 666                         }
 667                       lr.confidence = lex_integer (lexer);
 668                       lex_get (lexer);
 669                       if ( ! lex_force_match (lexer, T_RPAREN))
 670                         {
 671                           lex_error (lexer, NULL);
 672                           goto error;
 673                         }
 674                     }
 675                 }
 676               else if (lex_match_id (lexer, "ALL"))
 677                 {
 678                   lr.print = ~0x0000;
 679                 }
 680               else
 681                 {
 682                   lex_error (lexer, NULL);
 683                   goto error;
 684                 }
 685             }
 686         }
 687       else if (lex_match_id (lexer, "CRITERIA"))
 688         {
 689           lex_match (lexer, T_EQUALS);
 690           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 691             {
 692               if (lex_match_id (lexer, "BCON"))
 693                 {
 694                   if (lex_force_match (lexer, T_LPAREN))
 695                     {
 696                       if (! lex_force_num (lexer))
 697                         {
 698                           lex_error (lexer, NULL);
 699                           goto error;
 700                         }
 701                       lr.bcon = lex_number (lexer);
 702                       lex_get (lexer);
 703                       if ( ! lex_force_match (lexer, T_RPAREN))
 704                         {
 705                           lex_error (lexer, NULL);
 706                           goto error;
 707                         }
 708                     }
 709                 }
 710               else if (lex_match_id (lexer, "ITERATE"))
 711                 {
 712                   if (lex_force_match (lexer, T_LPAREN))
 713                     {
 714                       if (! lex_force_int (lexer))
 715                         {
 716                           lex_error (lexer, NULL);
 717                           goto error;
 718                         }
 719                       lr.max_iter = lex_integer (lexer);
 720                       lex_get (lexer);
 721                       if ( ! lex_force_match (lexer, T_RPAREN))
 722                         {
 723                           lex_error (lexer, NULL);
 724                           goto error;
 725                         }
 726                     }
 727                 }
 728               else if (lex_match_id (lexer, "LCON"))
 729                 {
 730                   if (lex_force_match (lexer, T_LPAREN))
 731                     {
 732                       if (! lex_force_num (lexer))
 733                         {
 734                           lex_error (lexer, NULL);
 735                           goto error;
 736                         }
 737                       lr.lcon = lex_number (lexer);
 738                       lex_get (lexer);
 739                       if ( ! lex_force_match (lexer, T_RPAREN))
 740                         {
 741                           lex_error (lexer, NULL);
 742                           goto error;
 743                         }
 744                     }
 745                 }
 746               else if (lex_match_id (lexer, "EPS"))
 747                 {
 748                   if (lex_force_match (lexer, T_LPAREN))
 749                     {
 750                       if (! lex_force_num (lexer))
 751                         {
 752                           lex_error (lexer, NULL);
 753                           goto error;
 754                         }
 755                       lr.min_epsilon = lex_number (lexer);
 756                       lex_get (lexer);
 757                       if ( ! lex_force_match (lexer, T_RPAREN))
 758                         {
 759                           lex_error (lexer, NULL);
 760                           goto error;
 761                         }
 762                     }
 763                 }
 764               else
 765                 {
 766                   lex_error (lexer, NULL);
 767                   goto error;
 768                 }
 769             }
 770         }
 771       else
 772         {
 773           lex_error (lexer, NULL);
 774           goto error;
 775         }
 776     }
 777
 778
 779
 780   {
 781     struct casegrouper *grouper;
 782     struct casereader *group;
 783     bool ok;
 784
 785     grouper = casegrouper_create_splits (proc_open (ds), lr.dict);
 786     while (casegrouper_get_next_group (grouper, &group))
 787       ok = run_lr (&lr, group, ds);
 788     ok = casegrouper_destroy (grouper);
 789     ok = proc_commit (ds) && ok;
 790   }
 791
 792   free (lr.predictor_vars);
 793   return CMD_SUCCESS;
 794
 795  error:
 796
 797   free (lr.predictor_vars);
 798   return CMD_FAILURE;
 799 }
 800
 801
 802 \f
 803
 804 /* Show the Dependent Variable Encoding box.
 805    This indicates how the dependent variable
 806    is mapped to the internal zero/one values.
 807 */
 808 static void
 809 output_depvarmap (const struct lr_spec *cmd, const struct lr_result *res)
 810 {
 811   const int heading_columns = 0;
 812   const int heading_rows = 1;
 813   struct tab_table *t;
 814   struct string str;
 815
 816   const int nc = 2;
 817   int nr = heading_rows + 2;
 818
 819   t = tab_create (nc, nr);
 820   tab_title (t, _("Dependent Variable Encoding"));
 821
 822   tab_headers (t, heading_columns, 0, heading_rows, 0);
 823
 824   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
 825
 826   tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
 827   tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
 828
 829   tab_text (t,  0, 0, TAB_CENTER | TAT_TITLE, _("Original Value"));
 830   tab_text (t,  1, 0, TAB_CENTER | TAT_TITLE, _("Internal Value"));
 831
 832
 833
 834   ds_init_empty (&str);
 835   var_append_value_name (cmd->dep_var, &res->y0, &str);
 836   tab_text (t,  0, 0 + heading_rows,  0, ds_cstr (&str));
 837
 838   ds_clear (&str);
 839   var_append_value_name (cmd->dep_var, &res->y1, &str);
 840   tab_text (t,  0, 1 + heading_rows,  0, ds_cstr (&str));
 841
 842
 843   tab_double (t, 1, 0 + heading_rows, 0, map_dependent_var (cmd, res, &res->y0), &F_8_0);
 844   tab_double (t, 1, 1 + heading_rows, 0, map_dependent_var (cmd, res, &res->y1), &F_8_0);
 845   ds_destroy (&str);
 846
 847   tab_submit (t);
 848 }
 849
 850
 851 /* Show the Variables in the Equation box */
 852 static void
 853 output_variables (const struct lr_spec *cmd,
 854                   const gsl_vector *beta,
 855                   const gsl_vector *se)
 856 {
 857   int row = 0;
 858   const int heading_columns = 1;
 859   int heading_rows = 1;
 860   struct tab_table *t;
 861
 862   int idx;
 863   int n_rows = cmd->n_predictor_vars;
 864
 865   int nc = 8;
 866   int nr ;
 867   if (cmd->print & PRINT_CI)
 868     {
 869       nc += 2;
 870       heading_rows += 1;
 871       row++;
 872     }
 873   nr = heading_rows + cmd->n_predictor_vars;
 874   if (cmd->constant)
 875     nr++;
 876
 877   t = tab_create (nc, nr);
 878   tab_title (t, _("Variables in the Equation"));
 879
 880   tab_headers (t, heading_columns, 0, heading_rows, 0);
 881
 882   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
 883
 884   tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
 885   tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
 886
 887   tab_text (t,  0, row + 1, TAB_CENTER | TAT_TITLE, _("Step 1"));
 888
 889   tab_text (t,  2, row, TAB_CENTER | TAT_TITLE, _("B"));
 890   tab_text (t,  3, row, TAB_CENTER | TAT_TITLE, _("S.E."));
 891   tab_text (t,  4, row, TAB_CENTER | TAT_TITLE, _("Wald"));
 892   tab_text (t,  5, row, TAB_CENTER | TAT_TITLE, _("df"));
 893   tab_text (t,  6, row, TAB_CENTER | TAT_TITLE, _("Sig."));
 894   tab_text (t,  7, row, TAB_CENTER | TAT_TITLE, _("Exp(B)"));
 895
 896   if (cmd->print & PRINT_CI)
 897     {
 898       tab_joint_text_format (t, 8, 0, 9, 0,
 899                              TAB_CENTER | TAT_TITLE, _("%d%% CI for Exp(B)"), cmd->confidence);
 900
 901       tab_text (t,  8, row, TAB_CENTER | TAT_TITLE, _("Lower"));
 902       tab_text (t,  9, row, TAB_CENTER | TAT_TITLE, _("Upper"));
 903     }
 904
 905   if (cmd->constant)
 906     n_rows++;
 907
 908   for (idx = 0 ; idx < n_rows; ++idx)
 909     {
 910       const int r = idx + heading_rows;
 911
 912       const double b = gsl_vector_get (beta, idx);
 913       const double sigma = gsl_vector_get (se, idx);
 914       const double wald = pow2 (b / sigma);
 915       const double df = 1;
 916
 917       if (idx < cmd->n_predictor_vars)
 918         tab_text (t, 1, r, TAB_LEFT | TAT_TITLE,
 919                   var_to_string (cmd->predictor_vars[idx]));
 920
 921       tab_double (t, 2, r, 0, b, 0);
 922       tab_double (t, 3, r, 0, sigma, 0);
 923       tab_double (t, 4, r, 0, wald, 0);
 924       tab_double (t, 5, r, 0, df, &F_8_0);
 925       tab_double (t, 6, r, 0,  gsl_cdf_chisq_Q (wald, df), 0);
 926       tab_double (t, 7, r, 0, exp (b), 0);
 927
 928       if (cmd->print & PRINT_CI)
 929         {
 930           double wc = gsl_cdf_ugaussian_Pinv (0.5 + cmd->confidence / 200.0);
 931           wc *= sigma;
 932
 933           if (idx < cmd->n_predictor_vars)
 934             {
 935               tab_double (t, 8, r, 0, exp (b - wc), 0);
 936               tab_double (t, 9, r, 0, exp (b + wc), 0);
 937             }
 938         }
 939     }
 940
 941   if ( cmd->constant)
 942     tab_text (t, 1, nr - 1, TAB_LEFT | TAT_TITLE, _("Constant"));
 943
 944   tab_submit (t);
 945 }
 946
 947
 948 /* Show the model summary box */
 949 static void
 950 output_model_summary (const struct lr_result *res,
 951                       double initial_likelihood, double likelihood)
 952 {
 953   const int heading_columns = 0;
 954   const int heading_rows = 1;
 955   struct tab_table *t;
 956
 957   const int nc = 4;
 958   int nr = heading_rows + 1;
 959   double cox;
 960
 961   t = tab_create (nc, nr);
 962   tab_title (t, _("Model Summary"));
 963
 964   tab_headers (t, heading_columns, 0, heading_rows, 0);
 965
 966   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
 967
 968   tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
 969   tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
 970
 971   tab_text (t,  0, 0, TAB_LEFT | TAT_TITLE, _("Step 1"));
 972   tab_text (t,  1, 0, TAB_CENTER | TAT_TITLE, _("-2 Log likelihood"));
 973   tab_double (t,  1, 1, 0, -2 * log (likelihood), 0);
 974
 975
 976   tab_text (t,  2, 0, TAB_CENTER | TAT_TITLE, _("Cox & Snell R Square"));
 977   cox =  1.0 - pow (initial_likelihood /likelihood, 2 / res->cc);
 978   tab_double (t,  2, 1, 0, cox, 0);
 979
 980   tab_text (t,  3, 0, TAB_CENTER | TAT_TITLE, _("Nagelkerke R Square"));
 981   tab_double (t,  3, 1, 0, cox / ( 1.0 - pow (initial_likelihood, 2 / res->cc)), 0);
 982
 983
 984   tab_submit (t);
 985 }
 986
 987 /* Show the case processing summary box */
 988 static void
 989 case_processing_summary (const struct lr_result *res)
 990 {
 991   const int heading_columns = 1;
 992   const int heading_rows = 1;
 993   struct tab_table *t;
 994
 995   const int nc = 3;
 996   const int nr = heading_rows + 3;
 997   casenumber total;
 998
 999   t = tab_create (nc, nr);
1000   tab_title (t, _("Case Processing Summary"));
1001
1002   tab_headers (t, heading_columns, 0, heading_rows, 0);
1003
1004   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
1005
1006   tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1007   tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
1008
1009   tab_text (t,  0, 0, TAB_LEFT | TAT_TITLE, _("Unweighted Cases"));
1010   tab_text (t,  1, 0, TAB_CENTER | TAT_TITLE, _("N"));
1011   tab_text (t,  2, 0, TAB_CENTER | TAT_TITLE, _("Percent"));
1012
1013
1014   tab_text (t,  0, 1, TAB_LEFT | TAT_TITLE, _("Included in Analysis"));
1015   tab_text (t,  0, 2, TAB_LEFT | TAT_TITLE, _("Missing Cases"));
1016   tab_text (t,  0, 3, TAB_LEFT | TAT_TITLE, _("Total"));
1017
1018   tab_double (t,  1, 1, 0, res->n_nonmissing, &F_8_0);
1019   tab_double (t,  1, 2, 0, res->n_missing, &F_8_0);
1020
1021   total = res->n_nonmissing + res->n_missing;
1022   tab_double (t,  1, 3, 0, total , &F_8_0);
1023
1024   tab_double (t,  2, 1, 0, 100 * res->n_nonmissing / (double) total, 0);
1025   tab_double (t,  2, 2, 0, 100 * res->n_missing / (double) total, 0);
1026   tab_double (t,  2, 3, 0, 100 * total / (double) total, 0);
1027
1028   tab_submit (t);
1029 }
1030