pintos-os.org Git - pspp/blob - src/language/stats/logistic.c

   1 /* pspp - a program for statistical analysis.
   2    Copyright (C) 2012 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17
  18 /*
  19    References:
  20    1. "Coding Logistic Regression with Newton-Raphson", James McCaffrey
  21    http://msdn.microsoft.com/en-us/magazine/jj618304.aspx
  22
  23    2. "SPSS Statistical Algorithms" Chapter LOGISTIC REGRESSION Algorithms
  24
  25
  26    The Newton Raphson method finds successive approximations to $\bf b$ where
  27    approximation ${\bf b}_t$ is (hopefully) better than the previous ${\bf b}_{t-1}$.
  28
  29    $ {\bf b}_t = {\bf b}_{t -1} + ({\bf X}^T{\bf W}_{t-1}{\bf X})^{-1}{\bf X}^T({\bf y} - {\bf \pi}_{t-1})$
  30    where:
  31
  32    $\bf X$ is the $n \times p$ design matrix, $n$ being the number of cases,
  33    $p$ the number of parameters, \par
  34    $\bf W$ is the diagonal matrix whose diagonal elements are
  35    $\hat{\pi}_0(1 - \hat{\pi}_0), \, \hat{\pi}_1(1 - \hat{\pi}_2)\dots \hat{\pi}_{n-1}(1 - \hat{\pi}_{n-1})$
  36    \par
  37
  38 */
  39
  40 #include <config.h>
  41
  42 #include <gsl/gsl_blas.h>
  43
  44 #include <gsl/gsl_linalg.h>
  45 #include <gsl/gsl_cdf.h>
  46 #include <gsl/gsl_matrix.h>
  47 #include <gsl/gsl_vector.h>
  48 #include <math.h>
  49
  50 #include "data/case.h"
  51 #include "data/casegrouper.h"
  52 #include "data/casereader.h"
  53 #include "data/dataset.h"
  54 #include "data/dictionary.h"
  55 #include "data/format.h"
  56 #include "data/value.h"
  57 #include "language/command.h"
  58 #include "language/dictionary/split-file.h"
  59 #include "language/lexer/lexer.h"
  60 #include "language/lexer/value-parser.h"
  61 #include "language/lexer/variable-parser.h"
  62 #include "libpspp/assertion.h"
  63 #include "libpspp/ll.h"
  64 #include "libpspp/message.h"
  65 #include "libpspp/misc.h"
  66 #include "math/categoricals.h"
  67 #include "output/tab.h"
  68
  69 #include "gettext.h"
  70 #define _(msgid) gettext (msgid)
  71
  72
  73
  74
  75 #define   PRINT_EACH_STEP  0x01
  76 #define   PRINT_SUMMARY    0x02
  77 #define   PRINT_CORR       0x04
  78 #define   PRINT_ITER       0x08
  79 #define   PRINT_GOODFIT    0x10
  80 #define   PRINT_CI         0x20
  81
  82
  83 #define PRINT_DEFAULT (PRINT_SUMMARY | PRINT_EACH_STEP)
  84
  85 /*
  86   The constant parameters of the procedure.
  87   That is, those which are set by the user.
  88 */
  89 struct lr_spec
  90 {
  91   /* The dependent variable */
  92   const struct variable *dep_var;
  93
  94   size_t n_predictor_vars;
  95   const struct variable **predictor_vars;
  96
  97   /* Which classes of missing vars are to be excluded */
  98   enum mv_class exclude;
  99
 100   /* The weight variable */
 101   const struct variable *wv;
 102
 103   const struct dictionary *dict;
 104
 105   /* True iff the constant (intercept) is to be included in the model */
 106   bool constant;
 107
 108   /* Ths maximum number of iterations */
 109   int max_iter;
 110
 111   /* Other iteration limiting conditions */
 112   double bcon;
 113   double min_epsilon;
 114   double lcon;
 115
 116   /* The confidence interval (in percent) */
 117   int confidence;
 118
 119   /* What results should be presented */
 120   unsigned int print;
 121
 122   double cut_point;
 123 };
 124
 125 /* The results and intermediate result of the procedure.
 126    These are mutated as the procedure runs. Used for
 127    temporary variables etc.
 128 */
 129 struct lr_result
 130 {
 131   bool warn_bad_weight;
 132
 133
 134   /* The two values of the dependent variable. */
 135   union value y0;
 136   union value y1;
 137
 138
 139   /* The sum of caseweights */
 140   double cc;
 141
 142   casenumber n_missing;
 143   casenumber n_nonmissing;
 144 };
 145
 146
 147 /*
 148   Convert INPUT into a dichotomous scalar.  For simple cases, this is a 1:1 mapping
 149   The return value is always either 0 or 1
 150 */
 151 static double
 152 map_dependent_var (const struct lr_spec *cmd, const struct lr_result *res, const union value *input)
 153 {
 154   int width = var_get_width (cmd->dep_var);
 155   if (value_equal (input, &res->y0, width))
 156     return 0;
 157
 158   if (value_equal (input, &res->y1, width))
 159     return 1;
 160
 161   NOT_REACHED ();
 162
 163   return SYSMIS;
 164 }
 165
 166
 167
 168 static void output_depvarmap (const struct lr_spec *cmd, const struct lr_result *);
 169
 170 static void output_variables (const struct lr_spec *cmd,
 171                               const gsl_vector *,
 172                               const gsl_vector *);
 173
 174 static void output_model_summary (const struct lr_result *,
 175                                   double initial_likelihood, double likelihood);
 176
 177 static void case_processing_summary (const struct lr_result *);
 178
 179
 180 /*
 181   Return the probability estimator (that is the estimator of logit(y) )
 182   corresponding to the coefficient estimator beta_hat for case C
 183 */
 184 static double
 185 pi_hat (const struct lr_spec *cmd,
 186         const gsl_vector *beta_hat,
 187         const struct variable **x, size_t n_x,
 188         const struct ccase *c)
 189 {
 190   int v0;
 191   double pi = 0;
 192   for (v0 = 0; v0 < n_x; ++v0)
 193     {
 194       pi += gsl_vector_get (beta_hat, v0) *
 195         case_data (c, x[v0])->f;
 196     }
 197
 198   if (cmd->constant)
 199     pi += gsl_vector_get (beta_hat, beta_hat->size - 1);
 200
 201   pi = 1.0 / (1.0 + exp(-pi));
 202
 203   return pi;
 204 }
 205
 206
 207 /*
 208   Calculates the Hessian matrix X' V  X,
 209   where: X is the n by N_X matrix comprising the n cases in INPUT
 210   V is a diagonal matrix { (pi_hat_0)(1 - pi_hat_0), (pi_hat_1)(1 - pi_hat_1), ... (pi_hat_{N-1})(1 - pi_hat_{N-1})}
 211   (the partial derivative of the predicted values)
 212
 213   If ALL predicted values derivatives are close to zero or one, then CONVERGED
 214   will be set to true.
 215 */
 216 static gsl_matrix *
 217 hessian (const struct lr_spec *cmd,
 218          struct lr_result *res,
 219          struct casereader *input,
 220          const struct variable **x, size_t n_x,
 221          const gsl_vector *beta_hat,
 222          bool *converged
 223          )
 224 {
 225   struct casereader *reader;
 226   struct ccase *c;
 227   gsl_matrix *output = gsl_matrix_calloc (beta_hat->size, beta_hat->size);
 228
 229   double max_w = -DBL_MAX;
 230
 231   for (reader = casereader_clone (input);
 232        (c = casereader_read (reader)) != NULL; case_unref (c))
 233     {
 234       int v0, v1;
 235       double pi = pi_hat (cmd, beta_hat, x, n_x, c);
 236
 237       double weight = dict_get_case_weight (cmd->dict, c, &res->warn_bad_weight);
 238       double w = pi * (1 - pi);
 239       if (w > max_w)
 240         max_w = w;
 241       w *= weight;
 242
 243       for (v0 = 0; v0 < beta_hat->size; ++v0)
 244         {
 245           double in0 = v0 < n_x ? case_data (c, x[v0])->f : 1.0;
 246           for (v1 = 0; v1 < beta_hat->size; ++v1)
 247             {
 248               double in1 = v1 < n_x ? case_data (c, x[v1])->f : 1.0 ;
 249               double *o = gsl_matrix_ptr (output, v0, v1);
 250               *o += in0 * w * in1;
 251             }
 252         }
 253     }
 254   casereader_destroy (reader);
 255
 256
 257   if ( max_w < cmd->min_epsilon)
 258     {
 259       *converged = true;
 260       msg (MN, _("All predicted values are either 1 or 0"));
 261     }
 262
 263   return output;
 264 }
 265
 266
 267 /* Calculates the value  X' (y - pi)
 268    where X is the design model,
 269    y is the vector of observed independent variables
 270    pi is the vector of estimates for y
 271
 272    As a side effect, the likelihood is stored in LIKELIHOOD
 273 */
 274 static gsl_vector *
 275 xt_times_y_pi (const struct lr_spec *cmd,
 276                struct lr_result *res,
 277                struct casereader *input,
 278                const struct variable **x, size_t n_x,
 279                const struct variable *y_var,
 280                const gsl_vector *beta_hat,
 281                double *likelihood)
 282 {
 283   struct casereader *reader;
 284   struct ccase *c;
 285   gsl_vector *output = gsl_vector_calloc (beta_hat->size);
 286
 287   *likelihood = 1.0;
 288   for (reader = casereader_clone (input);
 289        (c = casereader_read (reader)) != NULL; case_unref (c))
 290     {
 291       int v0;
 292       double pi = pi_hat (cmd, beta_hat, x, n_x, c);
 293       double weight = dict_get_case_weight (cmd->dict, c, &res->warn_bad_weight);
 294
 295
 296       double y = map_dependent_var (cmd, res, case_data (c, y_var));
 297
 298       *likelihood *= pow (pi, weight * y) * pow (1 - pi, weight * (1 - y));
 299
 300       for (v0 = 0; v0 < beta_hat->size; ++v0)
 301         {
 302           double in0 = v0 < n_x ? case_data (c, x[v0])->f : 1.0;
 303           double *o = gsl_vector_ptr (output, v0);
 304           *o += in0 * (y - pi) * weight;
 305         }
 306     }
 307
 308   casereader_destroy (reader);
 309
 310   return output;
 311 }
 312
 313
 314 /*
 315    Makes an initial pass though the data, checks that the dependent variable is
 316    dichotomous, and calculates necessary initial values.
 317
 318    Returns an initial value for \hat\beta the vector of estimators of \beta
 319 */
 320 static gsl_vector *
 321 beta_hat_initial (const struct lr_spec *cmd, struct lr_result *res, struct casereader *input)
 322 {
 323   const int width = var_get_width (cmd->dep_var);
 324
 325   struct ccase *c;
 326   struct casereader *reader;
 327   gsl_vector *b0 ;
 328   double sum;
 329   double sumA = 0.0;
 330   double sumB = 0.0;
 331
 332   bool v0set = false;
 333   bool v1set = false;
 334
 335   size_t n_coefficients = cmd->n_predictor_vars;
 336   if (cmd->constant)
 337     n_coefficients++;
 338
 339   b0 = gsl_vector_calloc (n_coefficients);
 340
 341   res->cc = 0;
 342   for (reader = casereader_clone (input);
 343        (c = casereader_read (reader)) != NULL; case_unref (c))
 344     {
 345       int v;
 346       bool missing = false;
 347       double weight = dict_get_case_weight (cmd->dict, c, &res->warn_bad_weight);
 348       const union value *depval = case_data (c, cmd->dep_var);
 349
 350       for (v = 0; v < cmd->n_predictor_vars; ++v)
 351         {
 352           const union value *val = case_data (c, cmd->predictor_vars[v]);
 353           if (var_is_value_missing (cmd->predictor_vars[v], val, cmd->exclude))
 354             {
 355               missing = true;
 356               break;
 357             }
 358         }
 359
 360       if (missing)
 361         {
 362           res->n_missing++;
 363           continue;
 364         }
 365
 366       res->n_nonmissing++;
 367
 368       if (!v0set)
 369         {
 370           value_clone (&res->y0, depval, width);
 371           v0set = true;
 372         }
 373       else if (!v1set)
 374         {
 375           if ( !value_equal (&res->y0, depval, width))
 376             {
 377               value_clone (&res->y1, depval, width);
 378               v1set = true;
 379             }
 380         }
 381       else
 382         {
 383           if (! value_equal (&res->y0, depval, width)
 384               &&
 385               ! value_equal (&res->y1, depval, width)
 386               )
 387             {
 388               msg (ME, _("Dependent variable's values are not dichotomous."));
 389               goto error;
 390             }
 391         }
 392
 393       if (value_equal (&res->y0, depval, width))
 394           sumA += weight;
 395
 396       if (value_equal (&res->y1, depval, width))
 397           sumB += weight;
 398
 399
 400       res->cc += weight;
 401     }
 402   casereader_destroy (reader);
 403
 404   sum = sumB;
 405
 406   /* Ensure that Y0 is less than Y1.  Otherwise the mapping gets
 407      inverted, which is confusing to users */
 408   if (var_is_numeric (cmd->dep_var) && value_compare_3way (&res->y0, &res->y1, width) > 0)
 409     {
 410       union value tmp;
 411       value_clone (&tmp, &res->y0, width);
 412       value_copy (&res->y0, &res->y1, width);
 413       value_copy (&res->y1, &tmp, width);
 414       value_destroy (&tmp, width);
 415       sum = sumA;
 416     }
 417
 418   if ( cmd->constant)
 419     {
 420       double mean = sum / res->cc;
 421       gsl_vector_set (b0, b0->size - 1, log (mean / (1 - mean)));
 422     }
 423
 424   return b0;
 425
 426  error:
 427   casereader_destroy (reader);
 428   return NULL;
 429 }
 430
 431
 432
 433 static bool
 434 run_lr (const struct lr_spec *cmd, struct casereader *input,
 435         const struct dataset *ds UNUSED)
 436 {
 437   int i,j;
 438
 439   gsl_vector *beta_hat;
 440   gsl_vector *se ;
 441
 442   bool converged = false;
 443   double likelihood;
 444   double prev_likelihood = -1;
 445   double initial_likelihood ;
 446
 447   struct lr_result work;
 448   work.n_missing = 0;
 449   work.n_nonmissing = 0;
 450   work.warn_bad_weight = true;
 451
 452
 453   /* Get the initial estimates of \beta and their standard errors */
 454   beta_hat = beta_hat_initial (cmd, &work, input);
 455   if (NULL == beta_hat)
 456     return false;
 457
 458   output_depvarmap (cmd, &work);
 459
 460   se = gsl_vector_alloc (beta_hat->size);
 461
 462   case_processing_summary (&work);
 463
 464
 465   input = casereader_create_filter_missing (input,
 466                                             cmd->predictor_vars,
 467                                             cmd->n_predictor_vars,
 468                                             cmd->exclude,
 469                                             NULL,
 470                                             NULL);
 471
 472
 473   /* Start the Newton Raphson iteration process... */
 474   for( i = 0 ; i < cmd->max_iter ; ++i)
 475     {
 476       double min, max;
 477       gsl_matrix *m ;
 478       gsl_vector *v ;
 479
 480       m = hessian (cmd, &work, input,
 481                    cmd->predictor_vars, cmd->n_predictor_vars,
 482                    beta_hat,
 483                    &converged);
 484
 485       gsl_linalg_cholesky_decomp (m);
 486       gsl_linalg_cholesky_invert (m);
 487
 488       v = xt_times_y_pi (cmd, &work, input,
 489                          cmd->predictor_vars, cmd->n_predictor_vars,
 490                          cmd->dep_var,
 491                          beta_hat,
 492                          &likelihood);
 493
 494       {
 495         /* delta = M.v */
 496         gsl_vector *delta = gsl_vector_alloc (v->size);
 497         gsl_blas_dgemv (CblasNoTrans, 1.0, m, v, 0, delta);
 498         gsl_vector_free (v);
 499
 500         for (j = 0; j < se->size; ++j)
 501           {
 502             double *ptr = gsl_vector_ptr (se, j);
 503             *ptr = gsl_matrix_get (m, j, j);
 504           }
 505
 506         gsl_matrix_free (m);
 507
 508         gsl_vector_add (beta_hat, delta);
 509
 510         gsl_vector_minmax (delta, &min, &max);
 511
 512         if ( fabs (min) < cmd->bcon && fabs (max) < cmd->bcon)
 513           {
 514             msg (MN, _("Estimation terminated at iteration number %d because parameter estimates changed by less than %g"),
 515                  i + 1, cmd->bcon);
 516             converged = true;
 517           }
 518
 519         gsl_vector_free (delta);
 520       }
 521
 522       if ( prev_likelihood >= 0)
 523         {
 524           if (-log (likelihood) > -(1.0 - cmd->lcon) * log (prev_likelihood))
 525             {
 526               msg (MN, _("Estimation terminated at iteration number %d because Log Likelihood decreased by less than %g%%"), i + 1, 100 * cmd->lcon);
 527               converged = true;
 528             }
 529         }
 530       if (i == 0)
 531         initial_likelihood = likelihood;
 532       prev_likelihood = likelihood;
 533
 534       if (converged)
 535         break;
 536     }
 537   casereader_destroy (input);
 538
 539   for (i = 0; i < se->size; ++i)
 540     {
 541       double *ptr = gsl_vector_ptr (se, i);
 542       *ptr = sqrt (*ptr);
 543     }
 544
 545   output_model_summary (&work, initial_likelihood, likelihood);
 546   output_variables (cmd, beta_hat, se);
 547
 548   gsl_vector_free (beta_hat);
 549   gsl_vector_free (se);
 550
 551   return true;
 552 }
 553
 554 /* Parse the LOGISTIC REGRESSION command syntax */
 555 int
 556 cmd_logistic (struct lexer *lexer, struct dataset *ds)
 557 {
 558   struct lr_spec lr;
 559   lr.dict = dataset_dict (ds);
 560   lr.n_predictor_vars = 0;
 561   lr.predictor_vars = NULL;
 562   lr.exclude = MV_ANY;
 563   lr.wv = dict_get_weight (lr.dict);
 564   lr.max_iter = 20;
 565   lr.lcon = 0.0000;
 566   lr.bcon = 0.001;
 567   lr.min_epsilon = 0.00000001;
 568   lr.cut_point = 0.5;
 569   lr.constant = true;
 570   lr.confidence = 95;
 571   lr.print = PRINT_DEFAULT;
 572
 573
 574   if (lex_match_id (lexer, "VARIABLES"))
 575     lex_match (lexer, T_EQUALS);
 576
 577   if (! (lr.dep_var = parse_variable_const (lexer, lr.dict)))
 578     goto error;
 579
 580   lex_force_match (lexer, T_WITH);
 581
 582   if (!parse_variables_const (lexer, lr.dict,
 583                               &lr.predictor_vars, &lr.n_predictor_vars,
 584                               PV_NO_DUPLICATE | PV_NUMERIC))
 585     goto error;
 586
 587
 588   while (lex_token (lexer) != T_ENDCMD)
 589     {
 590       lex_match (lexer, T_SLASH);
 591
 592       if (lex_match_id (lexer, "MISSING"))
 593         {
 594           lex_match (lexer, T_EQUALS);
 595           while (lex_token (lexer) != T_ENDCMD
 596                  && lex_token (lexer) != T_SLASH)
 597             {
 598               if (lex_match_id (lexer, "INCLUDE"))
 599                 {
 600                   lr.exclude = MV_SYSTEM;
 601                 }
 602               else if (lex_match_id (lexer, "EXCLUDE"))
 603                 {
 604                   lr.exclude = MV_ANY;
 605                 }
 606               else
 607                 {
 608                   lex_error (lexer, NULL);
 609                   goto error;
 610                 }
 611             }
 612         }
 613       else if (lex_match_id (lexer, "ORIGIN"))
 614         {
 615           lr.constant = false;
 616         }
 617       else if (lex_match_id (lexer, "NOORIGIN"))
 618         {
 619           lr.constant = true;
 620         }
 621       else if (lex_match_id (lexer, "NOCONST"))
 622         {
 623           lr.constant = false;
 624         }
 625       else if (lex_match_id (lexer, "EXTERNAL"))
 626         {
 627           /* This is for compatibility.  It does nothing */
 628         }
 629       else if (lex_match_id (lexer, "PRINT"))
 630         {
 631           lex_match (lexer, T_EQUALS);
 632           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 633             {
 634               if (lex_match_id (lexer, "DEFAULT"))
 635                 {
 636                   lr.print |= PRINT_DEFAULT;
 637                 }
 638               else if (lex_match_id (lexer, "SUMMARY"))
 639                 {
 640                   lr.print |= PRINT_SUMMARY;
 641                 }
 642 #if 0
 643               else if (lex_match_id (lexer, "CORR"))
 644                 {
 645                   lr.print |= PRINT_CORR;
 646                 }
 647               else if (lex_match_id (lexer, "ITER"))
 648                 {
 649                   lr.print |= PRINT_ITER;
 650                 }
 651               else if (lex_match_id (lexer, "GOODFIT"))
 652                 {
 653                   lr.print |= PRINT_GOODFIT;
 654                 }
 655 #endif
 656               else if (lex_match_id (lexer, "CI"))
 657                 {
 658                   lr.print |= PRINT_CI;
 659                   if (lex_force_match (lexer, T_LPAREN))
 660                     {
 661                       if (! lex_force_int (lexer))
 662                         {
 663                           lex_error (lexer, NULL);
 664                           goto error;
 665                         }
 666                       lr.confidence = lex_integer (lexer);
 667                       lex_get (lexer);
 668                       if ( ! lex_force_match (lexer, T_RPAREN))
 669                         {
 670                           lex_error (lexer, NULL);
 671                           goto error;
 672                         }
 673                     }
 674                 }
 675               else if (lex_match_id (lexer, "ALL"))
 676                 {
 677                   lr.print = ~0x0000;
 678                 }
 679               else
 680                 {
 681                   lex_error (lexer, NULL);
 682                   goto error;
 683                 }
 684             }
 685         }
 686       else if (lex_match_id (lexer, "CRITERIA"))
 687         {
 688           lex_match (lexer, T_EQUALS);
 689           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 690             {
 691               if (lex_match_id (lexer, "BCON"))
 692                 {
 693                   if (lex_force_match (lexer, T_LPAREN))
 694                     {
 695                       if (! lex_force_num (lexer))
 696                         {
 697                           lex_error (lexer, NULL);
 698                           goto error;
 699                         }
 700                       lr.bcon = lex_number (lexer);
 701                       lex_get (lexer);
 702                       if ( ! lex_force_match (lexer, T_RPAREN))
 703                         {
 704                           lex_error (lexer, NULL);
 705                           goto error;
 706                         }
 707                     }
 708                 }
 709               else if (lex_match_id (lexer, "ITERATE"))
 710                 {
 711                   if (lex_force_match (lexer, T_LPAREN))
 712                     {
 713                       if (! lex_force_int (lexer))
 714                         {
 715                           lex_error (lexer, NULL);
 716                           goto error;
 717                         }
 718                       lr.max_iter = lex_integer (lexer);
 719                       lex_get (lexer);
 720                       if ( ! lex_force_match (lexer, T_RPAREN))
 721                         {
 722                           lex_error (lexer, NULL);
 723                           goto error;
 724                         }
 725                     }
 726                 }
 727               else if (lex_match_id (lexer, "LCON"))
 728                 {
 729                   if (lex_force_match (lexer, T_LPAREN))
 730                     {
 731                       if (! lex_force_num (lexer))
 732                         {
 733                           lex_error (lexer, NULL);
 734                           goto error;
 735                         }
 736                       lr.lcon = lex_number (lexer);
 737                       lex_get (lexer);
 738                       if ( ! lex_force_match (lexer, T_RPAREN))
 739                         {
 740                           lex_error (lexer, NULL);
 741                           goto error;
 742                         }
 743                     }
 744                 }
 745               else if (lex_match_id (lexer, "EPS"))
 746                 {
 747                   if (lex_force_match (lexer, T_LPAREN))
 748                     {
 749                       if (! lex_force_num (lexer))
 750                         {
 751                           lex_error (lexer, NULL);
 752                           goto error;
 753                         }
 754                       lr.min_epsilon = lex_number (lexer);
 755                       lex_get (lexer);
 756                       if ( ! lex_force_match (lexer, T_RPAREN))
 757                         {
 758                           lex_error (lexer, NULL);
 759                           goto error;
 760                         }
 761                     }
 762                 }
 763               else
 764                 {
 765                   lex_error (lexer, NULL);
 766                   goto error;
 767                 }
 768             }
 769         }
 770       else
 771         {
 772           lex_error (lexer, NULL);
 773           goto error;
 774         }
 775     }
 776
 777
 778
 779   {
 780     struct casegrouper *grouper;
 781     struct casereader *group;
 782     bool ok;
 783
 784     grouper = casegrouper_create_splits (proc_open (ds), lr.dict);
 785     while (casegrouper_get_next_group (grouper, &group))
 786       ok = run_lr (&lr, group, ds);
 787     ok = casegrouper_destroy (grouper);
 788     ok = proc_commit (ds) && ok;
 789   }
 790
 791   free (lr.predictor_vars);
 792   return CMD_SUCCESS;
 793
 794  error:
 795
 796   free (lr.predictor_vars);
 797   return CMD_FAILURE;
 798 }
 799
 800
 801 \f
 802
 803 /* Show the Dependent Variable Encoding box.
 804    This indicates how the dependent variable
 805    is mapped to the internal zero/one values.
 806 */
 807 static void
 808 output_depvarmap (const struct lr_spec *cmd, const struct lr_result *res)
 809 {
 810   const int heading_columns = 0;
 811   const int heading_rows = 1;
 812   struct tab_table *t;
 813   struct string str;
 814
 815   const int nc = 2;
 816   int nr = heading_rows + 2;
 817
 818   t = tab_create (nc, nr);
 819   tab_title (t, _("Dependent Variable Encoding"));
 820
 821   tab_headers (t, heading_columns, 0, heading_rows, 0);
 822
 823   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
 824
 825   tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
 826   tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
 827
 828   tab_text (t,  0, 0, TAB_CENTER | TAT_TITLE, _("Original Value"));
 829   tab_text (t,  1, 0, TAB_CENTER | TAT_TITLE, _("Internal Value"));
 830
 831
 832
 833   ds_init_empty (&str);
 834   var_append_value_name (cmd->dep_var, &res->y0, &str);
 835   tab_text (t,  0, 0 + heading_rows,  0, ds_cstr (&str));
 836
 837   ds_clear (&str);
 838   var_append_value_name (cmd->dep_var, &res->y1, &str);
 839   tab_text (t,  0, 1 + heading_rows,  0, ds_cstr (&str));
 840
 841
 842   tab_double (t, 1, 0 + heading_rows, 0, map_dependent_var (cmd, res, &res->y0), &F_8_0);
 843   tab_double (t, 1, 1 + heading_rows, 0, map_dependent_var (cmd, res, &res->y1), &F_8_0);
 844   ds_destroy (&str);
 845
 846   tab_submit (t);
 847 }
 848
 849
 850 /* Show the Variables in the Equation box */
 851 static void
 852 output_variables (const struct lr_spec *cmd,
 853                   const gsl_vector *beta,
 854                   const gsl_vector *se)
 855 {
 856   int row = 0;
 857   const int heading_columns = 1;
 858   int heading_rows = 1;
 859   struct tab_table *t;
 860
 861   int idx;
 862   int n_rows = cmd->n_predictor_vars;
 863
 864   int nc = 8;
 865   int nr ;
 866   if (cmd->print & PRINT_CI)
 867     {
 868       nc += 2;
 869       heading_rows += 1;
 870       row++;
 871     }
 872   nr = heading_rows + cmd->n_predictor_vars;
 873   if (cmd->constant)
 874     nr++;
 875
 876   t = tab_create (nc, nr);
 877   tab_title (t, _("Variables in the Equation"));
 878
 879   tab_headers (t, heading_columns, 0, heading_rows, 0);
 880
 881   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
 882
 883   tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
 884   tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
 885
 886   tab_text (t,  0, row + 1, TAB_CENTER | TAT_TITLE, _("Step 1"));
 887
 888   tab_text (t,  2, row, TAB_CENTER | TAT_TITLE, _("B"));
 889   tab_text (t,  3, row, TAB_CENTER | TAT_TITLE, _("S.E."));
 890   tab_text (t,  4, row, TAB_CENTER | TAT_TITLE, _("Wald"));
 891   tab_text (t,  5, row, TAB_CENTER | TAT_TITLE, _("df"));
 892   tab_text (t,  6, row, TAB_CENTER | TAT_TITLE, _("Sig."));
 893   tab_text (t,  7, row, TAB_CENTER | TAT_TITLE, _("Exp(B)"));
 894
 895   if (cmd->print & PRINT_CI)
 896     {
 897       tab_joint_text_format (t, 8, 0, 9, 0,
 898                              TAB_CENTER | TAT_TITLE, _("%d%% CI for Exp(B)"), cmd->confidence);
 899
 900       tab_text (t,  8, row, TAB_CENTER | TAT_TITLE, _("Lower"));
 901       tab_text (t,  9, row, TAB_CENTER | TAT_TITLE, _("Upper"));
 902     }
 903
 904   if (cmd->constant)
 905     n_rows++;
 906
 907   for (idx = 0 ; idx < n_rows; ++idx)
 908     {
 909       const int r = idx + heading_rows;
 910
 911       const double b = gsl_vector_get (beta, idx);
 912       const double sigma = gsl_vector_get (se, idx);
 913       const double wald = pow2 (b / sigma);
 914       const double df = 1;
 915
 916       if (idx < cmd->n_predictor_vars)
 917         tab_text (t, 1, r, TAB_LEFT | TAT_TITLE,
 918                   var_to_string (cmd->predictor_vars[idx]));
 919
 920       tab_double (t, 2, r, 0, b, 0);
 921       tab_double (t, 3, r, 0, sigma, 0);
 922       tab_double (t, 4, r, 0, wald, 0);
 923       tab_double (t, 5, r, 0, df, &F_8_0);
 924       tab_double (t, 6, r, 0,  gsl_cdf_chisq_Q (wald, df), 0);
 925       tab_double (t, 7, r, 0, exp (b), 0);
 926
 927       if (cmd->print & PRINT_CI)
 928         {
 929           double wc = gsl_cdf_ugaussian_Pinv (0.5 + cmd->confidence / 200.0);
 930           wc *= sigma;
 931
 932           if (idx < cmd->n_predictor_vars)
 933             {
 934               tab_double (t, 8, r, 0, exp (b - wc), 0);
 935               tab_double (t, 9, r, 0, exp (b + wc), 0);
 936             }
 937         }
 938     }
 939
 940   if ( cmd->constant)
 941     tab_text (t, 1, nr - 1, TAB_LEFT | TAT_TITLE, _("Constant"));
 942
 943   tab_submit (t);
 944 }
 945
 946
 947 /* Show the model summary box */
 948 static void
 949 output_model_summary (const struct lr_result *res,
 950                       double initial_likelihood, double likelihood)
 951 {
 952   const int heading_columns = 0;
 953   const int heading_rows = 1;
 954   struct tab_table *t;
 955
 956   const int nc = 4;
 957   int nr = heading_rows + 1;
 958   double cox;
 959
 960   t = tab_create (nc, nr);
 961   tab_title (t, _("Model Summary"));
 962
 963   tab_headers (t, heading_columns, 0, heading_rows, 0);
 964
 965   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
 966
 967   tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
 968   tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
 969
 970   tab_text (t,  0, 0, TAB_LEFT | TAT_TITLE, _("Step 1"));
 971   tab_text (t,  1, 0, TAB_CENTER | TAT_TITLE, _("-2 Log likelihood"));
 972   tab_double (t,  1, 1, 0, -2 * log (likelihood), 0);
 973
 974
 975   tab_text (t,  2, 0, TAB_CENTER | TAT_TITLE, _("Cox & Snell R Square"));
 976   cox =  1.0 - pow (initial_likelihood /likelihood, 2 / res->cc);
 977   tab_double (t,  2, 1, 0, cox, 0);
 978
 979   tab_text (t,  3, 0, TAB_CENTER | TAT_TITLE, _("Nagelkerke R Square"));
 980   tab_double (t,  3, 1, 0, cox / ( 1.0 - pow (initial_likelihood, 2 / res->cc)), 0);
 981
 982
 983   tab_submit (t);
 984 }
 985
 986 /* Show the case processing summary box */
 987 static void
 988 case_processing_summary (const struct lr_result *res)
 989 {
 990   const int heading_columns = 1;
 991   const int heading_rows = 1;
 992   struct tab_table *t;
 993
 994   const int nc = 3;
 995   const int nr = heading_rows + 3;
 996   casenumber total;
 997
 998   t = tab_create (nc, nr);
 999   tab_title (t, _("Case Processing Summary"));
1000
1001   tab_headers (t, heading_columns, 0, heading_rows, 0);
1002
1003   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
1004
1005   tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1006   tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
1007
1008   tab_text (t,  0, 0, TAB_LEFT | TAT_TITLE, _("Unweighted Cases"));
1009   tab_text (t,  1, 0, TAB_CENTER | TAT_TITLE, _("N"));
1010   tab_text (t,  2, 0, TAB_CENTER | TAT_TITLE, _("Percent"));
1011
1012
1013   tab_text (t,  0, 1, TAB_LEFT | TAT_TITLE, _("Included in Analysis"));
1014   tab_text (t,  0, 2, TAB_LEFT | TAT_TITLE, _("Missing Cases"));
1015   tab_text (t,  0, 3, TAB_LEFT | TAT_TITLE, _("Total"));
1016
1017   tab_double (t,  1, 1, 0, res->n_nonmissing, &F_8_0);
1018   tab_double (t,  1, 2, 0, res->n_missing, &F_8_0);
1019
1020   total = res->n_nonmissing + res->n_missing;
1021   tab_double (t,  1, 3, 0, total , &F_8_0);
1022
1023   tab_double (t,  2, 1, 0, 100 * res->n_nonmissing / (double) total, 0);
1024   tab_double (t,  2, 2, 0, 100 * res->n_missing / (double) total, 0);
1025   tab_double (t,  2, 3, 0, 100 * total / (double) total, 0);
1026
1027   tab_submit (t);
1028 }
1029