pintos-os.org Git - pspp/blob - src/language/stats/regression.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2005, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include <stdbool.h>
  20
  21 #include <gsl/gsl_cdf.h>
  22 #include <gsl/gsl_matrix.h>
  23
  24 #include <data/dataset.h>
  25 #include <data/casewriter.h>
  26
  27 #include "language/command.h"
  28 #include "language/lexer/lexer.h"
  29 #include "language/lexer/value-parser.h"
  30 #include "language/lexer/variable-parser.h"
  31
  32
  33 #include "data/casegrouper.h"
  34 #include "data/casereader.h"
  35 #include "data/dictionary.h"
  36
  37 #include "math/covariance.h"
  38 #include "math/linreg.h"
  39 #include "math/moments.h"
  40
  41 #include "libpspp/message.h"
  42 #include "libpspp/taint.h"
  43
  44 #include "output/tab.h"
  45
  46 #include "gettext.h"
  47 #define _(msgid) gettext (msgid)
  48 #define N_(msgid) msgid
  49
  50
  51 #include <gl/intprops.h>
  52
  53 #define REG_LARGE_DATA 1000
  54
  55 struct regression
  56 {
  57   struct dataset *ds;
  58
  59   const struct variable **vars;
  60   size_t n_vars;
  61
  62   const struct variable **dep_vars;
  63   size_t n_dep_vars;
  64
  65   bool r;
  66   bool coeff;
  67   bool anova;
  68   bool bcov;
  69
  70
  71   bool resid;
  72   bool pred;
  73 };
  74
  75 struct per_split_ws
  76 {
  77   linreg **models;
  78 };
  79
  80 struct regression_workspace
  81 {
  82   struct per_split_ws *psw;
  83
  84   struct casewriter *writer;
  85   struct casereader *reader;
  86
  87   int res_idx;
  88   int pred_idx;
  89   int extras;
  90
  91   const struct variable **predvars;
  92   const struct variable **residvars;
  93 };
  94
  95 static void run_regression (const struct regression *cmd,
  96                             struct per_split_ws *psw,
  97                             struct regression_workspace *ws,
  98                             struct casereader *input);
  99
 100
 101
 102 static char *
 103 reg_get_name (const struct dictionary *dict, const char *prefix)
 104 {
 105   char *name;
 106   int i;
 107
 108   /* XXX handle too-long prefixes */
 109   name = xmalloc (strlen (prefix) + INT_BUFSIZE_BOUND (i) + 1);
 110   for (i = 1;; i++)
 111     {
 112       sprintf (name, "%s%d", prefix, i);
 113       if (dict_lookup_var (dict, name) == NULL)
 114         return name;
 115     }
 116 }
 117
 118
 119 static const struct variable *
 120 create_aux_var (struct dataset *ds, const char *prefix)
 121 {
 122   struct variable *var;
 123   struct dictionary *dict = dataset_dict (ds);
 124   char *name = reg_get_name (dict, prefix);
 125   var = dict_create_var_assert (dict, name, 0);
 126   free (name);
 127   return var;
 128 }
 129
 130 struct thing
 131 {
 132   int n_dep_vars;
 133   struct regression_workspace *ws;
 134 };
 135
 136 static int
 137 transX (void *aux, struct ccase **c, casenumber x UNUSED)
 138 {
 139   struct thing *thing = aux;
 140   struct regression_workspace *ws = thing->ws;
 141   const struct ccase *in =  casereader_read (ws->reader);
 142
 143   if (in)
 144     {
 145       int k;
 146       *c = case_unshare (*c);
 147
 148       for (k = 0; k < thing->n_dep_vars; ++k)
 149         {
 150           if (ws->pred_idx != -1)
 151             {
 152               double pred = case_data_idx (in, ws->extras * k + ws->pred_idx)->f;
 153               case_data_rw (*c, ws->predvars[k])->f = pred;
 154             }
 155
 156           if (ws->res_idx != -1)
 157             {
 158               double resid = case_data_idx (in, ws->extras * k + ws->res_idx)->f;
 159               case_data_rw (*c, ws->residvars[k])->f = resid;
 160             }
 161         }
 162     }
 163
 164   return TRNS_CONTINUE;
 165 }
 166
 167
 168 int
 169 cmd_regression (struct lexer *lexer, struct dataset *ds)
 170 {
 171   int n_splits = 0;
 172   struct regression_workspace workspace;
 173   struct regression regression;
 174   const struct dictionary *dict = dataset_dict (ds);
 175   bool save;
 176   workspace.psw = NULL;
 177
 178   memset (&regression, 0, sizeof (struct regression));
 179
 180   regression.anova = true;
 181   regression.coeff = true;
 182   regression.r = true;
 183
 184   regression.pred = false;
 185   regression.resid = false;
 186
 187   regression.ds = ds;
 188
 189   /* Accept an optional, completely pointless "/VARIABLES=" */
 190   lex_match (lexer, T_SLASH);
 191   if (lex_match_id (lexer, "VARIABLES"))
 192     {
 193       if (!lex_force_match (lexer, T_EQUALS))
 194         goto error;
 195     }
 196
 197   if (!parse_variables_const (lexer, dict,
 198                               &regression.vars, &regression.n_vars,
 199                               PV_NO_DUPLICATE | PV_NUMERIC))
 200     goto error;
 201
 202
 203   while (lex_token (lexer) != T_ENDCMD)
 204     {
 205       lex_match (lexer, T_SLASH);
 206
 207       if (lex_match_id (lexer, "DEPENDENT"))
 208         {
 209           if (!lex_force_match (lexer, T_EQUALS))
 210             goto error;
 211
 212           if (!parse_variables_const (lexer, dict,
 213                                       &regression.dep_vars,
 214                                       &regression.n_dep_vars,
 215                                       PV_NO_DUPLICATE | PV_NUMERIC))
 216             goto error;
 217         }
 218       else if (lex_match_id (lexer, "METHOD"))
 219         {
 220           lex_match (lexer, T_EQUALS);
 221
 222           if (!lex_force_match_id (lexer, "ENTER"))
 223             {
 224               goto error;
 225             }
 226         }
 227       else if (lex_match_id (lexer, "STATISTICS"))
 228         {
 229           lex_match (lexer, T_EQUALS);
 230
 231           while (lex_token (lexer) != T_ENDCMD
 232                  && lex_token (lexer) != T_SLASH)
 233             {
 234               if (lex_match (lexer, T_ALL))
 235                 {
 236                 }
 237               else if (lex_match_id (lexer, "DEFAULTS"))
 238                 {
 239                 }
 240               else if (lex_match_id (lexer, "R"))
 241                 {
 242                 }
 243               else if (lex_match_id (lexer, "COEFF"))
 244                 {
 245                 }
 246               else if (lex_match_id (lexer, "ANOVA"))
 247                 {
 248                 }
 249               else if (lex_match_id (lexer, "BCOV"))
 250                 {
 251                 }
 252               else
 253                 {
 254                   lex_error (lexer, NULL);
 255                   goto error;
 256                 }
 257             }
 258         }
 259       else if (lex_match_id (lexer, "SAVE"))
 260         {
 261           lex_match (lexer, T_EQUALS);
 262
 263           while (lex_token (lexer) != T_ENDCMD
 264                  && lex_token (lexer) != T_SLASH)
 265             {
 266               if (lex_match_id (lexer, "PRED"))
 267                 {
 268                   regression.pred = true;
 269                 }
 270               else if (lex_match_id (lexer, "RESID"))
 271                 {
 272                   regression.resid = true;
 273                 }
 274               else
 275                 {
 276                   lex_error (lexer, NULL);
 277                   goto error;
 278                 }
 279             }
 280         }
 281       else
 282         {
 283           lex_error (lexer, NULL);
 284           goto error;
 285         }
 286     }
 287
 288   if (!regression.vars)
 289     {
 290       dict_get_vars (dict, &regression.vars, &regression.n_vars, 0);
 291     }
 292
 293   save = regression.pred || regression.resid;
 294   workspace.extras = 0;
 295   workspace.res_idx = -1;
 296   workspace.pred_idx = -1;
 297   workspace.writer = NULL;
 298   workspace.reader = NULL;
 299   if (save)
 300     {
 301       int i;
 302       struct caseproto *proto = caseproto_create ();
 303
 304       if (regression.resid)
 305         {
 306           workspace.extras ++;
 307           workspace.res_idx = 0;
 308           workspace.residvars = xcalloc (regression.n_dep_vars, sizeof (*workspace.residvars));
 309
 310           for (i = 0; i < regression.n_dep_vars; ++i)
 311             {
 312               workspace.residvars[i] = create_aux_var (ds, "RES");
 313               proto = caseproto_add_width (proto, 0);
 314             }
 315         }
 316
 317       if (regression.pred)
 318         {
 319           workspace.extras ++;
 320           workspace.pred_idx = 1;
 321           workspace.predvars = xcalloc (regression.n_dep_vars, sizeof (*workspace.predvars));
 322
 323           for (i = 0; i < regression.n_dep_vars; ++i)
 324             {
 325               workspace.predvars[i] = create_aux_var (ds, "PRED");
 326               proto = caseproto_add_width (proto, 0);
 327             }
 328         }
 329
 330       if (proc_make_temporary_transformations_permanent (ds))
 331         msg (SW, _("REGRESSION with SAVE ignores TEMPORARY.  "
 332                    "Temporary transformations will be made permanent."));
 333
 334       workspace.writer = autopaging_writer_create (proto);
 335     }
 336
 337
 338   n_splits = 0;
 339   {
 340     struct casegrouper *grouper;
 341     struct casereader *group;
 342     bool ok;
 343
 344     grouper = casegrouper_create_splits (proc_open_filtering (ds, !save), dict);
 345
 346
 347     while (casegrouper_get_next_group (grouper, &group))
 348       {
 349         workspace.psw = xrealloc (workspace.psw, ++n_splits * sizeof (*workspace.psw));
 350
 351         run_regression (&regression, &workspace.psw[n_splits - 1],
 352                         &workspace,
 353                         group);
 354
 355       }
 356     ok = casegrouper_destroy (grouper);
 357     ok = proc_commit (ds) && ok;
 358   }
 359
 360     {
 361       if (workspace.writer)
 362         {
 363           struct thing *thing = xmalloc (sizeof *thing);
 364           struct casereader *r = casewriter_make_reader (workspace.writer);
 365           workspace.writer = NULL;
 366           workspace.reader = r;
 367           thing->ws = xmalloc (sizeof (workspace));
 368           memcpy (thing->ws, &workspace, sizeof (workspace));
 369           thing->n_dep_vars = regression.n_dep_vars;
 370
 371           add_transformation (ds, transX, NULL, thing);
 372         }
 373     }
 374
 375
 376   free (regression.vars);
 377   free (regression.dep_vars);
 378   return CMD_SUCCESS;
 379
 380 error:
 381
 382   free (regression.vars);
 383   free (regression.dep_vars);
 384   return CMD_FAILURE;
 385 }
 386
 387
 388 static size_t
 389 get_n_all_vars (const struct regression *cmd)
 390 {
 391   size_t result = cmd->n_vars;
 392   size_t i;
 393   size_t j;
 394
 395   result += cmd->n_dep_vars;
 396   for (i = 0; i < cmd->n_dep_vars; i++)
 397     {
 398       for (j = 0; j < cmd->n_vars; j++)
 399         {
 400           if (cmd->vars[j] == cmd->dep_vars[i])
 401             {
 402               result--;
 403             }
 404         }
 405     }
 406   return result;
 407 }
 408
 409 static void
 410 fill_all_vars (const struct variable **vars, const struct regression *cmd)
 411 {
 412   size_t i;
 413   size_t j;
 414   bool absent;
 415
 416   for (i = 0; i < cmd->n_vars; i++)
 417     {
 418       vars[i] = cmd->vars[i];
 419     }
 420   for (i = 0; i < cmd->n_dep_vars; i++)
 421     {
 422       absent = true;
 423       for (j = 0; j < cmd->n_vars; j++)
 424         {
 425           if (cmd->dep_vars[i] == cmd->vars[j])
 426             {
 427               absent = false;
 428               break;
 429             }
 430         }
 431       if (absent)
 432         {
 433           vars[i + cmd->n_vars] = cmd->dep_vars[i];
 434         }
 435     }
 436 }
 437
 438 /*
 439   Is variable k the dependent variable?
 440 */
 441 static bool
 442 is_depvar (const struct regression *cmd, size_t k, const struct variable *v)
 443 {
 444   return v == cmd->vars[k];
 445 }
 446
 447
 448 /* Identify the explanatory variables in v_variables.  Returns
 449    the number of independent variables. */
 450 static int
 451 identify_indep_vars (const struct regression *cmd,
 452                      const struct variable **indep_vars,
 453                      const struct variable *depvar)
 454 {
 455   int n_indep_vars = 0;
 456   int i;
 457
 458   for (i = 0; i < cmd->n_vars; i++)
 459     if (!is_depvar (cmd, i, depvar))
 460       indep_vars[n_indep_vars++] = cmd->vars[i];
 461   if ((n_indep_vars < 1) && is_depvar (cmd, 0, depvar))
 462     {
 463       /*
 464          There is only one independent variable, and it is the same
 465          as the dependent variable. Print a warning and continue.
 466        */
 467       msg (SW,
 468            gettext
 469            ("The dependent variable is equal to the independent variable."
 470             "The least squares line is therefore Y=X."
 471             "Standard errors and related statistics may be meaningless."));
 472       n_indep_vars = 1;
 473       indep_vars[0] = cmd->vars[0];
 474     }
 475   return n_indep_vars;
 476 }
 477
 478
 479 static double
 480 fill_covariance (gsl_matrix * cov, struct covariance *all_cov,
 481                  const struct variable **vars,
 482                  size_t n_vars, const struct variable *dep_var,
 483                  const struct variable **all_vars, size_t n_all_vars,
 484                  double *means)
 485 {
 486   size_t i;
 487   size_t j;
 488   size_t dep_subscript;
 489   size_t *rows;
 490   const gsl_matrix *ssizes;
 491   const gsl_matrix *mean_matrix;
 492   const gsl_matrix *ssize_matrix;
 493   double result = 0.0;
 494
 495   gsl_matrix *cm = covariance_calculate_unnormalized (all_cov);
 496
 497   if (cm == NULL)
 498     return 0;
 499
 500   rows = xnmalloc (cov->size1 - 1, sizeof (*rows));
 501
 502   for (i = 0; i < n_all_vars; i++)
 503     {
 504       for (j = 0; j < n_vars; j++)
 505         {
 506           if (vars[j] == all_vars[i])
 507             {
 508               rows[j] = i;
 509             }
 510         }
 511       if (all_vars[i] == dep_var)
 512         {
 513           dep_subscript = i;
 514         }
 515     }
 516   mean_matrix = covariance_moments (all_cov, MOMENT_MEAN);
 517   ssize_matrix = covariance_moments (all_cov, MOMENT_NONE);
 518   for (i = 0; i < cov->size1 - 1; i++)
 519     {
 520       means[i] = gsl_matrix_get (mean_matrix, rows[i], 0)
 521         / gsl_matrix_get (ssize_matrix, rows[i], 0);
 522       for (j = 0; j < cov->size2 - 1; j++)
 523         {
 524           gsl_matrix_set (cov, i, j, gsl_matrix_get (cm, rows[i], rows[j]));
 525           gsl_matrix_set (cov, j, i, gsl_matrix_get (cm, rows[j], rows[i]));
 526         }
 527     }
 528   means[cov->size1 - 1] = gsl_matrix_get (mean_matrix, dep_subscript, 0)
 529     / gsl_matrix_get (ssize_matrix, dep_subscript, 0);
 530   ssizes = covariance_moments (all_cov, MOMENT_NONE);
 531   result = gsl_matrix_get (ssizes, dep_subscript, rows[0]);
 532   for (i = 0; i < cov->size1 - 1; i++)
 533     {
 534       gsl_matrix_set (cov, i, cov->size1 - 1,
 535                       gsl_matrix_get (cm, rows[i], dep_subscript));
 536       gsl_matrix_set (cov, cov->size1 - 1, i,
 537                       gsl_matrix_get (cm, rows[i], dep_subscript));
 538       if (result > gsl_matrix_get (ssizes, rows[i], dep_subscript))
 539         {
 540           result = gsl_matrix_get (ssizes, rows[i], dep_subscript);
 541         }
 542     }
 543   gsl_matrix_set (cov, cov->size1 - 1, cov->size1 - 1,
 544                   gsl_matrix_get (cm, dep_subscript, dep_subscript));
 545   free (rows);
 546   gsl_matrix_free (cm);
 547   return result;
 548 }
 549
 550
 551 /*
 552   STATISTICS subcommand output functions.
 553 */
 554 static void reg_stats_r (linreg *, void *, const struct variable *);
 555 static void reg_stats_coeff (linreg *, void *, const struct variable *);
 556 static void reg_stats_anova (linreg *, void *, const struct variable *);
 557 static void reg_stats_bcov (linreg *, void *, const struct variable *);
 558
 559 static void
 560 statistics_keyword_output (void (*)
 561                            (linreg *, void *, const struct variable *), bool,
 562                            linreg *, void *, const struct variable *);
 563
 564
 565
 566 static void
 567 subcommand_statistics (const struct regression *cmd, linreg * c, void *aux,
 568                        const struct variable *var)
 569 {
 570   statistics_keyword_output (reg_stats_r, cmd->r, c, aux, var);
 571   statistics_keyword_output (reg_stats_anova, cmd->anova, c, aux, var);
 572   statistics_keyword_output (reg_stats_coeff, cmd->coeff, c, aux, var);
 573   statistics_keyword_output (reg_stats_bcov, cmd->bcov, c, aux, var);
 574 }
 575
 576
 577 static void
 578 run_regression (const struct regression *cmd,
 579                 struct per_split_ws *psw,
 580                 struct regression_workspace *ws,
 581                 struct casereader *input)
 582 {
 583   size_t i;
 584
 585   int k;
 586   struct ccase *c;
 587   struct covariance *cov;
 588   struct casereader *reader;
 589   size_t n_all_vars = get_n_all_vars (cmd);
 590   const struct variable **all_vars = xnmalloc (n_all_vars, sizeof (*all_vars));
 591
 592   double *means = xnmalloc (n_all_vars, sizeof (*means));
 593
 594   fill_all_vars (all_vars, cmd);
 595   cov = covariance_1pass_create (n_all_vars, all_vars,
 596                                  dict_get_weight (dataset_dict (cmd->ds)),
 597                                  MV_ANY);
 598
 599   reader = casereader_clone (input);
 600   reader = casereader_create_filter_missing (reader, all_vars, n_all_vars,
 601                                              MV_ANY, NULL, NULL);
 602
 603
 604   {
 605     struct casereader *r = casereader_clone (reader);
 606
 607     for (; (c = casereader_read (r)) != NULL; case_unref (c))
 608       {
 609         covariance_accumulate (cov, c);
 610       }
 611     casereader_destroy (r);
 612   }
 613
 614   psw->models = xcalloc (cmd->n_dep_vars, sizeof (*psw->models));
 615   for (k = 0; k < cmd->n_dep_vars; k++)
 616     {
 617
 618       const struct variable **vars = xnmalloc (cmd->n_vars, sizeof (*vars));
 619       const struct variable *dep_var = cmd->dep_vars[k];
 620       int n_indep = identify_indep_vars (cmd, vars, dep_var);
 621       gsl_matrix *this_cm = gsl_matrix_alloc (n_indep + 1, n_indep + 1);
 622       double n_data = fill_covariance (this_cm, cov, vars, n_indep,
 623                                 dep_var, all_vars, n_all_vars, means);
 624       psw->models[k] = linreg_alloc (dep_var, vars,  n_data, n_indep);
 625       psw->models[k]->depvar = dep_var;
 626       for (i = 0; i < n_indep; i++)
 627         {
 628           linreg_set_indep_variable_mean (psw->models[k], i, means[i]);
 629         }
 630       linreg_set_depvar_mean (psw->models[k], means[i]);
 631       /*
 632          For large data sets, use QR decomposition.
 633        */
 634       if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA)
 635         {
 636           psw->models[k]->method = LINREG_QR;
 637         }
 638
 639       if (n_data > 0)
 640         {
 641           /*
 642              Find the least-squares estimates and other statistics.
 643            */
 644           linreg_fit (this_cm, psw->models[k]);
 645
 646           if (!taint_has_tainted_successor (casereader_get_taint (input)))
 647             {
 648               subcommand_statistics (cmd, psw->models[k], this_cm, dep_var);
 649             }
 650         }
 651       else
 652         {
 653           msg (SE, _("No valid data found. This command was skipped."));
 654         }
 655       gsl_matrix_free (this_cm);
 656       free (vars);
 657     }
 658
 659
 660   if (ws->extras > 0)
 661    {
 662       struct casereader *r = casereader_clone (reader);
 663
 664       for (; (c = casereader_read (r)) != NULL; case_unref (c))
 665         {
 666           struct ccase *outc = case_clone (c);
 667           for (k = 0; k < cmd->n_dep_vars; k++)
 668             {
 669               const struct variable **vars = xnmalloc (cmd->n_vars, sizeof (*vars));
 670               const struct variable *dep_var = cmd->dep_vars[k];
 671               int n_indep = identify_indep_vars (cmd, vars, dep_var);
 672               double *vals = xnmalloc (n_indep, sizeof (*vals));
 673               for (i = 0; i < n_indep; i++)
 674                 {
 675                   const union value *tmp = case_data (c, vars[i]);
 676                   vals[i] = tmp->f;
 677                 }
 678
 679               if (cmd->pred)
 680                 {
 681                   double pred = linreg_predict (psw->models[k], vals, n_indep);
 682                   case_data_rw_idx (outc, k * ws->extras + ws->pred_idx)->f = pred;
 683                 }
 684
 685               if (cmd->resid)
 686                 {
 687                   double obs = case_data (c, psw->models[k]->depvar)->f;
 688                   double res = linreg_residual (psw->models[k], obs,  vals, n_indep);
 689                   case_data_rw_idx (outc, k * ws->extras + ws->res_idx)->f = res;
 690                 }
 691             }
 692           casewriter_write (ws->writer, outc);
 693         }
 694       casereader_destroy (r);
 695     }
 696
 697   casereader_destroy (reader);
 698
 699
 700   free (all_vars);
 701   free (means);
 702   casereader_destroy (input);
 703   covariance_destroy (cov);
 704 }
 705 \f
 706
 707
 708
 709
 710 static void
 711 reg_stats_r (linreg * c, void *aux UNUSED, const struct variable *var)
 712 {
 713   struct tab_table *t;
 714   int n_rows = 2;
 715   int n_cols = 5;
 716   double rsq;
 717   double adjrsq;
 718   double std_error;
 719
 720   assert (c != NULL);
 721   rsq = linreg_ssreg (c) / linreg_sst (c);
 722   adjrsq = rsq -
 723     (1.0 - rsq) * linreg_n_coeffs (c) / (linreg_n_obs (c) -
 724                                          linreg_n_coeffs (c) - 1);
 725   std_error = sqrt (linreg_mse (c));
 726   t = tab_create (n_cols, n_rows);
 727   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
 728   tab_hline (t, TAL_2, 0, n_cols - 1, 1);
 729   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 730   tab_vline (t, TAL_0, 1, 0, 0);
 731
 732   tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("R"));
 733   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("R Square"));
 734   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Adjusted R Square"));
 735   tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error of the Estimate"));
 736   tab_double (t, 1, 1, TAB_RIGHT, sqrt (rsq), NULL);
 737   tab_double (t, 2, 1, TAB_RIGHT, rsq, NULL);
 738   tab_double (t, 3, 1, TAB_RIGHT, adjrsq, NULL);
 739   tab_double (t, 4, 1, TAB_RIGHT, std_error, NULL);
 740   tab_title (t, _("Model Summary (%s)"), var_to_string (var));
 741   tab_submit (t);
 742 }
 743
 744 /*
 745   Table showing estimated regression coefficients.
 746 */
 747 static void
 748 reg_stats_coeff (linreg * c, void *aux_, const struct variable *var)
 749 {
 750   size_t j;
 751   int n_cols = 7;
 752   int n_rows;
 753   int this_row;
 754   double t_stat;
 755   double pval;
 756   double std_err;
 757   double beta;
 758   const char *label;
 759
 760   const struct variable *v;
 761   struct tab_table *t;
 762   gsl_matrix *cov = aux_;
 763
 764   assert (c != NULL);
 765   n_rows = linreg_n_coeffs (c) + 3;
 766
 767   t = tab_create (n_cols, n_rows);
 768   tab_headers (t, 2, 0, 1, 0);
 769   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
 770   tab_hline (t, TAL_2, 0, n_cols - 1, 1);
 771   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 772   tab_vline (t, TAL_0, 1, 0, 0);
 773
 774   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("B"));
 775   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
 776   tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Beta"));
 777   tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("t"));
 778   tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
 779   tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("(Constant)"));
 780   tab_double (t, 2, 1, 0, linreg_intercept (c), NULL);
 781   std_err = sqrt (gsl_matrix_get (linreg_cov (c), 0, 0));
 782   tab_double (t, 3, 1, 0, std_err, NULL);
 783   tab_double (t, 4, 1, 0, 0.0, NULL);
 784   t_stat = linreg_intercept (c) / std_err;
 785   tab_double (t, 5, 1, 0, t_stat, NULL);
 786   pval =
 787     2 * gsl_cdf_tdist_Q (fabs (t_stat),
 788                          (double) (linreg_n_obs (c) - linreg_n_coeffs (c)));
 789   tab_double (t, 6, 1, 0, pval, NULL);
 790   for (j = 0; j < linreg_n_coeffs (c); j++)
 791     {
 792       struct string tstr;
 793       ds_init_empty (&tstr);
 794       this_row = j + 2;
 795
 796       v = linreg_indep_var (c, j);
 797       label = var_to_string (v);
 798       /* Do not overwrite the variable's name. */
 799       ds_put_cstr (&tstr, label);
 800       tab_text (t, 1, this_row, TAB_CENTER, ds_cstr (&tstr));
 801       /*
 802          Regression coefficients.
 803        */
 804       tab_double (t, 2, this_row, 0, linreg_coeff (c, j), NULL);
 805       /*
 806          Standard error of the coefficients.
 807        */
 808       std_err = sqrt (gsl_matrix_get (linreg_cov (c), j + 1, j + 1));
 809       tab_double (t, 3, this_row, 0, std_err, NULL);
 810       /*
 811          Standardized coefficient, i.e., regression coefficient
 812          if all variables had unit variance.
 813        */
 814       beta = sqrt (gsl_matrix_get (cov, j, j));
 815       beta *= linreg_coeff (c, j) /
 816         sqrt (gsl_matrix_get (cov, cov->size1 - 1, cov->size2 - 1));
 817       tab_double (t, 4, this_row, 0, beta, NULL);
 818
 819       /*
 820          Test statistic for H0: coefficient is 0.
 821        */
 822       t_stat = linreg_coeff (c, j) / std_err;
 823       tab_double (t, 5, this_row, 0, t_stat, NULL);
 824       /*
 825          P values for the test statistic above.
 826        */
 827       pval =
 828         2 * gsl_cdf_tdist_Q (fabs (t_stat),
 829                              (double) (linreg_n_obs (c) -
 830                                        linreg_n_coeffs (c) - 1));
 831       tab_double (t, 6, this_row, 0, pval, NULL);
 832       ds_destroy (&tstr);
 833     }
 834   tab_title (t, _("Coefficients (%s)"), var_to_string (var));
 835   tab_submit (t);
 836 }
 837
 838 /*
 839   Display the ANOVA table.
 840 */
 841 static void
 842 reg_stats_anova (linreg * c, void *aux UNUSED, const struct variable *var)
 843 {
 844   int n_cols = 7;
 845   int n_rows = 4;
 846   const double msm = linreg_ssreg (c) / linreg_dfmodel (c);
 847   const double mse = linreg_mse (c);
 848   const double F = msm / mse;
 849   const double pval = gsl_cdf_fdist_Q (F, c->dfm, c->dfe);
 850
 851   struct tab_table *t;
 852
 853   assert (c != NULL);
 854   t = tab_create (n_cols, n_rows);
 855   tab_headers (t, 2, 0, 1, 0);
 856
 857   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
 858
 859   tab_hline (t, TAL_2, 0, n_cols - 1, 1);
 860   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 861   tab_vline (t, TAL_0, 1, 0, 0);
 862
 863   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares"));
 864   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df"));
 865   tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square"));
 866   tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("F"));
 867   tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
 868
 869   tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("Regression"));
 870   tab_text (t, 1, 2, TAB_LEFT | TAT_TITLE, _("Residual"));
 871   tab_text (t, 1, 3, TAB_LEFT | TAT_TITLE, _("Total"));
 872
 873   /* Sums of Squares */
 874   tab_double (t, 2, 1, 0, linreg_ssreg (c), NULL);
 875   tab_double (t, 2, 3, 0, linreg_sst (c), NULL);
 876   tab_double (t, 2, 2, 0, linreg_sse (c), NULL);
 877
 878
 879   /* Degrees of freedom */
 880   tab_text_format (t, 3, 1, TAB_RIGHT, "%g", c->dfm);
 881   tab_text_format (t, 3, 2, TAB_RIGHT, "%g", c->dfe);
 882   tab_text_format (t, 3, 3, TAB_RIGHT, "%g", c->dft);
 883
 884   /* Mean Squares */
 885   tab_double (t, 4, 1, TAB_RIGHT, msm, NULL);
 886   tab_double (t, 4, 2, TAB_RIGHT, mse, NULL);
 887
 888   tab_double (t, 5, 1, 0, F, NULL);
 889
 890   tab_double (t, 6, 1, 0, pval, NULL);
 891
 892   tab_title (t, _("ANOVA (%s)"), var_to_string (var));
 893   tab_submit (t);
 894 }
 895
 896
 897 static void
 898 reg_stats_bcov (linreg * c, void *aux UNUSED, const struct variable *var)
 899 {
 900   int n_cols;
 901   int n_rows;
 902   int i;
 903   int k;
 904   int row;
 905   int col;
 906   const char *label;
 907   struct tab_table *t;
 908
 909   assert (c != NULL);
 910   n_cols = c->n_indeps + 1 + 2;
 911   n_rows = 2 * (c->n_indeps + 1);
 912   t = tab_create (n_cols, n_rows);
 913   tab_headers (t, 2, 0, 1, 0);
 914   tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
 915   tab_hline (t, TAL_2, 0, n_cols - 1, 1);
 916   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 917   tab_vline (t, TAL_0, 1, 0, 0);
 918   tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Model"));
 919   tab_text (t, 1, 1, TAB_CENTER | TAT_TITLE, _("Covariances"));
 920   for (i = 0; i < linreg_n_coeffs (c); i++)
 921     {
 922       const struct variable *v = linreg_indep_var (c, i);
 923       label = var_to_string (v);
 924       tab_text (t, 2, i, TAB_CENTER, label);
 925       tab_text (t, i + 2, 0, TAB_CENTER, label);
 926       for (k = 1; k < linreg_n_coeffs (c); k++)
 927         {
 928           col = (i <= k) ? k : i;
 929           row = (i <= k) ? i : k;
 930           tab_double (t, k + 2, i, TAB_CENTER,
 931                       gsl_matrix_get (c->cov, row, col), NULL);
 932         }
 933     }
 934   tab_title (t, _("Coefficient Correlations (%s)"), var_to_string (var));
 935   tab_submit (t);
 936 }
 937
 938 static void
 939 statistics_keyword_output (void (*function)
 940                            (linreg *, void *, const struct variable * var),
 941                            bool keyword, linreg * c, void *aux,
 942                            const struct variable *var)
 943 {
 944   if (keyword)
 945     {
 946       (*function) (c, aux, var);
 947     }
 948 }