pintos-os.org Git - pspp/blob - src/language/stats/oneway.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include <gsl/gsl_cdf.h>
  20 #include <gsl/gsl_matrix.h>
  21 #include <math.h>
  22
  23 #include "data/case.h"
  24 #include "data/casegrouper.h"
  25 #include "data/casereader.h"
  26 #include "data/dataset.h"
  27 #include "data/dictionary.h"
  28 #include "data/format.h"
  29 #include "data/value.h"
  30 #include "language/command.h"
  31 #include "language/dictionary/split-file.h"
  32 #include "language/lexer/lexer.h"
  33 #include "language/lexer/value-parser.h"
  34 #include "language/lexer/variable-parser.h"
  35 #include "libpspp/ll.h"
  36 #include "libpspp/message.h"
  37 #include "libpspp/misc.h"
  38 #include "libpspp/taint.h"
  39 #include "linreg/sweep.h"
  40 #include "math/categoricals.h"
  41 #include "math/covariance.h"
  42 #include "math/levene.h"
  43 #include "math/moments.h"
  44 #include "output/tab.h"
  45
  46 #include "gettext.h"
  47 #define _(msgid) gettext (msgid)
  48
  49
  50 enum missing_type
  51   {
  52     MISS_LISTWISE,
  53     MISS_ANALYSIS,
  54   };
  55
  56 enum statistics
  57   {
  58     STATS_DESCRIPTIVES = 0x0001,
  59     STATS_HOMOGENEITY = 0x0002
  60   };
  61
  62 struct coeff_node
  63 {
  64   struct ll ll;
  65   double coeff;
  66 };
  67
  68
  69 struct contrasts_node
  70 {
  71   struct ll ll;
  72   struct ll_list coefficient_list;
  73
  74   bool bad_count; /* True if the number of coefficients does not equal the number of groups */
  75 };
  76
  77 struct oneway_spec
  78 {
  79   size_t n_vars;
  80   const struct variable **vars;
  81
  82   const struct variable *indep_var;
  83
  84   enum statistics stats;
  85
  86   enum missing_type missing_type;
  87   enum mv_class exclude;
  88
  89   /* List of contrasts */
  90   struct ll_list contrast_list;
  91
  92   /* The weight variable */
  93   const struct variable *wv;
  94
  95 };
  96
  97 /* Per category data */
  98 struct descriptive_data
  99 {
 100   const struct variable *var;
 101   struct moments1 *mom;
 102
 103   double minimum;
 104   double maximum;
 105 };
 106
 107 /* Workspace variable for each dependent variable */
 108 struct per_var_ws
 109 {
 110   struct categoricals *cat;
 111   struct covariance *cov;
 112
 113   double sst;
 114   double sse;
 115   double ssa;
 116
 117   int n_groups;
 118
 119   double mse;
 120   double levene_w;
 121 };
 122
 123 struct oneway_workspace
 124 {
 125   /* The number of distinct values of the independent variable, when all
 126      missing values are disregarded */
 127   int actual_number_of_groups;
 128
 129   struct per_var_ws *vws;
 130
 131   /* An array of descriptive data.  One for each dependent variable */
 132   struct descriptive_data **dd_total;
 133 };
 134
 135 /* Routines to show the output tables */
 136 static void show_anova_table (const struct oneway_spec *, const struct oneway_workspace *);
 137 static void show_descriptives (const struct oneway_spec *, const struct oneway_workspace *);
 138 static void show_homogeneity (const struct oneway_spec *, const struct oneway_workspace *);
 139
 140 static void output_oneway (const struct oneway_spec *, struct oneway_workspace *ws);
 141 static void run_oneway (const struct oneway_spec *cmd, struct casereader *input, const struct dataset *ds);
 142
 143 int
 144 cmd_oneway (struct lexer *lexer, struct dataset *ds)
 145 {
 146   const struct dictionary *dict = dataset_dict (ds);
 147   struct oneway_spec oneway ;
 148   oneway.n_vars = 0;
 149   oneway.vars = NULL;
 150   oneway.indep_var = NULL;
 151   oneway.stats = 0;
 152   oneway.missing_type = MISS_ANALYSIS;
 153   oneway.exclude = MV_ANY;
 154   oneway.wv = dict_get_weight (dict);
 155
 156   ll_init (&oneway.contrast_list);
 157
 158
 159   if ( lex_match (lexer, T_SLASH))
 160     {
 161       if (!lex_force_match_id (lexer, "VARIABLES"))
 162         {
 163           goto error;
 164         }
 165       lex_match (lexer, T_EQUALS);
 166     }
 167
 168   if (!parse_variables_const (lexer, dict,
 169                               &oneway.vars, &oneway.n_vars,
 170                               PV_NO_DUPLICATE | PV_NUMERIC))
 171     goto error;
 172
 173   lex_force_match (lexer, T_BY);
 174
 175   oneway.indep_var = parse_variable_const (lexer, dict);
 176
 177   while (lex_token (lexer) != T_ENDCMD)
 178     {
 179       lex_match (lexer, T_SLASH);
 180
 181       if (lex_match_id (lexer, "STATISTICS"))
 182         {
 183           lex_match (lexer, T_EQUALS);
 184           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 185             {
 186               if (lex_match_id (lexer, "DESCRIPTIVES"))
 187                 {
 188                   oneway.stats |= STATS_DESCRIPTIVES;
 189                 }
 190               else if (lex_match_id (lexer, "HOMOGENEITY"))
 191                 {
 192                   oneway.stats |= STATS_HOMOGENEITY;
 193                 }
 194               else
 195                 {
 196                   lex_error (lexer, NULL);
 197                   goto error;
 198                 }
 199             }
 200         }
 201       else if (lex_match_id (lexer, "CONTRAST"))
 202         {
 203           struct contrasts_node *cl = xzalloc (sizeof *cl);
 204
 205           struct ll_list *coefficient_list = &cl->coefficient_list;
 206           lex_match (lexer, T_EQUALS);
 207
 208           ll_init (coefficient_list);
 209
 210           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 211             {
 212               if ( lex_is_number (lexer))
 213                 {
 214                   struct coeff_node *cc = xmalloc (sizeof *cc);
 215                   cc->coeff = lex_number (lexer);
 216
 217                   ll_push_tail (coefficient_list, &cc->ll);
 218                   lex_get (lexer);
 219                 }
 220               else
 221                 {
 222                   lex_error (lexer, NULL);
 223                   goto error;
 224                 }
 225             }
 226
 227           ll_push_tail (&oneway.contrast_list, &cl->ll);
 228         }
 229       else if (lex_match_id (lexer, "MISSING"))
 230         {
 231           lex_match (lexer, T_EQUALS);
 232           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 233             {
 234               if (lex_match_id (lexer, "INCLUDE"))
 235                 {
 236                   oneway.exclude = MV_SYSTEM;
 237                 }
 238               else if (lex_match_id (lexer, "EXCLUDE"))
 239                 {
 240                   oneway.exclude = MV_ANY;
 241                 }
 242               else if (lex_match_id (lexer, "LISTWISE"))
 243                 {
 244                   oneway.missing_type = MISS_LISTWISE;
 245                 }
 246               else if (lex_match_id (lexer, "ANALYSIS"))
 247                 {
 248                   oneway.missing_type = MISS_ANALYSIS;
 249                 }
 250               else
 251                 {
 252                   lex_error (lexer, NULL);
 253                   goto error;
 254                 }
 255             }
 256         }
 257       else
 258         {
 259           lex_error (lexer, NULL);
 260           goto error;
 261         }
 262     }
 263
 264
 265   {
 266     struct casegrouper *grouper;
 267     struct casereader *group;
 268     bool ok;
 269
 270     grouper = casegrouper_create_splits (proc_open (ds), dict);
 271     while (casegrouper_get_next_group (grouper, &group))
 272       run_oneway (&oneway, group, ds);
 273     ok = casegrouper_destroy (grouper);
 274     ok = proc_commit (ds) && ok;
 275   }
 276
 277   free (oneway.vars);
 278   return CMD_SUCCESS;
 279
 280  error:
 281   free (oneway.vars);
 282   return CMD_FAILURE;
 283 }
 284
 285
 286 \f
 287
 288
 289 static struct descriptive_data *
 290 dd_create (const struct variable *var)
 291 {
 292   struct descriptive_data *dd = xmalloc (sizeof *dd);
 293
 294   dd->mom = moments1_create (MOMENT_VARIANCE);
 295   dd->minimum = DBL_MAX;
 296   dd->maximum = -DBL_MAX;
 297   dd->var = var;
 298
 299   return dd;
 300 }
 301
 302 static void
 303 dd_destroy (struct descriptive_data *dd)
 304 {
 305   moments1_destroy (dd->mom);
 306   free (dd);
 307 }
 308
 309 static void *
 310 makeit (void *aux1, void *aux2 UNUSED)
 311 {
 312   const struct variable *var = aux1;
 313
 314   struct descriptive_data *dd = dd_create (var);
 315
 316   return dd;
 317 }
 318
 319 static void
 320 updateit (void *user_data,
 321           enum mv_class exclude,
 322           const struct variable *wv,
 323           const struct variable *catvar UNUSED,
 324           const struct ccase *c,
 325           void *aux1, void *aux2)
 326 {
 327   struct descriptive_data *dd = user_data;
 328
 329   const struct variable *varp = aux1;
 330
 331   const union value *valx = case_data (c, varp);
 332
 333   struct descriptive_data *dd_total = aux2;
 334
 335   double weight;
 336
 337   if ( var_is_value_missing (varp, valx, exclude))
 338     return;
 339
 340   weight = wv != NULL ? case_data (c, wv)->f : 1.0;
 341
 342   moments1_add (dd->mom, valx->f, weight);
 343   if (valx->f < dd->minimum)
 344     dd->minimum = valx->f;
 345
 346   if (valx->f > dd->maximum)
 347     dd->maximum = valx->f;
 348
 349   {
 350     const struct variable *var = dd_total->var;
 351     const union value *val = case_data (c, var);
 352
 353     moments1_add (dd_total->mom,
 354                   val->f,
 355                   weight);
 356
 357     if (val->f < dd_total->minimum)
 358       dd_total->minimum = val->f;
 359
 360     if (val->f > dd_total->maximum)
 361       dd_total->maximum = val->f;
 362   }
 363 }
 364
 365 static void
 366 run_oneway (const struct oneway_spec *cmd,
 367             struct casereader *input,
 368             const struct dataset *ds)
 369 {
 370   int v;
 371   struct taint *taint;
 372   struct dictionary *dict = dataset_dict (ds);
 373   struct casereader *reader;
 374   struct ccase *c;
 375
 376   struct oneway_workspace ws;
 377
 378   ws.actual_number_of_groups = 0;
 379   ws.vws = xzalloc (cmd->n_vars * sizeof (*ws.vws));
 380   ws.dd_total = xmalloc (sizeof (struct descriptive_data) * cmd->n_vars);
 381
 382   for (v = 0 ; v < cmd->n_vars; ++v)
 383     ws.dd_total[v] = dd_create (cmd->vars[v]);
 384
 385   for (v = 0; v < cmd->n_vars; ++v)
 386     {
 387       ws.vws[v].cat = categoricals_create (&cmd->indep_var, 1, cmd->wv,
 388                                            cmd->exclude, makeit, updateit,
 389                                            CONST_CAST (struct variable *,
 390                                                        cmd->vars[v]),
 391                                            ws.dd_total[v]);
 392
 393       ws.vws[v].cov = covariance_2pass_create (1, &cmd->vars[v],
 394                                                ws.vws[v].cat,
 395                                                cmd->wv, cmd->exclude);
 396     }
 397
 398   c = casereader_peek (input, 0);
 399   if (c == NULL)
 400     {
 401       casereader_destroy (input);
 402       goto finish;
 403     }
 404   output_split_file_values (ds, c);
 405   case_unref (c);
 406
 407   taint = taint_clone (casereader_get_taint (input));
 408
 409   input = casereader_create_filter_missing (input, &cmd->indep_var, 1,
 410                                             cmd->exclude, NULL, NULL);
 411   if (cmd->missing_type == MISS_LISTWISE)
 412     input = casereader_create_filter_missing (input, cmd->vars, cmd->n_vars,
 413                                               cmd->exclude, NULL, NULL);
 414   input = casereader_create_filter_weight (input, dict, NULL, NULL);
 415
 416
 417   if (cmd->stats & STATS_HOMOGENEITY)
 418     for (v = 0; v < cmd->n_vars; ++v)
 419       {
 420         struct per_var_ws *pvw = &ws.vws[v];
 421
 422         pvw->levene_w = levene (input, cmd->indep_var, cmd->vars[v], cmd->wv, cmd->exclude);
 423       }
 424
 425   reader = casereader_clone (input);
 426
 427   for (; (c = casereader_read (reader)) != NULL; case_unref (c))
 428     {
 429       int i;
 430
 431       for (i = 0; i < cmd->n_vars; ++i)
 432         {
 433           struct per_var_ws *pvw = &ws.vws[i];
 434           const struct variable *v = cmd->vars[i];
 435           const union value *val = case_data (c, v);
 436
 437           if ( MISS_ANALYSIS == cmd->missing_type)
 438             {
 439               if ( var_is_value_missing (v, val, cmd->exclude))
 440                 continue;
 441             }
 442
 443           covariance_accumulate_pass1 (pvw->cov, c);
 444         }
 445     }
 446   casereader_destroy (reader);
 447   reader = casereader_clone (input);
 448   for ( ; (c = casereader_read (reader) ); case_unref (c))
 449     {
 450       int i;
 451       for (i = 0; i < cmd->n_vars; ++i)
 452         {
 453           struct per_var_ws *pvw = &ws.vws[i];
 454           const struct variable *v = cmd->vars[i];
 455           const union value *val = case_data (c, v);
 456
 457           if ( MISS_ANALYSIS == cmd->missing_type)
 458             {
 459               if ( var_is_value_missing (v, val, cmd->exclude))
 460                 continue;
 461             }
 462
 463           covariance_accumulate_pass2 (pvw->cov, c);
 464         }
 465     }
 466   casereader_destroy (reader);
 467
 468   for (v = 0; v < cmd->n_vars; ++v)
 469     {
 470       struct per_var_ws *pvw = &ws.vws[v];
 471       gsl_matrix *cm = covariance_calculate_unnormalized (pvw->cov);
 472       const struct categoricals *cats = covariance_get_categoricals (pvw->cov);
 473
 474       double n;
 475       moments1_calculate (ws.dd_total[v]->mom, &n, NULL, NULL, NULL, NULL);
 476
 477       pvw->sst = gsl_matrix_get (cm, 0, 0);
 478
 479       //      gsl_matrix_fprintf (stdout, cm, "%g ");
 480
 481       reg_sweep (cm, 0);
 482
 483       pvw->sse = gsl_matrix_get (cm, 0, 0);
 484
 485       pvw->ssa = pvw->sst - pvw->sse;
 486
 487       pvw->n_groups = categoricals_total (cats);
 488
 489       pvw->mse = (pvw->sst - pvw->ssa) / (n - pvw->n_groups);
 490
 491       gsl_matrix_free (cm);
 492     }
 493
 494   for (v = 0; v < cmd->n_vars; ++v)
 495     {
 496       const struct categoricals *cats = covariance_get_categoricals (ws.vws[v].cov);
 497
 498       categoricals_done (cats);
 499
 500       if (categoricals_total (cats) > ws.actual_number_of_groups)
 501         ws.actual_number_of_groups = categoricals_total (cats);
 502     }
 503
 504   casereader_destroy (input);
 505
 506   if (!taint_has_tainted_successor (taint))
 507     output_oneway (cmd, &ws);
 508
 509   taint_destroy (taint);
 510
 511  finish:
 512   for (v = 0; v < cmd->n_vars; ++v)
 513     {
 514       covariance_destroy (ws.vws[v].cov);
 515       dd_destroy (ws.dd_total[v]);
 516     }
 517   free (ws.vws);
 518   free (ws.dd_total);
 519
 520 }
 521
 522 static void show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspace *ws);
 523 static void show_contrast_tests (const struct oneway_spec *cmd, const struct oneway_workspace *ws);
 524
 525 static void
 526 output_oneway (const struct oneway_spec *cmd, struct oneway_workspace *ws)
 527 {
 528   size_t i = 0;
 529
 530   /* Check the sanity of the given contrast values */
 531   struct contrasts_node *coeff_list  = NULL;
 532   ll_for_each (coeff_list, struct contrasts_node, ll, &cmd->contrast_list)
 533     {
 534       struct coeff_node *cn = NULL;
 535       double sum = 0;
 536       struct ll_list *cl = &coeff_list->coefficient_list;
 537       ++i;
 538
 539       if (ll_count (cl) != ws->actual_number_of_groups)
 540         {
 541           msg (SW,
 542                _("Number of contrast coefficients must equal the number of groups"));
 543           coeff_list->bad_count = true;
 544           continue;
 545         }
 546
 547       ll_for_each (cn, struct coeff_node, ll, cl)
 548         sum += cn->coeff;
 549
 550       if ( sum != 0.0 )
 551         msg (SW, _("Coefficients for contrast %zu do not total zero"), i);
 552     }
 553
 554   if (cmd->stats & STATS_DESCRIPTIVES)
 555     show_descriptives (cmd, ws);
 556
 557   if (cmd->stats & STATS_HOMOGENEITY)
 558     show_homogeneity (cmd, ws);
 559
 560   show_anova_table (cmd, ws);
 561
 562   if (ll_count (&cmd->contrast_list) > 0)
 563     {
 564       show_contrast_coeffs (cmd, ws);
 565       show_contrast_tests (cmd, ws);
 566     }
 567 }
 568
 569
 570 /* Show the ANOVA table */
 571 static void
 572 show_anova_table (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 573 {
 574   size_t i;
 575   int n_cols =7;
 576   size_t n_rows = cmd->n_vars * 3 + 1;
 577
 578   struct tab_table *t = tab_create (n_cols, n_rows);
 579
 580   tab_headers (t, 2, 0, 1, 0);
 581
 582   tab_box (t,
 583            TAL_2, TAL_2,
 584            -1, TAL_1,
 585            0, 0,
 586            n_cols - 1, n_rows - 1);
 587
 588   tab_hline (t, TAL_2, 0, n_cols - 1, 1 );
 589   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 590   tab_vline (t, TAL_0, 1, 0, 0);
 591
 592   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares"));
 593   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df"));
 594   tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square"));
 595   tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("F"));
 596   tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
 597
 598
 599   for (i = 0; i < cmd->n_vars; ++i)
 600     {
 601       double n;
 602       double df1, df2;
 603       double msa;
 604       const char *s = var_to_string (cmd->vars[i]);
 605       const struct per_var_ws *pvw = &ws->vws[i];
 606
 607       moments1_calculate (ws->dd_total[i]->mom, &n, NULL, NULL, NULL, NULL);
 608
 609       df1 = pvw->n_groups - 1;
 610       df2 = n - pvw->n_groups;
 611       msa = pvw->ssa / df1;
 612
 613       tab_text (t, 0, i * 3 + 1, TAB_LEFT | TAT_TITLE, s);
 614       tab_text (t, 1, i * 3 + 1, TAB_LEFT | TAT_TITLE, _("Between Groups"));
 615       tab_text (t, 1, i * 3 + 2, TAB_LEFT | TAT_TITLE, _("Within Groups"));
 616       tab_text (t, 1, i * 3 + 3, TAB_LEFT | TAT_TITLE, _("Total"));
 617
 618       if (i > 0)
 619         tab_hline (t, TAL_1, 0, n_cols - 1, i * 3 + 1);
 620
 621
 622       /* Sums of Squares */
 623       tab_double (t, 2, i * 3 + 1, 0, pvw->ssa, NULL);
 624       tab_double (t, 2, i * 3 + 3, 0, pvw->sst, NULL);
 625       tab_double (t, 2, i * 3 + 2, 0, pvw->sse, NULL);
 626
 627
 628       /* Degrees of freedom */
 629       tab_fixed (t, 3, i * 3 + 1, 0, df1, 4, 0);
 630       tab_fixed (t, 3, i * 3 + 2, 0, df2, 4, 0);
 631       tab_fixed (t, 3, i * 3 + 3, 0, n - 1, 4, 0);
 632
 633       /* Mean Squares */
 634       tab_double (t, 4, i * 3 + 1, TAB_RIGHT, msa, NULL);
 635       tab_double (t, 4, i * 3 + 2, TAB_RIGHT, pvw->mse, NULL);
 636
 637       {
 638         const double F = msa / pvw->mse ;
 639
 640         /* The F value */
 641         tab_double (t, 5, i * 3 + 1, 0,  F, NULL);
 642
 643         /* The significance */
 644         tab_double (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1, df2), NULL);
 645       }
 646     }
 647
 648   tab_title (t, _("ANOVA"));
 649   tab_submit (t);
 650 }
 651
 652
 653 /* Show the descriptives table */
 654 static void
 655 show_descriptives (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 656 {
 657   size_t v;
 658   int n_cols = 10;
 659   struct tab_table *t;
 660   int row;
 661
 662   const double confidence = 0.95;
 663   const double q = (1.0 - confidence) / 2.0;
 664
 665   const struct fmt_spec *wfmt = cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0;
 666
 667   int n_rows = 2;
 668
 669   for (v = 0; v < cmd->n_vars; ++v)
 670     n_rows += ws->actual_number_of_groups + 1;
 671
 672   t = tab_create (n_cols, n_rows);
 673   tab_headers (t, 2, 0, 2, 0);
 674
 675   /* Put a frame around the entire box, and vertical lines inside */
 676   tab_box (t,
 677            TAL_2, TAL_2,
 678            -1, TAL_1,
 679            0, 0,
 680            n_cols - 1, n_rows - 1);
 681
 682   /* Underline headers */
 683   tab_hline (t, TAL_2, 0, n_cols - 1, 2);
 684   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 685
 686   tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("N"));
 687   tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Mean"));
 688   tab_text (t, 4, 1, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
 689   tab_text (t, 5, 1, TAB_CENTER | TAT_TITLE, _("Std. Error"));
 690
 691
 692   tab_vline (t, TAL_0, 7, 0, 0);
 693   tab_hline (t, TAL_1, 6, 7, 1);
 694   tab_joint_text_format (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE,
 695                          _("%g%% Confidence Interval for Mean"),
 696                          confidence*100.0);
 697
 698   tab_text (t, 6, 1, TAB_CENTER | TAT_TITLE, _("Lower Bound"));
 699   tab_text (t, 7, 1, TAB_CENTER | TAT_TITLE, _("Upper Bound"));
 700
 701   tab_text (t, 8, 1, TAB_CENTER | TAT_TITLE, _("Minimum"));
 702   tab_text (t, 9, 1, TAB_CENTER | TAT_TITLE, _("Maximum"));
 703
 704   tab_title (t, _("Descriptives"));
 705
 706   row = 2;
 707   for (v = 0; v < cmd->n_vars; ++v)
 708     {
 709       const char *s = var_to_string (cmd->vars[v]);
 710       const struct fmt_spec *fmt = var_get_print_format (cmd->vars[v]);
 711
 712       int count = 0;
 713
 714       struct per_var_ws *pvw = &ws->vws[v];
 715       const struct categoricals *cats = covariance_get_categoricals (pvw->cov);
 716
 717       tab_text (t, 0, row, TAB_LEFT | TAT_TITLE, s);
 718       if ( v > 0)
 719         tab_hline (t, TAL_1, 0, n_cols - 1, row);
 720
 721       for (count = 0; count < categoricals_total (cats); ++count)
 722         {
 723           double T;
 724           double n, mean, variance;
 725           double std_dev, std_error ;
 726
 727           struct string vstr;
 728
 729           const union value *gval = categoricals_get_value_by_subscript (cats, count);
 730           const struct descriptive_data *dd = categoricals_get_user_data_by_subscript (cats, count);
 731
 732           moments1_calculate (dd->mom, &n, &mean, &variance, NULL, NULL);
 733
 734           std_dev = sqrt (variance);
 735           std_error = std_dev / sqrt (n) ;
 736
 737           ds_init_empty (&vstr);
 738
 739           var_append_value_name (cmd->indep_var, gval, &vstr);
 740
 741           tab_text (t, 1, row + count,
 742                     TAB_LEFT | TAT_TITLE,
 743                     ds_cstr (&vstr));
 744
 745           ds_destroy (&vstr);
 746
 747           /* Now fill in the numbers ... */
 748
 749           tab_double (t, 2, row + count, 0, n, wfmt);
 750
 751           tab_double (t, 3, row + count, 0, mean, NULL);
 752
 753           tab_double (t, 4, row + count, 0, std_dev, NULL);
 754
 755
 756           tab_double (t, 5, row + count, 0, std_error, NULL);
 757
 758           /* Now the confidence interval */
 759
 760           T = gsl_cdf_tdist_Qinv (q, n - 1);
 761
 762           tab_double (t, 6, row + count, 0,
 763                       mean - T * std_error, NULL);
 764
 765           tab_double (t, 7, row + count, 0,
 766                       mean + T * std_error, NULL);
 767
 768           /* Min and Max */
 769
 770           tab_double (t, 8, row + count, 0,  dd->minimum, fmt);
 771           tab_double (t, 9, row + count, 0,  dd->maximum, fmt);
 772         }
 773
 774       {
 775         double T;
 776         double n, mean, variance;
 777         double std_dev;
 778         double std_error;
 779
 780         moments1_calculate (ws->dd_total[v]->mom, &n, &mean, &variance, NULL, NULL);
 781
 782         std_dev = sqrt (variance);
 783         std_error = std_dev / sqrt (n) ;
 784
 785         tab_text (t, 1, row + count,
 786                   TAB_LEFT | TAT_TITLE, _("Total"));
 787
 788         tab_double (t, 2, row + count, 0, n, wfmt);
 789
 790         tab_double (t, 3, row + count, 0, mean, NULL);
 791
 792         tab_double (t, 4, row + count, 0, std_dev, NULL);
 793
 794         tab_double (t, 5, row + count, 0, std_error, NULL);
 795
 796         /* Now the confidence interval */
 797         T = gsl_cdf_tdist_Qinv (q, n - 1);
 798
 799         tab_double (t, 6, row + count, 0,
 800                     mean - T * std_error, NULL);
 801
 802         tab_double (t, 7, row + count, 0,
 803                     mean + T * std_error, NULL);
 804
 805         /* Min and Max */
 806         tab_double (t, 8, row + count, 0,  ws->dd_total[v]->minimum, fmt);
 807         tab_double (t, 9, row + count, 0,  ws->dd_total[v]->maximum, fmt);
 808       }
 809
 810       row += categoricals_total (cats) + 1;
 811     }
 812
 813   tab_submit (t);
 814 }
 815
 816 /* Show the homogeneity table */
 817 static void
 818 show_homogeneity (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 819 {
 820   size_t v;
 821   int n_cols = 5;
 822   size_t n_rows = cmd->n_vars + 1;
 823
 824   struct tab_table *t = tab_create (n_cols, n_rows);
 825   tab_headers (t, 1, 0, 1, 0);
 826
 827   /* Put a frame around the entire box, and vertical lines inside */
 828   tab_box (t,
 829            TAL_2, TAL_2,
 830            -1, TAL_1,
 831            0, 0,
 832            n_cols - 1, n_rows - 1);
 833
 834
 835   tab_hline (t, TAL_2, 0, n_cols - 1, 1);
 836   tab_vline (t, TAL_2, 1, 0, n_rows - 1);
 837
 838   tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Levene Statistic"));
 839   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df1"));
 840   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df2"));
 841   tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
 842
 843   tab_title (t, _("Test of Homogeneity of Variances"));
 844
 845   for (v = 0; v < cmd->n_vars; ++v)
 846     {
 847       double n;
 848       const struct per_var_ws *pvw = &ws->vws[v];
 849       double F = pvw->levene_w;
 850
 851       const struct variable *var = cmd->vars[v];
 852       const char *s = var_to_string (var);
 853       double df1, df2;
 854
 855       moments1_calculate (ws->dd_total[v]->mom, &n, NULL, NULL, NULL, NULL);
 856
 857       df1 = pvw->n_groups - 1;
 858       df2 = n - pvw->n_groups;
 859
 860       tab_text (t, 0, v + 1, TAB_LEFT | TAT_TITLE, s);
 861
 862       tab_double (t, 1, v + 1, TAB_RIGHT, F, NULL);
 863       tab_fixed (t, 2, v + 1, TAB_RIGHT, df1, 8, 0);
 864       tab_fixed (t, 3, v + 1, TAB_RIGHT, df2, 8, 0);
 865
 866       /* Now the significance */
 867       tab_double (t, 4, v + 1, TAB_RIGHT, gsl_cdf_fdist_Q (F, df1, df2), NULL);
 868     }
 869
 870   tab_submit (t);
 871 }
 872
 873
 874 /* Show the contrast coefficients table */
 875 static void
 876 show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 877 {
 878   int c_num = 0;
 879   struct ll *cli;
 880
 881   int n_contrasts = ll_count (&cmd->contrast_list);
 882   int n_cols = 2 + ws->actual_number_of_groups;
 883   int n_rows = 2 + n_contrasts;
 884
 885   struct tab_table *t;
 886
 887   const struct covariance *cov = ws->vws[0].cov ;
 888
 889   t = tab_create (n_cols, n_rows);
 890   tab_headers (t, 2, 0, 2, 0);
 891
 892   /* Put a frame around the entire box, and vertical lines inside */
 893   tab_box (t,
 894            TAL_2, TAL_2,
 895            -1, TAL_1,
 896            0, 0,
 897            n_cols - 1, n_rows - 1);
 898
 899   tab_box (t,
 900            -1, -1,
 901            TAL_0, TAL_0,
 902            2, 0,
 903            n_cols - 1, 0);
 904
 905   tab_box (t,
 906            -1, -1,
 907            TAL_0, TAL_0,
 908            0, 0,
 909            1, 1);
 910
 911   tab_hline (t, TAL_1, 2, n_cols - 1, 1);
 912   tab_hline (t, TAL_2, 0, n_cols - 1, 2);
 913
 914   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 915
 916   tab_title (t, _("Contrast Coefficients"));
 917
 918   tab_text (t,  0, 2, TAB_LEFT | TAT_TITLE, _("Contrast"));
 919
 920
 921   tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
 922                   var_to_string (cmd->indep_var));
 923
 924   for ( cli = ll_head (&cmd->contrast_list);
 925         cli != ll_null (&cmd->contrast_list);
 926         cli = ll_next (cli))
 927     {
 928       int count = 0;
 929       struct contrasts_node *cn = ll_data (cli, struct contrasts_node, ll);
 930       struct ll *coeffi ;
 931
 932       tab_text_format (t, 1, c_num + 2, TAB_CENTER, "%d", c_num + 1);
 933
 934       for (coeffi = ll_head (&cn->coefficient_list);
 935            coeffi != ll_null (&cn->coefficient_list);
 936            ++count, coeffi = ll_next (coeffi))
 937         {
 938           const struct categoricals *cats = covariance_get_categoricals (cov);
 939           const union value *val = categoricals_get_value_by_subscript (cats, count);
 940           struct string vstr;
 941
 942           ds_init_empty (&vstr);
 943
 944           var_append_value_name (cmd->indep_var, val, &vstr);
 945
 946           tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, ds_cstr (&vstr));
 947
 948           ds_destroy (&vstr);
 949
 950           if (cn->bad_count)
 951             tab_text (t, count + 2, c_num + 2, TAB_RIGHT, "?" );
 952           else
 953             {
 954               struct coeff_node *coeffn = ll_data (coeffi, struct coeff_node, ll);
 955
 956               tab_text_format (t, count + 2, c_num + 2, TAB_RIGHT, "%g", coeffn->coeff);
 957             }
 958         }
 959       ++c_num;
 960     }
 961
 962   tab_submit (t);
 963 }
 964
 965
 966 /* Show the results of the contrast tests */
 967 static void
 968 show_contrast_tests (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 969 {
 970   int n_contrasts = ll_count (&cmd->contrast_list);
 971   size_t v;
 972   int n_cols = 8;
 973   size_t n_rows = 1 + cmd->n_vars * 2 * n_contrasts;
 974
 975   struct tab_table *t;
 976
 977   t = tab_create (n_cols, n_rows);
 978   tab_headers (t, 3, 0, 1, 0);
 979
 980   /* Put a frame around the entire box, and vertical lines inside */
 981   tab_box (t,
 982            TAL_2, TAL_2,
 983            -1, TAL_1,
 984            0, 0,
 985            n_cols - 1, n_rows - 1);
 986
 987   tab_box (t,
 988            -1, -1,
 989            TAL_0, TAL_0,
 990            0, 0,
 991            2, 0);
 992
 993   tab_hline (t, TAL_2, 0, n_cols - 1, 1);
 994   tab_vline (t, TAL_2, 3, 0, n_rows - 1);
 995
 996   tab_title (t, _("Contrast Tests"));
 997
 998   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Contrast"));
 999   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Value of Contrast"));
1000   tab_text (t,  4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
1001   tab_text (t,  5, 0, TAB_CENTER | TAT_TITLE, _("t"));
1002   tab_text (t,  6, 0, TAB_CENTER | TAT_TITLE, _("df"));
1003   tab_text (t,  7, 0, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
1004
1005   for (v = 0; v < cmd->n_vars; ++v)
1006     {
1007       const struct per_var_ws *pvw = &ws->vws[v];
1008       const struct categoricals *cats = covariance_get_categoricals (pvw->cov);
1009       struct ll *cli;
1010       int i = 0;
1011       int lines_per_variable = 2 * n_contrasts;
1012
1013       tab_text (t,  0, (v * lines_per_variable) + 1, TAB_LEFT | TAT_TITLE,
1014                 var_to_string (cmd->vars[v]));
1015
1016       for ( cli = ll_head (&cmd->contrast_list);
1017             cli != ll_null (&cmd->contrast_list);
1018             ++i, cli = ll_next (cli))
1019         {
1020           struct contrasts_node *cn = ll_data (cli, struct contrasts_node, ll);
1021           struct ll *coeffi ;
1022           int ci = 0;
1023           double contrast_value = 0.0;
1024           double coef_msq = 0.0;
1025
1026           double T;
1027           double std_error_contrast;
1028           double df;
1029           double sec_vneq = 0.0;
1030
1031           /* Note: The calculation of the degrees of freedom in the
1032              "variances not equal" case is painfull!!
1033              The following formula may help to understand it:
1034              \frac{\left (\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2}
1035              {
1036              \sum_{i=1}^k\left (
1037              \frac{\left (c_i^2\frac{s_i^2}{n_i}\right)^2}  {n_i-1}
1038              \right)
1039              }
1040           */
1041
1042           double df_denominator = 0.0;
1043           double df_numerator = 0.0;
1044
1045           double grand_n;
1046           moments1_calculate (ws->dd_total[v]->mom, &grand_n, NULL, NULL, NULL, NULL);
1047           df = grand_n - pvw->n_groups;
1048
1049           if ( i == 0 )
1050             {
1051               tab_text (t,  1, (v * lines_per_variable) + i + 1,
1052                         TAB_LEFT | TAT_TITLE,
1053                         _("Assume equal variances"));
1054
1055               tab_text (t,  1, (v * lines_per_variable) + i + 1 + n_contrasts,
1056                         TAB_LEFT | TAT_TITLE,
1057                         _("Does not assume equal"));
1058             }
1059
1060           tab_text_format (t,  2, (v * lines_per_variable) + i + 1,
1061                            TAB_CENTER | TAT_TITLE, "%d", i + 1);
1062
1063
1064           tab_text_format (t,  2,
1065                            (v * lines_per_variable) + i + 1 + n_contrasts,
1066                            TAB_CENTER | TAT_TITLE, "%d", i + 1);
1067
1068           if (cn->bad_count)
1069             continue;
1070
1071           for (coeffi = ll_head (&cn->coefficient_list);
1072                coeffi != ll_null (&cn->coefficient_list);
1073                ++ci, coeffi = ll_next (coeffi))
1074             {
1075               double n, mean, variance;
1076               const struct descriptive_data *dd = categoricals_get_user_data_by_subscript (cats, ci);
1077               struct coeff_node *cn = ll_data (coeffi, struct coeff_node, ll);
1078               const double coef = cn->coeff;
1079               double winv ;
1080
1081               moments1_calculate (dd->mom, &n, &mean, &variance, NULL, NULL);
1082
1083               winv = variance / n;
1084
1085               contrast_value += coef * mean;
1086
1087               coef_msq += (pow2 (coef)) / n;
1088
1089               sec_vneq += (pow2 (coef)) * variance / n;
1090
1091               df_numerator += (pow2 (coef)) * winv;
1092               df_denominator += pow2((pow2 (coef)) * winv) / (n - 1);
1093             }
1094
1095           sec_vneq = sqrt (sec_vneq);
1096
1097           df_numerator = pow2 (df_numerator);
1098
1099           tab_double (t,  3, (v * lines_per_variable) + i + 1,
1100                       TAB_RIGHT, contrast_value, NULL);
1101
1102           tab_double (t,  3, (v * lines_per_variable) + i + 1 +
1103                       n_contrasts,
1104                       TAB_RIGHT, contrast_value, NULL);
1105
1106           std_error_contrast = sqrt (pvw->mse * coef_msq);
1107
1108           /* Std. Error */
1109           tab_double (t,  4, (v * lines_per_variable) + i + 1,
1110                       TAB_RIGHT, std_error_contrast,
1111                       NULL);
1112
1113           T = fabs (contrast_value / std_error_contrast);
1114
1115           /* T Statistic */
1116
1117           tab_double (t,  5, (v * lines_per_variable) + i + 1,
1118                       TAB_RIGHT, T,
1119                       NULL);
1120
1121
1122           /* Degrees of Freedom */
1123           tab_fixed (t,  6, (v * lines_per_variable) + i + 1,
1124                      TAB_RIGHT,  df,
1125                      8, 0);
1126
1127
1128           /* Significance TWO TAILED !!*/
1129           tab_double (t,  7, (v * lines_per_variable) + i + 1,
1130                       TAB_RIGHT,  2 * gsl_cdf_tdist_Q (T, df),
1131                       NULL);
1132
1133           /* Now for the Variances NOT Equal case */
1134
1135           /* Std. Error */
1136           tab_double (t,  4,
1137                       (v * lines_per_variable) + i + 1 + n_contrasts,
1138                       TAB_RIGHT, sec_vneq,
1139                       NULL);
1140
1141           T = contrast_value / sec_vneq;
1142           tab_double (t,  5,
1143                       (v * lines_per_variable) + i + 1 + n_contrasts,
1144                       TAB_RIGHT, T,
1145                       NULL);
1146
1147           df = df_numerator / df_denominator;
1148
1149           tab_double (t,  6,
1150                       (v * lines_per_variable) + i + 1 + n_contrasts,
1151                       TAB_RIGHT, df,
1152                       NULL);
1153
1154           /* The Significance */
1155           tab_double (t, 7, (v * lines_per_variable) + i + 1 + n_contrasts,
1156                       TAB_RIGHT,  2 * gsl_cdf_tdist_Q (T,df),
1157                       NULL);
1158         }
1159
1160       if ( v > 0 )
1161         tab_hline (t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1);
1162     }
1163
1164   tab_submit (t);
1165 }
1166
1167