pintos-os.org Git - pspp/blob - src/language/stats/oneway.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include <gsl/gsl_cdf.h>
  20 #include <gsl/gsl_matrix.h>
  21 #include <math.h>
  22
  23 #include "data/case.h"
  24 #include "data/casegrouper.h"
  25 #include "data/casereader.h"
  26 #include "data/dataset.h"
  27 #include "data/dictionary.h"
  28 #include "data/format.h"
  29 #include "data/value.h"
  30 #include "language/command.h"
  31 #include "language/dictionary/split-file.h"
  32 #include "language/lexer/lexer.h"
  33 #include "language/lexer/value-parser.h"
  34 #include "language/lexer/variable-parser.h"
  35 #include "libpspp/ll.h"
  36 #include "libpspp/message.h"
  37 #include "libpspp/misc.h"
  38 #include "libpspp/taint.h"
  39 #include "linreg/sweep.h"
  40 #include "math/categoricals.h"
  41 #include "math/covariance.h"
  42 #include "math/levene.h"
  43 #include "math/moments.h"
  44 #include "output/tab.h"
  45
  46 #include "gettext.h"
  47 #define _(msgid) gettext (msgid)
  48
  49
  50 enum missing_type
  51   {
  52     MISS_LISTWISE,
  53     MISS_ANALYSIS,
  54   };
  55
  56 enum statistics
  57   {
  58     STATS_DESCRIPTIVES = 0x0001,
  59     STATS_HOMOGENEITY = 0x0002
  60   };
  61
  62 struct coeff_node
  63 {
  64   struct ll ll;
  65   double coeff;
  66 };
  67
  68
  69 struct contrasts_node
  70 {
  71   struct ll ll;
  72   struct ll_list coefficient_list;
  73
  74   bool bad_count; /* True if the number of coefficients does not equal the number of groups */
  75 };
  76
  77 struct oneway_spec
  78 {
  79   size_t n_vars;
  80   const struct variable **vars;
  81
  82   const struct variable *indep_var;
  83
  84   enum statistics stats;
  85
  86   enum missing_type missing_type;
  87   enum mv_class exclude;
  88
  89   /* List of contrasts */
  90   struct ll_list contrast_list;
  91
  92   /* The weight variable */
  93   const struct variable *wv;
  94 };
  95
  96 /* Per category data */
  97 struct descriptive_data
  98 {
  99   const struct variable *var;
 100   struct moments1 *mom;
 101
 102   double minimum;
 103   double maximum;
 104 };
 105
 106 /* Workspace variable for each dependent variable */
 107 struct per_var_ws
 108 {
 109   struct categoricals *cat;
 110   struct covariance *cov;
 111   struct levene *nl;
 112
 113   double sst;
 114   double sse;
 115   double ssa;
 116
 117   int n_groups;
 118
 119   double mse;
 120 };
 121
 122 struct oneway_workspace
 123 {
 124   /* The number of distinct values of the independent variable, when all
 125      missing values are disregarded */
 126   int actual_number_of_groups;
 127
 128   struct per_var_ws *vws;
 129
 130   /* An array of descriptive data.  One for each dependent variable */
 131   struct descriptive_data **dd_total;
 132 };
 133
 134 /* Routines to show the output tables */
 135 static void show_anova_table (const struct oneway_spec *, const struct oneway_workspace *);
 136 static void show_descriptives (const struct oneway_spec *, const struct oneway_workspace *);
 137 static void show_homogeneity (const struct oneway_spec *, const struct oneway_workspace *);
 138
 139 static void output_oneway (const struct oneway_spec *, struct oneway_workspace *ws);
 140 static void run_oneway (const struct oneway_spec *cmd, struct casereader *input, const struct dataset *ds);
 141
 142 int
 143 cmd_oneway (struct lexer *lexer, struct dataset *ds)
 144 {
 145   const struct dictionary *dict = dataset_dict (ds);
 146   struct oneway_spec oneway ;
 147   oneway.n_vars = 0;
 148   oneway.vars = NULL;
 149   oneway.indep_var = NULL;
 150   oneway.stats = 0;
 151   oneway.missing_type = MISS_ANALYSIS;
 152   oneway.exclude = MV_ANY;
 153   oneway.wv = dict_get_weight (dict);
 154
 155   ll_init (&oneway.contrast_list);
 156
 157
 158   if ( lex_match (lexer, T_SLASH))
 159     {
 160       if (!lex_force_match_id (lexer, "VARIABLES"))
 161         {
 162           goto error;
 163         }
 164       lex_match (lexer, T_EQUALS);
 165     }
 166
 167   if (!parse_variables_const (lexer, dict,
 168                               &oneway.vars, &oneway.n_vars,
 169                               PV_NO_DUPLICATE | PV_NUMERIC))
 170     goto error;
 171
 172   lex_force_match (lexer, T_BY);
 173
 174   oneway.indep_var = parse_variable_const (lexer, dict);
 175
 176   while (lex_token (lexer) != T_ENDCMD)
 177     {
 178       lex_match (lexer, T_SLASH);
 179
 180       if (lex_match_id (lexer, "STATISTICS"))
 181         {
 182           lex_match (lexer, T_EQUALS);
 183           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 184             {
 185               if (lex_match_id (lexer, "DESCRIPTIVES"))
 186                 {
 187                   oneway.stats |= STATS_DESCRIPTIVES;
 188                 }
 189               else if (lex_match_id (lexer, "HOMOGENEITY"))
 190                 {
 191                   oneway.stats |= STATS_HOMOGENEITY;
 192                 }
 193               else
 194                 {
 195                   lex_error (lexer, NULL);
 196                   goto error;
 197                 }
 198             }
 199         }
 200       else if (lex_match_id (lexer, "CONTRAST"))
 201         {
 202           struct contrasts_node *cl = xzalloc (sizeof *cl);
 203
 204           struct ll_list *coefficient_list = &cl->coefficient_list;
 205           lex_match (lexer, T_EQUALS);
 206
 207           ll_init (coefficient_list);
 208
 209           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 210             {
 211               if ( lex_is_number (lexer))
 212                 {
 213                   struct coeff_node *cc = xmalloc (sizeof *cc);
 214                   cc->coeff = lex_number (lexer);
 215
 216                   ll_push_tail (coefficient_list, &cc->ll);
 217                   lex_get (lexer);
 218                 }
 219               else
 220                 {
 221                   lex_error (lexer, NULL);
 222                   goto error;
 223                 }
 224             }
 225
 226           ll_push_tail (&oneway.contrast_list, &cl->ll);
 227         }
 228       else if (lex_match_id (lexer, "MISSING"))
 229         {
 230           lex_match (lexer, T_EQUALS);
 231           while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
 232             {
 233               if (lex_match_id (lexer, "INCLUDE"))
 234                 {
 235                   oneway.exclude = MV_SYSTEM;
 236                 }
 237               else if (lex_match_id (lexer, "EXCLUDE"))
 238                 {
 239                   oneway.exclude = MV_ANY;
 240                 }
 241               else if (lex_match_id (lexer, "LISTWISE"))
 242                 {
 243                   oneway.missing_type = MISS_LISTWISE;
 244                 }
 245               else if (lex_match_id (lexer, "ANALYSIS"))
 246                 {
 247                   oneway.missing_type = MISS_ANALYSIS;
 248                 }
 249               else
 250                 {
 251                   lex_error (lexer, NULL);
 252                   goto error;
 253                 }
 254             }
 255         }
 256       else
 257         {
 258           lex_error (lexer, NULL);
 259           goto error;
 260         }
 261     }
 262
 263
 264   {
 265     struct casegrouper *grouper;
 266     struct casereader *group;
 267     bool ok;
 268
 269     grouper = casegrouper_create_splits (proc_open (ds), dict);
 270     while (casegrouper_get_next_group (grouper, &group))
 271       run_oneway (&oneway, group, ds);
 272     ok = casegrouper_destroy (grouper);
 273     ok = proc_commit (ds) && ok;
 274   }
 275
 276   free (oneway.vars);
 277   return CMD_SUCCESS;
 278
 279  error:
 280   free (oneway.vars);
 281   return CMD_FAILURE;
 282 }
 283
 284
 285 \f
 286
 287
 288 static struct descriptive_data *
 289 dd_create (const struct variable *var)
 290 {
 291   struct descriptive_data *dd = xmalloc (sizeof *dd);
 292
 293   dd->mom = moments1_create (MOMENT_VARIANCE);
 294   dd->minimum = DBL_MAX;
 295   dd->maximum = -DBL_MAX;
 296   dd->var = var;
 297
 298   return dd;
 299 }
 300
 301 static void
 302 dd_destroy (struct descriptive_data *dd)
 303 {
 304   moments1_destroy (dd->mom);
 305   free (dd);
 306 }
 307
 308 static void *
 309 makeit (void *aux1, void *aux2 UNUSED)
 310 {
 311   const struct variable *var = aux1;
 312
 313   struct descriptive_data *dd = dd_create (var);
 314
 315   return dd;
 316 }
 317
 318 static void
 319 updateit (void *user_data,
 320           enum mv_class exclude,
 321           const struct variable *wv,
 322           const struct variable *catvar UNUSED,
 323           const struct ccase *c,
 324           void *aux1, void *aux2)
 325 {
 326   struct descriptive_data *dd = user_data;
 327
 328   const struct variable *varp = aux1;
 329
 330   const union value *valx = case_data (c, varp);
 331
 332   struct descriptive_data *dd_total = aux2;
 333
 334   double weight;
 335
 336   if ( var_is_value_missing (varp, valx, exclude))
 337     return;
 338
 339   weight = wv != NULL ? case_data (c, wv)->f : 1.0;
 340
 341   moments1_add (dd->mom, valx->f, weight);
 342   if (valx->f < dd->minimum)
 343     dd->minimum = valx->f;
 344
 345   if (valx->f > dd->maximum)
 346     dd->maximum = valx->f;
 347
 348   {
 349     const struct variable *var = dd_total->var;
 350     const union value *val = case_data (c, var);
 351
 352     moments1_add (dd_total->mom,
 353                   val->f,
 354                   weight);
 355
 356     if (val->f < dd_total->minimum)
 357       dd_total->minimum = val->f;
 358
 359     if (val->f > dd_total->maximum)
 360       dd_total->maximum = val->f;
 361   }
 362 }
 363
 364 static void
 365 run_oneway (const struct oneway_spec *cmd,
 366             struct casereader *input,
 367             const struct dataset *ds)
 368 {
 369   int v;
 370   struct taint *taint;
 371   struct dictionary *dict = dataset_dict (ds);
 372   struct casereader *reader;
 373   struct ccase *c;
 374
 375   struct oneway_workspace ws;
 376
 377   ws.actual_number_of_groups = 0;
 378   ws.vws = xzalloc (cmd->n_vars * sizeof (*ws.vws));
 379   ws.dd_total = xmalloc (sizeof (struct descriptive_data) * cmd->n_vars);
 380
 381   for (v = 0 ; v < cmd->n_vars; ++v)
 382     ws.dd_total[v] = dd_create (cmd->vars[v]);
 383
 384   for (v = 0; v < cmd->n_vars; ++v)
 385     {
 386       ws.vws[v].cat = categoricals_create (&cmd->indep_var, 1, cmd->wv,
 387                                            cmd->exclude, makeit, updateit,
 388                                            CONST_CAST (struct variable *,
 389                                                        cmd->vars[v]),
 390                                            ws.dd_total[v]);
 391
 392       ws.vws[v].cov = covariance_2pass_create (1, &cmd->vars[v],
 393                                                ws.vws[v].cat,
 394                                                cmd->wv, cmd->exclude);
 395       ws.vws[v].nl = levene_create (var_get_width (cmd->indep_var), NULL);
 396     }
 397
 398   c = casereader_peek (input, 0);
 399   if (c == NULL)
 400     {
 401       casereader_destroy (input);
 402       goto finish;
 403     }
 404   output_split_file_values (ds, c);
 405   case_unref (c);
 406
 407   taint = taint_clone (casereader_get_taint (input));
 408
 409   input = casereader_create_filter_missing (input, &cmd->indep_var, 1,
 410                                             cmd->exclude, NULL, NULL);
 411   if (cmd->missing_type == MISS_LISTWISE)
 412     input = casereader_create_filter_missing (input, cmd->vars, cmd->n_vars,
 413                                               cmd->exclude, NULL, NULL);
 414   input = casereader_create_filter_weight (input, dict, NULL, NULL);
 415
 416   reader = casereader_clone (input);
 417   for (; (c = casereader_read (reader)) != NULL; case_unref (c))
 418     {
 419       int i;
 420       double w = dict_get_case_weight (dict, c, NULL);
 421
 422       for (i = 0; i < cmd->n_vars; ++i)
 423         {
 424           struct per_var_ws *pvw = &ws.vws[i];
 425           const struct variable *v = cmd->vars[i];
 426           const union value *val = case_data (c, v);
 427
 428           if ( MISS_ANALYSIS == cmd->missing_type)
 429             {
 430               if ( var_is_value_missing (v, val, cmd->exclude))
 431                 continue;
 432             }
 433
 434           covariance_accumulate_pass1 (pvw->cov, c);
 435           levene_pass_one (pvw->nl, val->f, w, case_data (c, cmd->indep_var));
 436         }
 437     }
 438   casereader_destroy (reader);
 439
 440   reader = casereader_clone (input);
 441   for ( ; (c = casereader_read (reader) ); case_unref (c))
 442     {
 443       int i;
 444       double w = dict_get_case_weight (dict, c, NULL);
 445       for (i = 0; i < cmd->n_vars; ++i)
 446         {
 447           struct per_var_ws *pvw = &ws.vws[i];
 448           const struct variable *v = cmd->vars[i];
 449           const union value *val = case_data (c, v);
 450
 451           if ( MISS_ANALYSIS == cmd->missing_type)
 452             {
 453               if ( var_is_value_missing (v, val, cmd->exclude))
 454                 continue;
 455             }
 456
 457           covariance_accumulate_pass2 (pvw->cov, c);
 458           levene_pass_two (pvw->nl, val->f, w, case_data (c, cmd->indep_var));
 459         }
 460     }
 461   casereader_destroy (reader);
 462
 463   reader = casereader_clone (input);
 464   for ( ; (c = casereader_read (reader) ); case_unref (c))
 465     {
 466       int i;
 467       double w = dict_get_case_weight (dict, c, NULL);
 468
 469       for (i = 0; i < cmd->n_vars; ++i)
 470         {
 471           struct per_var_ws *pvw = &ws.vws[i];
 472           const struct variable *v = cmd->vars[i];
 473           const union value *val = case_data (c, v);
 474
 475           if ( MISS_ANALYSIS == cmd->missing_type)
 476             {
 477               if ( var_is_value_missing (v, val, cmd->exclude))
 478                 continue;
 479             }
 480
 481           levene_pass_three (pvw->nl, val->f, w, case_data (c, cmd->indep_var));
 482         }
 483     }
 484   casereader_destroy (reader);
 485
 486
 487   for (v = 0; v < cmd->n_vars; ++v)
 488     {
 489       struct per_var_ws *pvw = &ws.vws[v];
 490       gsl_matrix *cm = covariance_calculate_unnormalized (pvw->cov);
 491       const struct categoricals *cats = covariance_get_categoricals (pvw->cov);
 492
 493       double n;
 494       moments1_calculate (ws.dd_total[v]->mom, &n, NULL, NULL, NULL, NULL);
 495
 496       pvw->sst = gsl_matrix_get (cm, 0, 0);
 497
 498       reg_sweep (cm, 0);
 499
 500       pvw->sse = gsl_matrix_get (cm, 0, 0);
 501
 502       pvw->ssa = pvw->sst - pvw->sse;
 503
 504       pvw->n_groups = categoricals_total (cats);
 505
 506       pvw->mse = (pvw->sst - pvw->ssa) / (n - pvw->n_groups);
 507
 508       gsl_matrix_free (cm);
 509     }
 510
 511   for (v = 0; v < cmd->n_vars; ++v)
 512     {
 513       const struct categoricals *cats = covariance_get_categoricals (ws.vws[v].cov);
 514
 515       categoricals_done (cats);
 516
 517       if (categoricals_total (cats) > ws.actual_number_of_groups)
 518         ws.actual_number_of_groups = categoricals_total (cats);
 519     }
 520
 521   casereader_destroy (input);
 522
 523   if (!taint_has_tainted_successor (taint))
 524     output_oneway (cmd, &ws);
 525
 526   taint_destroy (taint);
 527
 528  finish:
 529   for (v = 0; v < cmd->n_vars; ++v)
 530     {
 531       covariance_destroy (ws.vws[v].cov);
 532       levene_destroy (ws.vws[v].nl);
 533       dd_destroy (ws.dd_total[v]);
 534     }
 535   free (ws.vws);
 536   free (ws.dd_total);
 537 }
 538
 539 static void show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspace *ws);
 540 static void show_contrast_tests (const struct oneway_spec *cmd, const struct oneway_workspace *ws);
 541
 542 static void
 543 output_oneway (const struct oneway_spec *cmd, struct oneway_workspace *ws)
 544 {
 545   size_t i = 0;
 546
 547   /* Check the sanity of the given contrast values */
 548   struct contrasts_node *coeff_list  = NULL;
 549   ll_for_each (coeff_list, struct contrasts_node, ll, &cmd->contrast_list)
 550     {
 551       struct coeff_node *cn = NULL;
 552       double sum = 0;
 553       struct ll_list *cl = &coeff_list->coefficient_list;
 554       ++i;
 555
 556       if (ll_count (cl) != ws->actual_number_of_groups)
 557         {
 558           msg (SW,
 559                _("Number of contrast coefficients must equal the number of groups"));
 560           coeff_list->bad_count = true;
 561           continue;
 562         }
 563
 564       ll_for_each (cn, struct coeff_node, ll, cl)
 565         sum += cn->coeff;
 566
 567       if ( sum != 0.0 )
 568         msg (SW, _("Coefficients for contrast %zu do not total zero"), i);
 569     }
 570
 571   if (cmd->stats & STATS_DESCRIPTIVES)
 572     show_descriptives (cmd, ws);
 573
 574   if (cmd->stats & STATS_HOMOGENEITY)
 575     show_homogeneity (cmd, ws);
 576
 577   show_anova_table (cmd, ws);
 578
 579   if (ll_count (&cmd->contrast_list) > 0)
 580     {
 581       show_contrast_coeffs (cmd, ws);
 582       show_contrast_tests (cmd, ws);
 583     }
 584 }
 585
 586
 587 /* Show the ANOVA table */
 588 static void
 589 show_anova_table (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 590 {
 591   size_t i;
 592   int n_cols =7;
 593   size_t n_rows = cmd->n_vars * 3 + 1;
 594
 595   struct tab_table *t = tab_create (n_cols, n_rows);
 596
 597   tab_headers (t, 2, 0, 1, 0);
 598
 599   tab_box (t,
 600            TAL_2, TAL_2,
 601            -1, TAL_1,
 602            0, 0,
 603            n_cols - 1, n_rows - 1);
 604
 605   tab_hline (t, TAL_2, 0, n_cols - 1, 1 );
 606   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 607   tab_vline (t, TAL_0, 1, 0, 0);
 608
 609   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares"));
 610   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df"));
 611   tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square"));
 612   tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("F"));
 613   tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
 614
 615
 616   for (i = 0; i < cmd->n_vars; ++i)
 617     {
 618       double n;
 619       double df1, df2;
 620       double msa;
 621       const char *s = var_to_string (cmd->vars[i]);
 622       const struct per_var_ws *pvw = &ws->vws[i];
 623
 624       moments1_calculate (ws->dd_total[i]->mom, &n, NULL, NULL, NULL, NULL);
 625
 626       df1 = pvw->n_groups - 1;
 627       df2 = n - pvw->n_groups;
 628       msa = pvw->ssa / df1;
 629
 630       tab_text (t, 0, i * 3 + 1, TAB_LEFT | TAT_TITLE, s);
 631       tab_text (t, 1, i * 3 + 1, TAB_LEFT | TAT_TITLE, _("Between Groups"));
 632       tab_text (t, 1, i * 3 + 2, TAB_LEFT | TAT_TITLE, _("Within Groups"));
 633       tab_text (t, 1, i * 3 + 3, TAB_LEFT | TAT_TITLE, _("Total"));
 634
 635       if (i > 0)
 636         tab_hline (t, TAL_1, 0, n_cols - 1, i * 3 + 1);
 637
 638
 639       /* Sums of Squares */
 640       tab_double (t, 2, i * 3 + 1, 0, pvw->ssa, NULL);
 641       tab_double (t, 2, i * 3 + 3, 0, pvw->sst, NULL);
 642       tab_double (t, 2, i * 3 + 2, 0, pvw->sse, NULL);
 643
 644
 645       /* Degrees of freedom */
 646       tab_fixed (t, 3, i * 3 + 1, 0, df1, 4, 0);
 647       tab_fixed (t, 3, i * 3 + 2, 0, df2, 4, 0);
 648       tab_fixed (t, 3, i * 3 + 3, 0, n - 1, 4, 0);
 649
 650       /* Mean Squares */
 651       tab_double (t, 4, i * 3 + 1, TAB_RIGHT, msa, NULL);
 652       tab_double (t, 4, i * 3 + 2, TAB_RIGHT, pvw->mse, NULL);
 653
 654       {
 655         const double F = msa / pvw->mse ;
 656
 657         /* The F value */
 658         tab_double (t, 5, i * 3 + 1, 0,  F, NULL);
 659
 660         /* The significance */
 661         tab_double (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1, df2), NULL);
 662       }
 663     }
 664
 665   tab_title (t, _("ANOVA"));
 666   tab_submit (t);
 667 }
 668
 669
 670 /* Show the descriptives table */
 671 static void
 672 show_descriptives (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 673 {
 674   size_t v;
 675   int n_cols = 10;
 676   struct tab_table *t;
 677   int row;
 678
 679   const double confidence = 0.95;
 680   const double q = (1.0 - confidence) / 2.0;
 681
 682   const struct fmt_spec *wfmt = cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0;
 683
 684   int n_rows = 2;
 685
 686   for (v = 0; v < cmd->n_vars; ++v)
 687     n_rows += ws->actual_number_of_groups + 1;
 688
 689   t = tab_create (n_cols, n_rows);
 690   tab_headers (t, 2, 0, 2, 0);
 691
 692   /* Put a frame around the entire box, and vertical lines inside */
 693   tab_box (t,
 694            TAL_2, TAL_2,
 695            -1, TAL_1,
 696            0, 0,
 697            n_cols - 1, n_rows - 1);
 698
 699   /* Underline headers */
 700   tab_hline (t, TAL_2, 0, n_cols - 1, 2);
 701   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 702
 703   tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("N"));
 704   tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Mean"));
 705   tab_text (t, 4, 1, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
 706   tab_text (t, 5, 1, TAB_CENTER | TAT_TITLE, _("Std. Error"));
 707
 708
 709   tab_vline (t, TAL_0, 7, 0, 0);
 710   tab_hline (t, TAL_1, 6, 7, 1);
 711   tab_joint_text_format (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE,
 712                          _("%g%% Confidence Interval for Mean"),
 713                          confidence*100.0);
 714
 715   tab_text (t, 6, 1, TAB_CENTER | TAT_TITLE, _("Lower Bound"));
 716   tab_text (t, 7, 1, TAB_CENTER | TAT_TITLE, _("Upper Bound"));
 717
 718   tab_text (t, 8, 1, TAB_CENTER | TAT_TITLE, _("Minimum"));
 719   tab_text (t, 9, 1, TAB_CENTER | TAT_TITLE, _("Maximum"));
 720
 721   tab_title (t, _("Descriptives"));
 722
 723   row = 2;
 724   for (v = 0; v < cmd->n_vars; ++v)
 725     {
 726       const char *s = var_to_string (cmd->vars[v]);
 727       const struct fmt_spec *fmt = var_get_print_format (cmd->vars[v]);
 728
 729       int count = 0;
 730
 731       struct per_var_ws *pvw = &ws->vws[v];
 732       const struct categoricals *cats = covariance_get_categoricals (pvw->cov);
 733
 734       tab_text (t, 0, row, TAB_LEFT | TAT_TITLE, s);
 735       if ( v > 0)
 736         tab_hline (t, TAL_1, 0, n_cols - 1, row);
 737
 738       for (count = 0; count < categoricals_total (cats); ++count)
 739         {
 740           double T;
 741           double n, mean, variance;
 742           double std_dev, std_error ;
 743
 744           struct string vstr;
 745
 746           const union value *gval = categoricals_get_value_by_category (cats, count);
 747           const struct descriptive_data *dd = categoricals_get_user_data_by_category (cats, count);
 748
 749           moments1_calculate (dd->mom, &n, &mean, &variance, NULL, NULL);
 750
 751           std_dev = sqrt (variance);
 752           std_error = std_dev / sqrt (n) ;
 753
 754           ds_init_empty (&vstr);
 755
 756           var_append_value_name (cmd->indep_var, gval, &vstr);
 757
 758           tab_text (t, 1, row + count,
 759                     TAB_LEFT | TAT_TITLE,
 760                     ds_cstr (&vstr));
 761
 762           ds_destroy (&vstr);
 763
 764           /* Now fill in the numbers ... */
 765
 766           tab_double (t, 2, row + count, 0, n, wfmt);
 767
 768           tab_double (t, 3, row + count, 0, mean, NULL);
 769
 770           tab_double (t, 4, row + count, 0, std_dev, NULL);
 771
 772
 773           tab_double (t, 5, row + count, 0, std_error, NULL);
 774
 775           /* Now the confidence interval */
 776
 777           T = gsl_cdf_tdist_Qinv (q, n - 1);
 778
 779           tab_double (t, 6, row + count, 0,
 780                       mean - T * std_error, NULL);
 781
 782           tab_double (t, 7, row + count, 0,
 783                       mean + T * std_error, NULL);
 784
 785           /* Min and Max */
 786
 787           tab_double (t, 8, row + count, 0,  dd->minimum, fmt);
 788           tab_double (t, 9, row + count, 0,  dd->maximum, fmt);
 789         }
 790
 791       {
 792         double T;
 793         double n, mean, variance;
 794         double std_dev;
 795         double std_error;
 796
 797         moments1_calculate (ws->dd_total[v]->mom, &n, &mean, &variance, NULL, NULL);
 798
 799         std_dev = sqrt (variance);
 800         std_error = std_dev / sqrt (n) ;
 801
 802         tab_text (t, 1, row + count,
 803                   TAB_LEFT | TAT_TITLE, _("Total"));
 804
 805         tab_double (t, 2, row + count, 0, n, wfmt);
 806
 807         tab_double (t, 3, row + count, 0, mean, NULL);
 808
 809         tab_double (t, 4, row + count, 0, std_dev, NULL);
 810
 811         tab_double (t, 5, row + count, 0, std_error, NULL);
 812
 813         /* Now the confidence interval */
 814         T = gsl_cdf_tdist_Qinv (q, n - 1);
 815
 816         tab_double (t, 6, row + count, 0,
 817                     mean - T * std_error, NULL);
 818
 819         tab_double (t, 7, row + count, 0,
 820                     mean + T * std_error, NULL);
 821
 822         /* Min and Max */
 823         tab_double (t, 8, row + count, 0,  ws->dd_total[v]->minimum, fmt);
 824         tab_double (t, 9, row + count, 0,  ws->dd_total[v]->maximum, fmt);
 825       }
 826
 827       row += categoricals_total (cats) + 1;
 828     }
 829
 830   tab_submit (t);
 831 }
 832
 833 /* Show the homogeneity table */
 834 static void
 835 show_homogeneity (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 836 {
 837   size_t v;
 838   int n_cols = 5;
 839   size_t n_rows = cmd->n_vars + 1;
 840
 841   struct tab_table *t = tab_create (n_cols, n_rows);
 842   tab_headers (t, 1, 0, 1, 0);
 843
 844   /* Put a frame around the entire box, and vertical lines inside */
 845   tab_box (t,
 846            TAL_2, TAL_2,
 847            -1, TAL_1,
 848            0, 0,
 849            n_cols - 1, n_rows - 1);
 850
 851
 852   tab_hline (t, TAL_2, 0, n_cols - 1, 1);
 853   tab_vline (t, TAL_2, 1, 0, n_rows - 1);
 854
 855   tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Levene Statistic"));
 856   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df1"));
 857   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df2"));
 858   tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
 859
 860   tab_title (t, _("Test of Homogeneity of Variances"));
 861
 862   for (v = 0; v < cmd->n_vars; ++v)
 863     {
 864       double n;
 865       const struct per_var_ws *pvw = &ws->vws[v];
 866       double F = levene_calculate (pvw->nl);
 867
 868       const struct variable *var = cmd->vars[v];
 869       const char *s = var_to_string (var);
 870       double df1, df2;
 871
 872       moments1_calculate (ws->dd_total[v]->mom, &n, NULL, NULL, NULL, NULL);
 873
 874       df1 = pvw->n_groups - 1;
 875       df2 = n - pvw->n_groups;
 876
 877       tab_text (t, 0, v + 1, TAB_LEFT | TAT_TITLE, s);
 878
 879       tab_double (t, 1, v + 1, TAB_RIGHT, F, NULL);
 880       tab_fixed (t, 2, v + 1, TAB_RIGHT, df1, 8, 0);
 881       tab_fixed (t, 3, v + 1, TAB_RIGHT, df2, 8, 0);
 882
 883       /* Now the significance */
 884       tab_double (t, 4, v + 1, TAB_RIGHT, gsl_cdf_fdist_Q (F, df1, df2), NULL);
 885     }
 886
 887   tab_submit (t);
 888 }
 889
 890
 891 /* Show the contrast coefficients table */
 892 static void
 893 show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 894 {
 895   int c_num = 0;
 896   struct ll *cli;
 897
 898   int n_contrasts = ll_count (&cmd->contrast_list);
 899   int n_cols = 2 + ws->actual_number_of_groups;
 900   int n_rows = 2 + n_contrasts;
 901
 902   struct tab_table *t;
 903
 904   const struct covariance *cov = ws->vws[0].cov ;
 905
 906   t = tab_create (n_cols, n_rows);
 907   tab_headers (t, 2, 0, 2, 0);
 908
 909   /* Put a frame around the entire box, and vertical lines inside */
 910   tab_box (t,
 911            TAL_2, TAL_2,
 912            -1, TAL_1,
 913            0, 0,
 914            n_cols - 1, n_rows - 1);
 915
 916   tab_box (t,
 917            -1, -1,
 918            TAL_0, TAL_0,
 919            2, 0,
 920            n_cols - 1, 0);
 921
 922   tab_box (t,
 923            -1, -1,
 924            TAL_0, TAL_0,
 925            0, 0,
 926            1, 1);
 927
 928   tab_hline (t, TAL_1, 2, n_cols - 1, 1);
 929   tab_hline (t, TAL_2, 0, n_cols - 1, 2);
 930
 931   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 932
 933   tab_title (t, _("Contrast Coefficients"));
 934
 935   tab_text (t,  0, 2, TAB_LEFT | TAT_TITLE, _("Contrast"));
 936
 937
 938   tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
 939                   var_to_string (cmd->indep_var));
 940
 941   for ( cli = ll_head (&cmd->contrast_list);
 942         cli != ll_null (&cmd->contrast_list);
 943         cli = ll_next (cli))
 944     {
 945       int count = 0;
 946       struct contrasts_node *cn = ll_data (cli, struct contrasts_node, ll);
 947       struct ll *coeffi ;
 948
 949       tab_text_format (t, 1, c_num + 2, TAB_CENTER, "%d", c_num + 1);
 950
 951       for (coeffi = ll_head (&cn->coefficient_list);
 952            coeffi != ll_null (&cn->coefficient_list);
 953            ++count, coeffi = ll_next (coeffi))
 954         {
 955           const struct categoricals *cats = covariance_get_categoricals (cov);
 956           const union value *val = categoricals_get_value_by_category (cats, count);
 957           struct string vstr;
 958
 959           ds_init_empty (&vstr);
 960
 961           var_append_value_name (cmd->indep_var, val, &vstr);
 962
 963           tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, ds_cstr (&vstr));
 964
 965           ds_destroy (&vstr);
 966
 967           if (cn->bad_count)
 968             tab_text (t, count + 2, c_num + 2, TAB_RIGHT, "?" );
 969           else
 970             {
 971               struct coeff_node *coeffn = ll_data (coeffi, struct coeff_node, ll);
 972
 973               tab_text_format (t, count + 2, c_num + 2, TAB_RIGHT, "%g", coeffn->coeff);
 974             }
 975         }
 976       ++c_num;
 977     }
 978
 979   tab_submit (t);
 980 }
 981
 982
 983 /* Show the results of the contrast tests */
 984 static void
 985 show_contrast_tests (const struct oneway_spec *cmd, const struct oneway_workspace *ws)
 986 {
 987   int n_contrasts = ll_count (&cmd->contrast_list);
 988   size_t v;
 989   int n_cols = 8;
 990   size_t n_rows = 1 + cmd->n_vars * 2 * n_contrasts;
 991
 992   struct tab_table *t;
 993
 994   t = tab_create (n_cols, n_rows);
 995   tab_headers (t, 3, 0, 1, 0);
 996
 997   /* Put a frame around the entire box, and vertical lines inside */
 998   tab_box (t,
 999            TAL_2, TAL_2,
1000            -1, TAL_1,
1001            0, 0,
1002            n_cols - 1, n_rows - 1);
1003
1004   tab_box (t,
1005            -1, -1,
1006            TAL_0, TAL_0,
1007            0, 0,
1008            2, 0);
1009
1010   tab_hline (t, TAL_2, 0, n_cols - 1, 1);
1011   tab_vline (t, TAL_2, 3, 0, n_rows - 1);
1012
1013   tab_title (t, _("Contrast Tests"));
1014
1015   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Contrast"));
1016   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Value of Contrast"));
1017   tab_text (t,  4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
1018   tab_text (t,  5, 0, TAB_CENTER | TAT_TITLE, _("t"));
1019   tab_text (t,  6, 0, TAB_CENTER | TAT_TITLE, _("df"));
1020   tab_text (t,  7, 0, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
1021
1022   for (v = 0; v < cmd->n_vars; ++v)
1023     {
1024       const struct per_var_ws *pvw = &ws->vws[v];
1025       const struct categoricals *cats = covariance_get_categoricals (pvw->cov);
1026       struct ll *cli;
1027       int i = 0;
1028       int lines_per_variable = 2 * n_contrasts;
1029
1030       tab_text (t,  0, (v * lines_per_variable) + 1, TAB_LEFT | TAT_TITLE,
1031                 var_to_string (cmd->vars[v]));
1032
1033       for ( cli = ll_head (&cmd->contrast_list);
1034             cli != ll_null (&cmd->contrast_list);
1035             ++i, cli = ll_next (cli))
1036         {
1037           struct contrasts_node *cn = ll_data (cli, struct contrasts_node, ll);
1038           struct ll *coeffi ;
1039           int ci = 0;
1040           double contrast_value = 0.0;
1041           double coef_msq = 0.0;
1042
1043           double T;
1044           double std_error_contrast;
1045           double df;
1046           double sec_vneq = 0.0;
1047
1048           /* Note: The calculation of the degrees of freedom in the
1049              "variances not equal" case is painfull!!
1050              The following formula may help to understand it:
1051              \frac{\left (\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2}
1052              {
1053              \sum_{i=1}^k\left (
1054              \frac{\left (c_i^2\frac{s_i^2}{n_i}\right)^2}  {n_i-1}
1055              \right)
1056              }
1057           */
1058
1059           double df_denominator = 0.0;
1060           double df_numerator = 0.0;
1061
1062           double grand_n;
1063           moments1_calculate (ws->dd_total[v]->mom, &grand_n, NULL, NULL, NULL, NULL);
1064           df = grand_n - pvw->n_groups;
1065
1066           if ( i == 0 )
1067             {
1068               tab_text (t,  1, (v * lines_per_variable) + i + 1,
1069                         TAB_LEFT | TAT_TITLE,
1070                         _("Assume equal variances"));
1071
1072               tab_text (t,  1, (v * lines_per_variable) + i + 1 + n_contrasts,
1073                         TAB_LEFT | TAT_TITLE,
1074                         _("Does not assume equal"));
1075             }
1076
1077           tab_text_format (t,  2, (v * lines_per_variable) + i + 1,
1078                            TAB_CENTER | TAT_TITLE, "%d", i + 1);
1079
1080
1081           tab_text_format (t,  2,
1082                            (v * lines_per_variable) + i + 1 + n_contrasts,
1083                            TAB_CENTER | TAT_TITLE, "%d", i + 1);
1084
1085           if (cn->bad_count)
1086             continue;
1087
1088           for (coeffi = ll_head (&cn->coefficient_list);
1089                coeffi != ll_null (&cn->coefficient_list);
1090                ++ci, coeffi = ll_next (coeffi))
1091             {
1092               double n, mean, variance;
1093               const struct descriptive_data *dd = categoricals_get_user_data_by_category (cats, ci);
1094               struct coeff_node *cn = ll_data (coeffi, struct coeff_node, ll);
1095               const double coef = cn->coeff;
1096               double winv ;
1097
1098               moments1_calculate (dd->mom, &n, &mean, &variance, NULL, NULL);
1099
1100               winv = variance / n;
1101
1102               contrast_value += coef * mean;
1103
1104               coef_msq += (pow2 (coef)) / n;
1105
1106               sec_vneq += (pow2 (coef)) * variance / n;
1107
1108               df_numerator += (pow2 (coef)) * winv;
1109               df_denominator += pow2((pow2 (coef)) * winv) / (n - 1);
1110             }
1111
1112           sec_vneq = sqrt (sec_vneq);
1113
1114           df_numerator = pow2 (df_numerator);
1115
1116           tab_double (t,  3, (v * lines_per_variable) + i + 1,
1117                       TAB_RIGHT, contrast_value, NULL);
1118
1119           tab_double (t,  3, (v * lines_per_variable) + i + 1 +
1120                       n_contrasts,
1121                       TAB_RIGHT, contrast_value, NULL);
1122
1123           std_error_contrast = sqrt (pvw->mse * coef_msq);
1124
1125           /* Std. Error */
1126           tab_double (t,  4, (v * lines_per_variable) + i + 1,
1127                       TAB_RIGHT, std_error_contrast,
1128                       NULL);
1129
1130           T = fabs (contrast_value / std_error_contrast);
1131
1132           /* T Statistic */
1133
1134           tab_double (t,  5, (v * lines_per_variable) + i + 1,
1135                       TAB_RIGHT, T,
1136                       NULL);
1137
1138
1139           /* Degrees of Freedom */
1140           tab_fixed (t,  6, (v * lines_per_variable) + i + 1,
1141                      TAB_RIGHT,  df,
1142                      8, 0);
1143
1144
1145           /* Significance TWO TAILED !!*/
1146           tab_double (t,  7, (v * lines_per_variable) + i + 1,
1147                       TAB_RIGHT,  2 * gsl_cdf_tdist_Q (T, df),
1148                       NULL);
1149
1150           /* Now for the Variances NOT Equal case */
1151
1152           /* Std. Error */
1153           tab_double (t,  4,
1154                       (v * lines_per_variable) + i + 1 + n_contrasts,
1155                       TAB_RIGHT, sec_vneq,
1156                       NULL);
1157
1158           T = contrast_value / sec_vneq;
1159           tab_double (t,  5,
1160                       (v * lines_per_variable) + i + 1 + n_contrasts,
1161                       TAB_RIGHT, T,
1162                       NULL);
1163
1164           df = df_numerator / df_denominator;
1165
1166           tab_double (t,  6,
1167                       (v * lines_per_variable) + i + 1 + n_contrasts,
1168                       TAB_RIGHT, df,
1169                       NULL);
1170
1171           /* The Significance */
1172           tab_double (t, 7, (v * lines_per_variable) + i + 1 + n_contrasts,
1173                       TAB_RIGHT,  2 * gsl_cdf_tdist_Q (T,df),
1174                       NULL);
1175         }
1176
1177       if ( v > 0 )
1178         tab_hline (t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1);
1179     }
1180
1181   tab_submit (t);
1182 }
1183
1184