pintos-os.org Git - pspp/blob - src/language/stats/oneway.q

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 1997-9, 2000, 2007 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include <gsl/gsl_cdf.h>
  20 #include <math.h>
  21 #include <stdio.h>
  22 #include <stdlib.h>
  23
  24 #include <data/case.h>
  25 #include <data/casegrouper.h>
  26 #include <data/casereader.h>
  27 #include <data/dictionary.h>
  28 #include <data/procedure.h>
  29 #include <data/value-labels.h>
  30 #include <data/variable.h>
  31 #include <language/command.h>
  32 #include <language/dictionary/split-file.h>
  33 #include <language/lexer/lexer.h>
  34 #include <libpspp/compiler.h>
  35 #include <libpspp/hash.h>
  36 #include <libpspp/message.h>
  37 #include <libpspp/misc.h>
  38 #include <libpspp/str.h>
  39 #include <libpspp/taint.h>
  40 #include <math/group-proc.h>
  41 #include <math/group.h>
  42 #include <math/levene.h>
  43 #include <output/manager.h>
  44 #include <output/table.h>
  45 #include "sort-criteria.h"
  46 #include <data/format.h>
  47
  48 #include "xalloc.h"
  49
  50 #include "gettext.h"
  51 #define _(msgid) gettext (msgid)
  52
  53 /* (headers) */
  54
  55 /* (specification)
  56    "ONEWAY" (oneway_):
  57    *^variables=custom;
  58    missing=miss:!analysis/listwise,
  59            incl:include/!exclude;
  60    +contrast= double list;
  61    +statistics[st_]=descriptives,homogeneity.
  62 */
  63 /* (declarations) */
  64 /* (functions) */
  65
/* Parsed state of the current ONEWAY command; filled in by
   parse_oneway() and released by free_oneway() in cmd_oneway(). */
static struct cmd_oneway cmd;

/* The independent (grouping) variable, i.e. the one named after BY on
   the VARIABLES subcommand. */
static const struct variable *indep_var;

/* Number of dependent variables, i.e. the number of elements in
   `vars'. */
static size_t n_vars;

/* The dependent variables.  The array is allocated while parsing the
   VARIABLES subcommand and freed at the end of cmd_oneway(). */
static const struct variable **vars;


/* A hash table containing all the distinct values of the independent
   variable seen in the current split group.  Created in run_oneway()
   and destroyed in output_oneway(). */
static struct hsh_table *global_group_hash ;

/* The number of distinct values of the independent variable, when all
   missing values are disregarded.  Set from the size of
   global_group_hash once the data pass has finished. */
static int ostensible_number_of_groups = -1;


/* Runs the analysis for a single split group. */
static void run_oneway (struct cmd_oneway *, struct casereader *,
                        const struct dataset *);


/* Routines to show the output tables */
static void show_anova_table(void);
static void show_descriptives (const struct dictionary *dict);
static void show_homogeneity(void);

/* The argument of each is an array with one flag per contrast; a
   nonzero flag marks a contrast that must not be evaluated. */
static void show_contrast_coeffs(short *);
static void show_contrast_tests(short *);


/* Bit flags identifying the optional tables selected on the
   STATISTICS subcommand. */
enum stat_table_t {STAT_DESC = 1, STAT_HOMO = 2};

/* Bitwise OR of the stat_table_t flags requested by the command. */
static enum stat_table_t stat_tables ;

/* Emits every table for the split group that has just been analysed. */
static void output_oneway (const struct dictionary *dict);
 105
 106
 107 int
 108 cmd_oneway (struct lexer *lexer, struct dataset *ds)
 109 {
 110   struct casegrouper *grouper;
 111   struct casereader *group;
 112   int i;
 113   bool ok;
 114
 115   if ( !parse_oneway (lexer, ds, &cmd, NULL) )
 116     return CMD_FAILURE;
 117
 118   /* What statistics were requested */
 119   if ( cmd.sbc_statistics )
 120     {
 121
 122       for (i = 0 ; i < ONEWAY_ST_count ; ++i )
 123         {
 124           if  ( ! cmd.a_statistics[i]  ) continue;
 125
 126           switch (i) {
 127           case ONEWAY_ST_DESCRIPTIVES:
 128             stat_tables |= STAT_DESC;
 129             break;
 130           case ONEWAY_ST_HOMOGENEITY:
 131             stat_tables |= STAT_HOMO;
 132             break;
 133           }
 134         }
 135     }
 136
 137   /* Data pass.  FIXME: error handling. */
 138   grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
 139   while (casegrouper_get_next_group (grouper, &group))
 140     run_oneway (&cmd, group, ds);
 141   ok = casegrouper_destroy (grouper);
 142   ok = proc_commit (ds) && ok;
 143
 144   free (vars);
 145   free_oneway (&cmd);
 146
 147   return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
 148 }
 149
 150
 151 static void
 152 output_oneway (const struct dictionary *dict)
 153 {
 154   size_t i;
 155   short *bad_contrast ;
 156
 157   bad_contrast = xnmalloc (cmd.sbc_contrast, sizeof *bad_contrast);
 158
 159   /* Check the sanity of the given contrast values */
 160   for (i = 0 ; i < cmd.sbc_contrast ; ++i )
 161     {
 162       int j;
 163       double sum = 0;
 164
 165       bad_contrast[i] = 0;
 166       if ( subc_list_double_count(&cmd.dl_contrast[i]) !=
 167            ostensible_number_of_groups )
 168         {
 169           msg(SW,
 170               _("Number of contrast coefficients must equal the number of groups"));
 171           bad_contrast[i] = 1;
 172           continue;
 173         }
 174
 175       for (j=0; j < ostensible_number_of_groups ; ++j )
 176         sum += subc_list_double_at(&cmd.dl_contrast[i],j);
 177
 178       if ( sum != 0.0 )
 179         msg(SW,_("Coefficients for contrast %zu do not total zero"), i + 1);
 180     }
 181
 182   if ( stat_tables & STAT_DESC )
 183     show_descriptives (dict);
 184
 185   if ( stat_tables & STAT_HOMO )
 186     show_homogeneity();
 187
 188   show_anova_table();
 189
 190   if (cmd.sbc_contrast )
 191     {
 192       show_contrast_coeffs(bad_contrast);
 193       show_contrast_tests(bad_contrast);
 194     }
 195
 196
 197   free(bad_contrast);
 198
 199   /* Clean up */
 200   for (i = 0 ; i < n_vars ; ++i )
 201     {
 202       struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash;
 203
 204       hsh_destroy(group_hash);
 205     }
 206
 207   hsh_destroy(global_group_hash);
 208
 209 }
 210
 211
 212
 213
 214 /* Parser for the variables sub command */
 215 static int
 216 oneway_custom_variables (struct lexer *lexer,
 217                         struct dataset *ds, struct cmd_oneway *cmd UNUSED,
 218                         void *aux UNUSED)
 219 {
 220   struct dictionary *dict = dataset_dict (ds);
 221
 222   lex_match (lexer, '=');
 223
 224   if ((lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
 225       && lex_token (lexer) != T_ALL)
 226     return 2;
 227
 228   if (!parse_variables_const (lexer, dict, &vars, &n_vars,
 229                         PV_DUPLICATE
 230                         | PV_NUMERIC | PV_NO_SCRATCH) )
 231     {
 232       free (vars);
 233       return 0;
 234     }
 235
 236   assert(n_vars);
 237
 238   if ( ! lex_match (lexer, T_BY))
 239     return 2;
 240
 241   indep_var = parse_variable (lexer, dict);
 242
 243   if ( !indep_var )
 244     {
 245       msg(SE,_("`%s' is not a variable name"),lex_tokid (lexer));
 246       return 0;
 247     }
 248
 249   return 1;
 250 }
 251
 252
 253 /* Show the ANOVA table */
 254 static void
 255 show_anova_table(void)
 256 {
 257   size_t i;
 258   int n_cols =7;
 259   size_t n_rows = n_vars * 3 + 1;
 260
 261   struct tab_table *t;
 262
 263
 264   t = tab_create (n_cols,n_rows,0);
 265   tab_headers (t, 2, 0, 1, 0);
 266   tab_dim (t, tab_natural_dimensions);
 267
 268
 269   tab_box (t,
 270            TAL_2, TAL_2,
 271            -1, TAL_1,
 272            0, 0,
 273            n_cols - 1, n_rows - 1);
 274
 275   tab_hline (t, TAL_2, 0, n_cols - 1, 1 );
 276   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 277   tab_vline (t, TAL_0, 1, 0, 0);
 278
 279   tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares"));
 280   tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df"));
 281   tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square"));
 282   tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("F"));
 283   tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
 284
 285
 286   for ( i=0 ; i < n_vars ; ++i )
 287     {
 288       struct group_statistics *totals = &group_proc_get (vars[i])->ugs;
 289       struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash;
 290       struct hsh_iterator g;
 291       struct group_statistics *gs;
 292       double ssa=0;
 293       const char *s = var_to_string(vars[i]);
 294
 295       for (gs =  hsh_first (group_hash,&g);
 296            gs != 0;
 297            gs = hsh_next(group_hash,&g))
 298         {
 299           ssa += (gs->sum * gs->sum)/gs->n;
 300         }
 301
 302       ssa -= ( totals->sum * totals->sum ) / totals->n ;
 303
 304       tab_text (t, 0, i * 3 + 1, TAB_LEFT | TAT_TITLE, s);
 305       tab_text (t, 1, i * 3 + 1, TAB_LEFT | TAT_TITLE, _("Between Groups"));
 306       tab_text (t, 1, i * 3 + 2, TAB_LEFT | TAT_TITLE, _("Within Groups"));
 307       tab_text (t, 1, i * 3 + 3, TAB_LEFT | TAT_TITLE, _("Total"));
 308
 309       if (i > 0)
 310         tab_hline(t, TAL_1, 0, n_cols - 1 , i * 3 + 1);
 311
 312       {
 313         struct group_proc *gp = group_proc_get (vars[i]);
 314         const double sst = totals->ssq - ( totals->sum * totals->sum) / totals->n ;
 315         const double df1 = gp->n_groups - 1;
 316         const double df2 = totals->n - gp->n_groups ;
 317         const double msa = ssa / df1;
 318
 319         gp->mse  = (sst - ssa) / df2;
 320
 321
 322         /* Sums of Squares */
 323         tab_double (t, 2, i * 3 + 1, 0, ssa, NULL);
 324         tab_double (t, 2, i * 3 + 3, 0, sst, NULL);
 325         tab_double (t, 2, i * 3 + 2, 0, sst - ssa, NULL);
 326
 327
 328         /* Degrees of freedom */
 329         tab_fixed (t, 3, i * 3 + 1, 0, df1, 4, 0);
 330         tab_fixed (t, 3, i * 3 + 2, 0, df2, 4, 0);
 331         tab_fixed (t, 3, i * 3 + 3, 0, totals->n - 1, 4, 0);
 332
 333         /* Mean Squares */
 334         tab_double (t, 4, i * 3 + 1, TAB_RIGHT, msa, NULL);
 335         tab_double (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, NULL);
 336
 337
 338         {
 339           const double F = msa / gp->mse ;
 340
 341           /* The F value */
 342           tab_double (t, 5, i * 3 + 1, 0,  F, NULL);
 343
 344           /* The significance */
 345           tab_double (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1,df2), NULL);
 346         }
 347
 348       }
 349
 350     }
 351
 352
 353   tab_title (t, _("ANOVA"));
 354   tab_submit (t);
 355 }
 356
 357
 358 /* Show the descriptives table */
 359 static void
 360 show_descriptives (const struct dictionary *dict)
 361 {
 362   size_t v;
 363   int n_cols = 10;
 364   struct tab_table *t;
 365   int row;
 366
 367   const double confidence = 0.95;
 368   const double q = (1.0 - confidence) / 2.0;
 369
 370   const struct variable *wv = dict_get_weight (dict);
 371   const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
 372
 373   int n_rows = 2 ;
 374
 375   for ( v = 0 ; v < n_vars ; ++v )
 376     n_rows += group_proc_get (vars[v])->n_groups + 1;
 377
 378   t = tab_create (n_cols,n_rows,0);
 379   tab_headers (t, 2, 0, 2, 0);
 380   tab_dim (t, tab_natural_dimensions);
 381
 382
 383   /* Put a frame around the entire box, and vertical lines inside */
 384   tab_box (t,
 385            TAL_2, TAL_2,
 386            -1, TAL_1,
 387            0, 0,
 388            n_cols - 1, n_rows - 1);
 389
 390   /* Underline headers */
 391   tab_hline (t, TAL_2, 0, n_cols - 1, 2 );
 392   tab_vline (t, TAL_2, 2, 0, n_rows - 1);
 393
 394   tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("N"));
 395   tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Mean"));
 396   tab_text (t, 4, 1, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
 397   tab_text (t, 5, 1, TAB_CENTER | TAT_TITLE, _("Std. Error"));
 398
 399
 400   tab_vline(t, TAL_0, 7, 0, 0);
 401   tab_hline(t, TAL_1, 6, 7, 1);
 402   tab_joint_text (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF, _("%g%% Confidence Interval for Mean"),confidence*100.0);
 403
 404   tab_text (t, 6, 1, TAB_CENTER | TAT_TITLE, _("Lower Bound"));
 405   tab_text (t, 7, 1, TAB_CENTER | TAT_TITLE, _("Upper Bound"));
 406
 407   tab_text (t, 8, 1, TAB_CENTER | TAT_TITLE, _("Minimum"));
 408   tab_text (t, 9, 1, TAB_CENTER | TAT_TITLE, _("Maximum"));
 409
 410
 411   tab_title (t, _("Descriptives"));
 412
 413
 414   row = 2;
 415   for ( v=0 ; v < n_vars ; ++v )
 416     {
 417       double T;
 418       double std_error;
 419
 420       struct group_proc *gp = group_proc_get (vars[v]);
 421
 422       struct group_statistics *gs;
 423       struct group_statistics *totals = &gp->ugs;
 424
 425       const char *s = var_to_string (vars[v]);
 426       const struct fmt_spec *fmt = var_get_print_format (vars[v]);
 427
 428       struct group_statistics *const *gs_array =
 429         (struct group_statistics *const *) hsh_sort(gp->group_hash);
 430       int count = 0;
 431
 432       tab_text (t, 0, row, TAB_LEFT | TAT_TITLE, s);
 433       if ( v > 0)
 434         tab_hline(t, TAL_1, 0, n_cols - 1 , row);
 435
 436       for (count = 0; count < hsh_count (gp->group_hash); ++count)
 437         {
 438           struct string vstr;
 439           ds_init_empty (&vstr);
 440           gs = gs_array[count];
 441
 442           var_append_value_name (indep_var, &gs->id, &vstr);
 443
 444           tab_text (t, 1, row + count,
 445                     TAB_LEFT | TAT_TITLE,
 446                     ds_cstr (&vstr));
 447
 448           ds_destroy (&vstr);
 449
 450           /* Now fill in the numbers ... */
 451
 452           tab_fixed (t, 2, row + count, 0, gs->n, 8, 0);
 453
 454           tab_double (t, 3, row + count, 0, gs->mean, NULL);
 455
 456           tab_double (t, 4, row + count, 0, gs->std_dev, NULL);
 457
 458           std_error = gs->std_dev / sqrt (gs->n) ;
 459           tab_double (t, 5, row + count, 0,
 460                      std_error, NULL);
 461
 462           /* Now the confidence interval */
 463
 464           T = gsl_cdf_tdist_Qinv (q, gs->n - 1);
 465
 466           tab_double (t, 6, row + count, 0,
 467                     gs->mean - T * std_error, NULL);
 468
 469           tab_double (t, 7, row + count, 0,
 470                     gs->mean + T * std_error, NULL);
 471
 472           /* Min and Max */
 473
 474           tab_double (t, 8, row + count, 0,  gs->minimum, fmt);
 475           tab_double (t, 9, row + count, 0,  gs->maximum, fmt);
 476         }
 477
 478       tab_text (t, 1, row + count,
 479                 TAB_LEFT | TAT_TITLE ,_("Total"));
 480
 481       tab_double (t, 2, row + count, 0, totals->n, wfmt);
 482
 483       tab_double (t, 3, row + count, 0, totals->mean, NULL);
 484
 485       tab_double (t, 4, row + count, 0, totals->std_dev, NULL);
 486
 487       std_error = totals->std_dev / sqrt (totals->n) ;
 488
 489       tab_double (t, 5, row + count, 0, std_error, NULL);
 490
 491       /* Now the confidence interval */
 492
 493       T = gsl_cdf_tdist_Qinv (q, totals->n - 1);
 494
 495       tab_double (t, 6, row + count, 0,
 496                   totals->mean - T * std_error, NULL);
 497
 498       tab_double (t, 7, row + count, 0,
 499                   totals->mean + T * std_error, NULL);
 500
 501       /* Min and Max */
 502
 503       tab_double (t, 8, row + count, 0,  totals->minimum, fmt);
 504       tab_double (t, 9, row + count, 0,  totals->maximum, fmt);
 505
 506       row += gp->n_groups + 1;
 507     }
 508
 509
 510   tab_submit (t);
 511 }
 512
 513 /* Show the homogeneity table */
 514 static void
 515 show_homogeneity(void)
 516 {
 517   size_t v;
 518   int n_cols = 5;
 519   size_t n_rows = n_vars + 1;
 520
 521   struct tab_table *t;
 522
 523
 524   t = tab_create (n_cols,n_rows,0);
 525   tab_headers (t, 1, 0, 1, 0);
 526   tab_dim (t, tab_natural_dimensions);
 527
 528   /* Put a frame around the entire box, and vertical lines inside */
 529   tab_box (t,
 530            TAL_2, TAL_2,
 531            -1, TAL_1,
 532            0, 0,
 533            n_cols - 1, n_rows - 1);
 534
 535
 536   tab_hline(t, TAL_2, 0, n_cols - 1, 1);
 537   tab_vline(t, TAL_2, 1, 0, n_rows - 1);
 538
 539
 540   tab_text (t,  1, 0, TAB_CENTER | TAT_TITLE, _("Levene Statistic"));
 541   tab_text (t,  2, 0, TAB_CENTER | TAT_TITLE, _("df1"));
 542   tab_text (t,  3, 0, TAB_CENTER | TAT_TITLE, _("df2"));
 543   tab_text (t,  4, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
 544
 545
 546   tab_title (t, _("Test of Homogeneity of Variances"));
 547
 548   for ( v=0 ; v < n_vars ; ++v )
 549     {
 550       double F;
 551       const struct variable *var = vars[v];
 552       const struct group_proc *gp = group_proc_get (vars[v]);
 553       const char *s = var_to_string(var);
 554       const struct group_statistics *totals = &gp->ugs;
 555
 556       const double df1 = gp->n_groups - 1;
 557       const double df2 = totals->n - gp->n_groups ;
 558
 559       tab_text (t, 0, v + 1, TAB_LEFT | TAT_TITLE, s);
 560
 561       F = gp->levene;
 562       tab_double (t, 1, v + 1, TAB_RIGHT, F, NULL);
 563       tab_fixed (t, 2, v + 1, TAB_RIGHT, df1, 8, 0);
 564       tab_fixed (t, 3, v + 1, TAB_RIGHT, df2, 8, 0);
 565
 566       /* Now the significance */
 567       tab_double (t, 4, v + 1, TAB_RIGHT,gsl_cdf_fdist_Q (F, df1, df2), NULL);
 568     }
 569
 570   tab_submit (t);
 571 }
 572
 573
 574 /* Show the contrast coefficients table */
 575 static void
 576 show_contrast_coeffs (short *bad_contrast)
 577 {
 578   int n_cols = 2 + ostensible_number_of_groups;
 579   int n_rows = 2 + cmd.sbc_contrast;
 580   union value *group_value;
 581   int count = 0 ;
 582   void *const *group_values ;
 583
 584   struct tab_table *t;
 585
 586   t = tab_create (n_cols,n_rows,0);
 587   tab_headers (t, 2, 0, 2, 0);
 588   tab_dim (t, tab_natural_dimensions);
 589
 590   /* Put a frame around the entire box, and vertical lines inside */
 591   tab_box (t,
 592            TAL_2, TAL_2,
 593            -1, TAL_1,
 594            0, 0,
 595            n_cols - 1, n_rows - 1);
 596
 597   tab_box (t,
 598            -1,-1,
 599            TAL_0, TAL_0,
 600            2, 0,
 601            n_cols - 1, 0);
 602
 603   tab_box (t,
 604            -1,-1,
 605            TAL_0, TAL_0,
 606            0,0,
 607            1,1);
 608
 609   tab_hline(t, TAL_1, 2, n_cols - 1, 1);
 610   tab_hline(t, TAL_2, 0, n_cols - 1, 2);
 611
 612   tab_vline(t, TAL_2, 2, 0, n_rows - 1);
 613
 614   tab_title (t, _("Contrast Coefficients"));
 615
 616   tab_text (t,  0, 2, TAB_LEFT | TAT_TITLE, _("Contrast"));
 617
 618
 619   tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
 620                   var_to_string(indep_var));
 621
 622   group_values = hsh_sort(global_group_hash);
 623   for (count = 0 ;
 624        count < hsh_count(global_group_hash) ;
 625        ++count)
 626     {
 627       int i;
 628       struct string vstr;
 629       group_value = group_values[count];
 630
 631       ds_init_empty (&vstr);
 632
 633       var_append_value_name (indep_var, group_value, &vstr);
 634
 635       tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE,
 636                 ds_cstr (&vstr));
 637
 638       ds_destroy (&vstr);
 639
 640
 641       for (i = 0 ; i < cmd.sbc_contrast ; ++i )
 642         {
 643           tab_text(t, 1, i + 2, TAB_CENTER | TAT_PRINTF, "%d", i + 1);
 644
 645           if ( bad_contrast[i] )
 646             tab_text(t, count + 2, i + 2, TAB_RIGHT, "?" );
 647           else
 648             tab_text(t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g",
 649                      subc_list_double_at(&cmd.dl_contrast[i], count)
 650                      );
 651         }
 652     }
 653
 654   tab_submit (t);
 655 }
 656
 657
 658 /* Show the results of the contrast tests */
 659 static void
 660 show_contrast_tests(short *bad_contrast)
 661 {
 662   size_t v;
 663   int n_cols = 8;
 664   size_t n_rows = 1 + n_vars * 2 * cmd.sbc_contrast;
 665
 666   struct tab_table *t;
 667
 668   t = tab_create (n_cols,n_rows,0);
 669   tab_headers (t, 3, 0, 1, 0);
 670   tab_dim (t, tab_natural_dimensions);
 671
 672   /* Put a frame around the entire box, and vertical lines inside */
 673   tab_box (t,
 674            TAL_2, TAL_2,
 675            -1, TAL_1,
 676            0, 0,
 677            n_cols - 1, n_rows - 1);
 678
 679   tab_box (t,
 680            -1,-1,
 681            TAL_0, TAL_0,
 682            0, 0,
 683            2, 0);
 684
 685   tab_hline(t, TAL_2, 0, n_cols - 1, 1);
 686   tab_vline(t, TAL_2, 3, 0, n_rows - 1);
 687
 688
 689   tab_title (t, _("Contrast Tests"));
 690
 691   tab_text (t,  2, 0, TAB_CENTER | TAT_TITLE, _("Contrast"));
 692   tab_text (t,  3, 0, TAB_CENTER | TAT_TITLE, _("Value of Contrast"));
 693   tab_text (t,  4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
 694   tab_text (t,  5, 0, TAB_CENTER | TAT_TITLE, _("t"));
 695   tab_text (t,  6, 0, TAB_CENTER | TAT_TITLE, _("df"));
 696   tab_text (t,  7, 0, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
 697
 698   for ( v = 0 ; v < n_vars ; ++v )
 699     {
 700       int i;
 701       int lines_per_variable = 2 * cmd.sbc_contrast;
 702
 703
 704       tab_text (t,  0, (v * lines_per_variable) + 1, TAB_LEFT | TAT_TITLE,
 705                 var_to_string(vars[v]));
 706
 707       for ( i = 0 ; i < cmd.sbc_contrast ; ++i )
 708         {
 709           int ci;
 710           double contrast_value = 0.0;
 711           double coef_msq = 0.0;
 712           struct group_proc *grp_data = group_proc_get (vars[v]);
 713           struct hsh_table *group_hash = grp_data->group_hash;
 714
 715           void *const *group_stat_array;
 716
 717           double T;
 718           double std_error_contrast ;
 719           double df;
 720           double sec_vneq=0.0;
 721
 722
 723           /* Note: The calculation of the degrees of freedom in the
 724              "variances not equal" case is painfull!!
 725              The following formula may help to understand it:
 726              \frac{\left(\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2}
 727              {
 728              \sum_{i=1}^k\left(
 729              \frac{\left(c_i^2\frac{s_i^2}{n_i}\right)^2}  {n_i-1}
 730              \right)
 731              }
 732           */
 733
 734           double df_denominator = 0.0;
 735           double df_numerator = 0.0;
 736           if ( i == 0 )
 737             {
 738               tab_text (t,  1, (v * lines_per_variable) + i + 1,
 739                         TAB_LEFT | TAT_TITLE,
 740                         _("Assume equal variances"));
 741
 742               tab_text (t,  1, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
 743                         TAB_LEFT | TAT_TITLE,
 744                         _("Does not assume equal"));
 745             }
 746
 747           tab_text (t,  2, (v * lines_per_variable) + i + 1,
 748                     TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1);
 749
 750
 751           tab_text (t,  2, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
 752                     TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1);
 753
 754
 755           if ( bad_contrast[i])
 756             continue;
 757
 758           group_stat_array = hsh_sort(group_hash);
 759
 760           for (ci = 0 ; ci < hsh_count(group_hash) ;  ++ci)
 761             {
 762               const double coef = subc_list_double_at(&cmd.dl_contrast[i], ci);
 763               struct group_statistics *gs = group_stat_array[ci];
 764
 765               const double winv = (gs->std_dev * gs->std_dev) / gs->n;
 766
 767               contrast_value += coef * gs->mean;
 768
 769               coef_msq += (coef * coef) / gs->n ;
 770
 771               sec_vneq += (coef * coef) * (gs->std_dev * gs->std_dev ) /gs->n ;
 772
 773               df_numerator += (coef * coef) * winv;
 774               df_denominator += pow2((coef * coef) * winv) / (gs->n - 1);
 775             }
 776           sec_vneq = sqrt(sec_vneq);
 777
 778           df_numerator = pow2 (df_numerator);
 779
 780           tab_double (t,  3, (v * lines_per_variable) + i + 1,
 781                      TAB_RIGHT, contrast_value, NULL);
 782
 783           tab_double (t,  3, (v * lines_per_variable) + i + 1 +
 784                      cmd.sbc_contrast,
 785                      TAB_RIGHT, contrast_value, NULL);
 786
 787           std_error_contrast = sqrt (grp_data->mse * coef_msq);
 788
 789           /* Std. Error */
 790           tab_double (t,  4, (v * lines_per_variable) + i + 1,
 791                      TAB_RIGHT, std_error_contrast,
 792                      NULL);
 793
 794           T = fabs(contrast_value / std_error_contrast) ;
 795
 796           /* T Statistic */
 797
 798           tab_double (t,  5, (v * lines_per_variable) + i + 1,
 799                      TAB_RIGHT, T,
 800                      NULL);
 801
 802           df = grp_data->ugs.n - grp_data->n_groups;
 803
 804           /* Degrees of Freedom */
 805           tab_fixed (t,  6, (v * lines_per_variable) + i + 1,
 806                      TAB_RIGHT,  df,
 807                      8, 0);
 808
 809
 810           /* Significance TWO TAILED !!*/
 811           tab_double (t,  7, (v * lines_per_variable) + i + 1,
 812                      TAB_RIGHT,  2 * gsl_cdf_tdist_Q (T, df),
 813                      NULL);
 814
 815
 816           /* Now for the Variances NOT Equal case */
 817
 818           /* Std. Error */
 819           tab_double (t,  4,
 820                      (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
 821                      TAB_RIGHT, sec_vneq,
 822                      NULL);
 823
 824
 825           T = contrast_value / sec_vneq;
 826           tab_double (t,  5,
 827                      (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
 828                      TAB_RIGHT, T,
 829                      NULL);
 830
 831
 832           df = df_numerator / df_denominator;
 833
 834           tab_double (t,  6,
 835                      (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
 836                      TAB_RIGHT, df,
 837                      NULL);
 838
 839           /* The Significance */
 840
 841           tab_double (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast,
 842                      TAB_RIGHT,  2 * gsl_cdf_tdist_Q (T,df),
 843                      NULL);
 844
 845
 846         }
 847
 848       if ( v > 0 )
 849         tab_hline(t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1);
 850     }
 851
 852   tab_submit (t);
 853 }
 854
 855
 856 /* ONEWAY ANOVA Calculations */
 857
 858 static void  postcalc (  struct cmd_oneway *cmd UNUSED );
 859
 860 static void  precalc ( struct cmd_oneway *cmd UNUSED );
 861
 862
 863
 864 /* Pre calculations */
 865 static void
 866 precalc ( struct cmd_oneway *cmd UNUSED )
 867 {
 868   size_t i=0;
 869
 870   for(i=0; i< n_vars ; ++i)
 871     {
 872       struct group_proc *gp = group_proc_get (vars[i]);
 873       struct group_statistics *totals = &gp->ugs;
 874
 875       /* Create a hash for each of the dependent variables.
 876          The hash contains a group_statistics structure,
 877          and is keyed by value of the independent variable */
 878
 879       gp->group_hash =
 880         hsh_create(4,
 881                    (hsh_compare_func *) compare_group,
 882                    (hsh_hash_func *) hash_group,
 883                    (hsh_free_func *) free_group,
 884                    (void *) var_get_width (indep_var) );
 885
 886
 887       totals->sum=0;
 888       totals->n=0;
 889       totals->ssq=0;
 890       totals->sum_diff=0;
 891       totals->maximum = - DBL_MAX;
 892       totals->minimum = DBL_MAX;
 893     }
 894 }
 895
 896 static void
 897 free_value (void *value_, const void *aux UNUSED)
 898 {
 899   union value *value = value_;
 900   free (value);
 901 }
 902
 903 static void
 904 run_oneway (struct cmd_oneway *cmd,
 905             struct casereader *input,
 906             const struct dataset *ds)
 907 {
 908   struct taint *taint;
 909   struct dictionary *dict = dataset_dict (ds);
 910   enum mv_class exclude;
 911   struct casereader *reader;
 912   struct ccase c;
 913
 914   if (!casereader_peek (input, 0, &c))
 915     {
 916       casereader_destroy (input);
 917       return;
 918     }
 919   output_split_file_values (ds, &c);
 920   case_destroy (&c);
 921
 922   taint = taint_clone (casereader_get_taint (input));
 923
 924   global_group_hash = hsh_create(4,
 925                                  (hsh_compare_func *) compare_values,
 926                                  (hsh_hash_func *) hash_value,
 927                                  free_value,
 928                                  (void *) var_get_width (indep_var) );
 929
 930   precalc(cmd);
 931
 932   exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM;
 933   input = casereader_create_filter_missing (input, &indep_var, 1,
 934                                             exclude, NULL);
 935   if (cmd->miss == ONEWAY_LISTWISE)
 936     input = casereader_create_filter_missing (input, vars, n_vars,
 937                                               exclude, NULL);
 938   input = casereader_create_filter_weight (input, dict, NULL, NULL);
 939
 940   reader = casereader_clone (input);
 941   for (; casereader_read (reader, &c); case_destroy (&c))
 942     {
 943       size_t i;
 944
 945       const double weight = dict_get_case_weight (dict, &c, NULL);
 946
 947       const union value *indep_val = case_data (&c, indep_var);
 948       void **p = hsh_probe (global_group_hash, indep_val);
 949       if (*p == NULL)
 950         *p = value_dup (indep_val, var_get_width (indep_var));
 951
 952       for ( i = 0 ; i < n_vars ; ++i )
 953         {
 954           const struct variable *v = vars[i];
 955
 956           const union value *val = case_data (&c, v);
 957
 958           struct group_proc *gp = group_proc_get (vars[i]);
 959           struct hsh_table *group_hash = gp->group_hash;
 960
 961           struct group_statistics *gs;
 962
 963           gs = hsh_find(group_hash, (void *) indep_val );
 964
 965           if ( ! gs )
 966             {
 967               gs = xmalloc (sizeof *gs);
 968               gs->id = *indep_val;
 969               gs->sum=0;
 970               gs->n=0;
 971               gs->ssq=0;
 972               gs->sum_diff=0;
 973               gs->minimum = DBL_MAX;
 974               gs->maximum = -DBL_MAX;
 975
 976               hsh_insert ( group_hash, (void *) gs );
 977             }
 978
 979           if (!var_is_value_missing (v, val, exclude))
 980             {
 981               struct group_statistics *totals = &gp->ugs;
 982
 983               totals->n+=weight;
 984               totals->sum+=weight * val->f;
 985               totals->ssq+=weight * val->f * val->f;
 986
 987               if ( val->f * weight  < totals->minimum )
 988                 totals->minimum = val->f * weight;
 989
 990               if ( val->f * weight  > totals->maximum )
 991                 totals->maximum = val->f * weight;
 992
 993               gs->n+=weight;
 994               gs->sum+=weight * val->f;
 995               gs->ssq+=weight * val->f * val->f;
 996
 997               if ( val->f * weight  < gs->minimum )
 998                 gs->minimum = val->f * weight;
 999
1000               if ( val->f * weight  > gs->maximum )
1001                 gs->maximum = val->f * weight;
1002             }
1003
1004           gp->n_groups = hsh_count ( group_hash );
1005         }
1006
1007     }
1008   casereader_destroy (reader);
1009
1010   postcalc(cmd);
1011
1012
1013   if ( stat_tables & STAT_HOMO )
1014     levene (dict, casereader_clone (input), indep_var, n_vars, vars, exclude);
1015
1016   casereader_destroy (input);
1017
1018   ostensible_number_of_groups = hsh_count (global_group_hash);
1019
1020   if (!taint_has_tainted_successor (taint))
1021     output_oneway (dict);
1022
1023   taint_destroy (taint);
1024 }
1025
1026
1027 /* Post calculations for the ONEWAY command */
1028 void
1029 postcalc (  struct cmd_oneway *cmd UNUSED )
1030 {
1031   size_t i=0;
1032
1033
1034   for(i = 0; i < n_vars ; ++i)
1035     {
1036       struct group_proc *gp = group_proc_get (vars[i]);
1037       struct hsh_table *group_hash = gp->group_hash;
1038       struct group_statistics *totals = &gp->ugs;
1039
1040       struct hsh_iterator g;
1041       struct group_statistics *gs;
1042
1043       for (gs =  hsh_first (group_hash,&g);
1044            gs != 0;
1045            gs = hsh_next(group_hash,&g))
1046         {
1047           gs->mean=gs->sum / gs->n;
1048           gs->s_std_dev= sqrt(
1049                               ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1050                               ) ;
1051
1052           gs->std_dev= sqrt(
1053                             gs->n/(gs->n-1) *
1054                             ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1055                             ) ;
1056
1057           gs->se_mean = gs->std_dev / sqrt(gs->n);
1058           gs->mean_diff= gs->sum_diff / gs->n;
1059
1060         }
1061
1062
1063
1064       totals->mean = totals->sum / totals->n;
1065       totals->std_dev= sqrt(
1066                             totals->n/(totals->n-1) *
1067                             ( (totals->ssq / totals->n ) - totals->mean * totals->mean )
1068                             ) ;
1069
1070       totals->se_mean = totals->std_dev / sqrt(totals->n);
1071
1072     }
1073 }
1074
1075 /*
1076   Local Variables:
1077   mode: c
1078   End:
1079 */