src/t-test.q

   1 /* PSPP - computes sample statistics. -*-c-*-
   2
   3    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   4    Written by John Williams <johnr.williams@stonebow.otago.ac.nz>.
   5    Almost completely re-written by John Darrington 2004
   6
   7    This program is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU General Public License as
   9    published by the Free Software Foundation; either version 2 of the
  10    License, or (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful, but
  13    WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  20    02111-1307, USA. */
  21
  22 #include <config.h>
  23 #include <assert.h>
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <math.h>
  27 #include "alloc.h"
  28 #include "str.h"
  29 #include "dcdflib/cdflib.h"
  30 #include "command.h"
  31 #include "lexer.h"
  32 #include "error.h"
  33 #include "magic.h"
  34 #include "tab.h"
  35 #include "som.h"
  36 #include "value-labels.h"
  37 #include "var.h"
  38 #include "vfm.h"
  39 #include "hash.h"
  40 #include "stats.h"
  41 #include "t-test.h"
  42 #include "levene.h"
  43
  44 /* (specification)
  45    "T-TEST" (tts_):
  46      +groups=custom;
  47      +testval=double;
  48      variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
  49      pairs=custom;
  50      +missing=miss:!analysis/listwise,
  51              incl:include/!exclude;
  52      format=fmt:!labels/nolabels;
  53      criteria=:cin(d:criteria,"%s > 0. && %s < 1.").
  54 */
  55 /* (declarations) */
  56 /* (functions) */
  57
  58
  59 static struct cmd_t_test cmd;
  60
  61 /* Function to use for testing for missing values */
  62 static is_missing_func value_is_missing;
  63
  64 /* Variable for the GROUPS subcommand, if given. */
  65 static struct variable *indep_var;
  66
  67 /* GROUPS: Number of values specified by the user; the values
  68    specified if any. */
  69
  70 static int n_group_values;
  71 static union value groups_values[2];
  72 static enum comparison criteria[2];
  73
  74
  75
  76 /* PAIRS: Number of pairs to be compared ; each pair. */
  77 static int n_pairs = 0 ;
  78 struct pair
  79 {
  80 #if 1
  81   /* The variables comprising the pair */
  82   struct variable *v[2];
  83 #endif
  84
  85   /* The number of valid variable pairs */
  86   double n;
  87
  88   /* The sum of the members */
  89   double sum[2];
  90
  91   /* sum of squares of the members */
  92   double ssq[2];
  93
  94   /* Std deviation of the members */
  95   double std_dev[2];
  96
  97
  98   /* Sample Std deviation of the members */
  99   double s_std_dev[2];
 100
 101   /* The means of the members */
 102   double mean[2];
 103
 104   /* The correlation coefficient between the variables */
 105   double correlation;
 106
 107   /* The sum of the differences */
 108   double sum_of_diffs;
 109
 110   /* The sum of the products */
 111   double sum_of_prod;
 112
 113   /* The mean of the differences */
 114   double mean_diff;
 115
 116   /* The sum of the squares of the differences */
 117   double ssq_diffs;
 118
 119   /* The std deviation of the differences */
 120   double std_dev_diff;
 121 };
 122
 123 static struct pair *pairs=0;
 124
 125 static int parse_value (union value * v, int type) ;
 126
 127 /* Structures and Functions for the Statistics Summary Box */
 128 struct ssbox;
 129 typedef void populate_ssbox_func(struct ssbox *ssb,
 130                                             struct cmd_t_test *cmd);
 131 typedef void finalize_ssbox_func(struct ssbox *ssb);
 132
 133 struct ssbox
 134 {
 135   struct tab_table *t;
 136
 137   populate_ssbox_func *populate;
 138   finalize_ssbox_func *finalize;
 139
 140 };
 141
 142 /* Create a ssbox */
 143 void ssbox_create(struct ssbox *ssb,   struct cmd_t_test *cmd, int mode);
 144
 145 /* Populate a ssbox according to cmd */
 146 void ssbox_populate(struct ssbox *ssb, struct cmd_t_test *cmd);
 147
 148 /* Submit and destroy a ssbox */
 149 void ssbox_finalize(struct ssbox *ssb);
 150
 151 /* A function to create, populate and submit the Paired Samples Correlation
 152    box */
 153 void pscbox(void);
 154
 155
 156 /* Structures and Functions for the Test Results Box */
 157 struct trbox;
 158
 159 typedef void populate_trbox_func(struct trbox *trb,
 160                                  struct cmd_t_test *cmd);
 161 typedef void finalize_trbox_func(struct trbox *trb);
 162
 163 struct trbox {
 164   struct tab_table *t;
 165   populate_trbox_func *populate;
 166   finalize_trbox_func *finalize;
 167 };
 168
 169 /* Create a trbox */
 170 void trbox_create(struct trbox *trb,   struct cmd_t_test *cmd, int mode);
 171
  172 /* Populate a trbox according to cmd */
 173 void trbox_populate(struct trbox *trb, struct cmd_t_test *cmd);
 174
  175 /* Submit and destroy a trbox */
 176 void trbox_finalize(struct trbox *trb);
 177
 178 /* Which mode was T-TEST invoked */
 179 enum {
 180   T_1_SAMPLE = 0 ,
 181   T_IND_SAMPLES,
 182   T_PAIRED
 183 };
 184
 185
 186 static int common_calc (struct ccase *, void *);
 187 static void common_precalc (void *);
 188 static void common_postcalc (void *);
 189
 190 static int one_sample_calc (struct ccase *, void *);
 191 static void one_sample_precalc (void *);
 192 static void one_sample_postcalc (void *);
 193
 194 static int  paired_calc (struct ccase *, void *);
 195 static void paired_precalc (void *);
 196 static void paired_postcalc (void *);
 197
 198 static void group_precalc (void *);
 199 static int  group_calc (struct ccase *, void *);
 200 static void group_postcalc (void *);
 201
 202
 203 static int compare_var_name (const void *a_, const void *b_, void *v_ UNUSED);
 204 static unsigned hash_var_name (const void *a_, void *v_ UNUSED);
 205
 206
 207
 208 int
 209 cmd_t_test(void)
 210 {
 211   int mode;
 212
 213   struct ssbox stat_summary_box;
 214   struct trbox test_results_box;
 215
 216   if (!lex_force_match_id ("T"))
 217     return CMD_FAILURE;
 218
 219   lex_match ('-');
 220   lex_match_id ("TEST");
 221
 222   if ( !parse_t_test(&cmd) )
 223     return CMD_FAILURE;
 224
 225   if (! cmd.sbc_criteria)
 226     cmd.criteria=0.95;
 227
 228   {
 229     int m=0;
 230     if (cmd.sbc_testval) ++m;
 231     if (cmd.sbc_groups) ++m;
 232     if (cmd.sbc_pairs) ++m;
 233
 234     if ( m != 1)
 235       {
 236         msg(SE,
 237             _("TESTVAL, GROUPS and PAIRS subcommands are mutually exclusive.")
 238             );
 239         return CMD_FAILURE;
 240       }
 241   }
 242
 243   if (cmd.sbc_testval)
 244     mode=T_1_SAMPLE;
 245   else if (cmd.sbc_groups)
 246     mode=T_IND_SAMPLES;
 247   else
 248     mode=T_PAIRED;
 249
 250   if ( mode == T_PAIRED)
 251     {
 252       if (cmd.sbc_variables)
 253         {
 254           msg(SE, _("VARIABLES subcommand is not appropriate with PAIRS"));
 255           return CMD_FAILURE;
 256         }
 257       else
 258         {
 259           /* Iterate through the pairs and put each variable that is a
 260              member of a pair into cmd.v_variables */
 261
 262           int i;
 263           struct hsh_iterator hi;
 264           struct hsh_table *hash;
 265           struct variable *v;
 266
 267           hash=hsh_create(n_pairs,compare_var_name,hash_var_name,0,0);
 268
 269           for (i=0; i < n_pairs; ++i)
 270             {
 271               hsh_insert(hash,pairs[i].v[0]);
 272               hsh_insert(hash,pairs[i].v[1]);
 273             }
 274
 275           assert(cmd.n_variables == 0);
 276           cmd.n_variables = hsh_count(hash);
 277
 278           cmd.v_variables = xrealloc(cmd.v_variables,
 279                                      sizeof(struct variable) * cmd.n_variables);
 280           /* Iterate through the hash */
 281           for (i=0,v = (struct variable *) hsh_first(hash,&hi);
 282                v != 0;
 283                v=hsh_next(hash,&hi) )
 284             cmd.v_variables[i++]=v;
 285
 286           hsh_destroy(hash);
 287         }
 288     }
 289
 290   /* If /MISSING=INCLUDE is set, then user missing values are ignored */
 291   if (cmd.incl == TTS_INCLUDE )
 292     value_is_missing = is_system_missing;
 293   else
 294     value_is_missing = is_missing;
 295
 296   procedure_with_splits (common_precalc, common_calc, common_postcalc, NULL);
 297
 298   switch(mode)
 299     {
 300     case T_1_SAMPLE:
 301       procedure_with_splits (one_sample_precalc, one_sample_calc,
 302                              one_sample_postcalc, NULL);
 303       break;
 304     case T_PAIRED:
 305       procedure_with_splits (paired_precalc, paired_calc, paired_postcalc,
 306                              NULL);
 307       break;
 308     case T_IND_SAMPLES:
 309       procedure_with_splits(group_precalc,group_calc,group_postcalc, NULL);
 310       levene(indep_var, cmd.n_variables, cmd.v_variables,
 311              (cmd.miss == TTS_LISTWISE)?LEV_LISTWISE:LEV_ANALYSIS ,
 312              value_is_missing);
 313       break;
 314     }
 315
 316   ssbox_create(&stat_summary_box,&cmd,mode);
 317   ssbox_populate(&stat_summary_box,&cmd);
 318   ssbox_finalize(&stat_summary_box);
 319
 320   if ( mode == T_PAIRED)
 321       pscbox();
 322
 323   trbox_create(&test_results_box,&cmd,mode);
 324   trbox_populate(&test_results_box,&cmd);
 325   trbox_finalize(&test_results_box);
 326
 327   n_pairs=0;
 328   free(pairs);
 329   pairs=0;
 330
 331   if ( mode == T_IND_SAMPLES)
 332     {
 333       int i;
 334       /* Destroy any group statistics we created */
 335       for (i= 0 ; i < cmd.n_variables ; ++i )
 336         {
 337           free(cmd.v_variables[i]->p.t_t.gs);
 338         }
 339     }
 340
 341   return CMD_SUCCESS;
 342 }
 343
 344 static int
 345 tts_custom_groups (struct cmd_t_test *cmd UNUSED)
 346 {
 347
 348   lex_match('=');
 349
 350   if (token != T_ALL &&
 351       (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
 352      )
 353   {
 354     msg(SE,_("`%s' is not a variable name"),tokid);
 355     return 0;
 356   }
 357
 358   indep_var = parse_variable ();
 359   if (!indep_var)
 360     {
 361       lex_error ("expecting variable name in GROUPS subcommand");
 362       return 0;
 363     }
 364
 365   if (indep_var->type == T_STRING && indep_var->width > MAX_SHORT_STRING)
 366     {
 367       msg (SE, _("Long string variable %s is not valid here."),
 368            indep_var->name);
 369       return 0;
 370     }
 371
 372   if (!lex_match ('('))
 373     {
 374       if (indep_var->type == NUMERIC)
 375         {
 376           groups_values[0].f = 1;
 377           groups_values[1].f = 2;
 378           criteria[0] = criteria[1] = CMP_EQ;
 379           n_group_values = 2;
 380           return 1;
 381         }
 382       else
 383         {
 384           msg (SE, _("When applying GROUPS to a string variable, at "
 385                      "least one value must be specified."));
 386           return 0;
 387         }
 388     }
 389
 390   if (!parse_value (&groups_values[0],indep_var->type))
 391       return 0;
 392
 393   lex_match (',');
 394   if (lex_match (')'))
 395     {
 396       criteria[0] =  CMP_LE;
 397       criteria[1] =  CMP_GT;
 398       groups_values[1] = groups_values[0];
 399       n_group_values = 1;
 400       return 1;
 401     }
 402
 403   if (!parse_value (&groups_values[1],indep_var->type))
 404     return 0;
 405
 406   n_group_values = 2;
 407   if (!lex_force_match (')'))
 408     return 0;
 409
 410   criteria[0] = criteria[1] = CMP_EQ;
 411   return 1;
 412 }
 413
 414
 415 static int
 416 tts_custom_pairs (struct cmd_t_test *cmd UNUSED)
 417 {
 418   struct variable **vars;
 419   int n_vars;
 420   int n_pairs_local;
 421
 422   int n_before_WITH ;
 423   int n_after_WITH = -1;
 424   int paired ; /* Was the PAIRED keyword given ? */
 425
 426   lex_match('=');
 427
 428   if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
 429       && token != T_ALL)
 430     {
 431       msg(SE,_("`%s' is not a variable name"),tokid);
 432       return 0;
 433     }
 434
 435   n_vars=0;
 436   if (!parse_variables (default_dict, &vars, &n_vars,
 437                         PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
 438     {
 439       free (vars);
 440       return 0;
 441     }
 442   assert (n_vars);
 443
 444   n_before_WITH=0;
 445   if (lex_match (T_WITH))
 446     {
 447       n_before_WITH = n_vars;
 448       if (!parse_variables (default_dict, &vars, &n_vars,
 449                             PV_DUPLICATE | PV_APPEND
 450                             | PV_NUMERIC | PV_NO_SCRATCH))
 451         {
 452           free (vars);
 453           return 0;
 454         }
 455       n_after_WITH = n_vars - n_before_WITH;
 456     }
 457
 458   paired = (lex_match ('(') && lex_match_id ("PAIRED") && lex_match (')'));
 459
 460   /* Determine the number of pairs needed */
 461   if (paired)
 462     {
 463       if (n_before_WITH != n_after_WITH)
 464         {
 465           free (vars);
 466           msg (SE, _("PAIRED was specified but the number of variables "
 467                      "preceding WITH (%d) did not match the number "
 468                      "following (%d)."),
 469                n_before_WITH, n_after_WITH );
 470           return 0;
 471         }
 472       n_pairs_local=n_before_WITH;
 473     }
 474   else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */
 475     {
 476       n_pairs_local=n_before_WITH * n_after_WITH ;
 477     }
 478   else /* Neither WITH nor PAIRED keyword given */
 479     {
 480       if (n_vars < 2)
 481         {
 482           free (vars);
 483           msg (SE, _("At least two variables must be specified "
 484                      "on PAIRS."));
 485           return 0;
 486         }
 487
 488       /* how many ways can you pick 2 from n_vars ? */
 489       n_pairs_local = n_vars * (n_vars -1 ) /2 ;
 490     }
 491
 492
 493   /* Allocate storage for the pairs */
 494   pairs = xrealloc(pairs, sizeof(struct pair) * (n_pairs + n_pairs_local) );
 495
 496   /* Populate the pairs with the appropriate variables */
 497   if ( paired )
 498     {
 499       int i;
 500
 501       assert(n_pairs_local == n_vars/2);
 502       for (i = 0; i < n_pairs_local ; ++i)
 503         {
 504           pairs[i].v[n_pairs+0] = vars[i];
 505           pairs[i].v[n_pairs+1] = vars[i+n_pairs_local];
 506         }
 507     }
 508   else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */
 509     {
 510       int i,j;
 511       int p=n_pairs;
 512
 513       for(i=0 ; i < n_before_WITH ; ++i )
 514         {
 515           for(j=0 ; j < n_after_WITH ; ++j)
 516             {
 517               pairs[p].v[0] = vars[i];
 518               pairs[p].v[1] = vars[j+n_before_WITH];
 519               ++p;
 520             }
 521         }
 522     }
 523   else /* Neither WITH nor PAIRED given */
 524     {
 525       int i,j;
 526       int p=n_pairs;
 527
 528       for(i=0 ; i < n_vars ; ++i )
 529         {
 530           for(j=i+1 ; j < n_vars ; ++j)
 531             {
 532               pairs[p].v[0] = vars[i];
 533               pairs[p].v[1] = vars[j];
 534               ++p;
 535             }
 536         }
 537     }
 538
 539   n_pairs+=n_pairs_local;
 540
 541   return 1;
 542 }
 543
 544 /* Parses the current token (numeric or string, depending on type)
 545     value v and returns success. */
 546 static int
 547 parse_value (union value * v, int type )
 548 {
 549   if (type == NUMERIC)
 550     {
 551       if (!lex_force_num ())
 552         return 0;
 553       v->f = tokval;
 554     }
 555   else
 556     {
 557       if (!lex_force_string ())
 558         return 0;
 559       strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr));
 560     }
 561
 562   lex_get ();
 563
 564   return 1;
 565 }
 566
 567
 568 /* Implementation of the SSBOX object */
 569
 570 void ssbox_base_init(struct ssbox *this, int cols,int rows);
 571
 572 void ssbox_base_finalize(struct ssbox *ssb);
 573
 574 void ssbox_one_sample_init(struct ssbox *this,
 575                            struct cmd_t_test *cmd );
 576
 577 void ssbox_independent_samples_init(struct ssbox *this,
 578                                     struct cmd_t_test *cmd);
 579
 580 void ssbox_paired_init(struct ssbox *this,
 581                            struct cmd_t_test *cmd);
 582
 583 /* Factory to create an ssbox */
 584 void
 585 ssbox_create(struct ssbox *ssb, struct cmd_t_test *cmd, int mode)
 586 {
 587     switch (mode)
 588       {
 589       case T_1_SAMPLE:
 590         ssbox_one_sample_init(ssb,cmd);
 591         break;
 592       case T_IND_SAMPLES:
 593         ssbox_independent_samples_init(ssb,cmd);
 594         break;
 595       case T_PAIRED:
 596         ssbox_paired_init(ssb,cmd);
 597         break;
 598       default:
 599         assert(0);
 600       }
 601 }
 602
 603
 604 /* Despatcher for the populate method */
 605 void
 606 ssbox_populate(struct ssbox *ssb,struct cmd_t_test *cmd)
 607 {
 608   ssb->populate(ssb,cmd);
 609 }
 610
 611
 612 /* Despatcher for finalize */
 613 void
 614 ssbox_finalize(struct ssbox *ssb)
 615 {
 616   ssb->finalize(ssb);
 617 }
 618
 619
 620 /* Submit the box and clear up */
 621 void
 622 ssbox_base_finalize(struct ssbox *ssb)
 623 {
 624   tab_submit(ssb->t);
 625 }
 626
 627 /* Initialize a ssbox struct */
 628 void
 629 ssbox_base_init(struct ssbox *this, int cols,int rows)
 630 {
 631   this->finalize = ssbox_base_finalize;
 632   this->t = tab_create (cols, rows, 0);
 633
 634   tab_columns (this->t, SOM_COL_DOWN, 1);
 635   tab_headers (this->t,0,0,1,0);
 636   tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 );
 637   tab_hline(this->t, TAL_2,0,cols-1,1);
 638   tab_dim (this->t, tab_natural_dimensions);
 639 }
 640
 641 void  ssbox_one_sample_populate(struct ssbox *ssb,
 642                               struct cmd_t_test *cmd);
 643
 644 /* Initialize the one_sample ssbox */
 645 void
 646 ssbox_one_sample_init(struct ssbox *this,
 647                            struct cmd_t_test *cmd )
 648 {
 649   const int hsize=5;
 650   const int vsize=cmd->n_variables+1;
 651
 652   this->populate = ssbox_one_sample_populate;
 653
 654   ssbox_base_init(this, hsize,vsize);
 655   tab_title (this->t, 0, _("One-Sample Statistics"));
 656   tab_vline(this->t, TAL_2, 1,0,vsize);
 657   tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N"));
 658   tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
 659   tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
 660   tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean"));
 661 }
 662
 663 void ssbox_independent_samples_populate(struct ssbox *ssb,
 664                                         struct cmd_t_test *cmd);
 665
 666 /* Initialize the independent samples ssbox */
 667 void
 668 ssbox_independent_samples_init(struct ssbox *this,
 669         struct cmd_t_test *cmd)
 670 {
 671   int hsize=6;
 672   int vsize = cmd->n_variables*2 +1;
 673
 674   this->populate = ssbox_independent_samples_populate;
 675
 676   ssbox_base_init(this, hsize,vsize);
 677   tab_title (this->t, 0, _("Group Statistics"));
 678   tab_vline(this->t,0,1,0,vsize);
 679   tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, indep_var->name);
 680   tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
 681   tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
 682   tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
 683   tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean"));
 684 }
 685
 686
 687 /* Populate the ssbox for independent samples */
 688 void
 689 ssbox_independent_samples_populate(struct ssbox *ssb,
 690                               struct cmd_t_test *cmd)
 691 {
 692   int i;
 693
 694   char *val_lab0=0;
 695   char *val_lab1=0;
 696
 697   char prefix[2][3]={"",""};
 698
 699   if ( indep_var->type == NUMERIC )
 700     {
 701       val_lab0 = val_labs_find( indep_var->val_labs,groups_values[0]);
 702       val_lab1 = val_labs_find( indep_var->val_labs,groups_values[1]);
 703     }
 704   else
 705     {
 706       val_lab0 = groups_values[0].s;
 707       val_lab1 = groups_values[1].s;
 708     }
 709
 710   if (n_group_values == 1)
 711     {
 712       strcpy(prefix[0],"< ");
 713       strcpy(prefix[1],">=");
 714     }
 715
 716   assert(ssb->t);
 717
 718   for (i=0; i < cmd->n_variables; ++i)
 719     {
 720       int g;
 721
 722       tab_text (ssb->t, 0, i*2+1, TAB_LEFT, cmd->v_variables[i]->name);
 723
 724       if (val_lab0)
 725         tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF,
 726                   "%s%s", prefix[0], val_lab0);
 727       else
 728         tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF,
 729                   "%s%g", prefix[0], groups_values[0].f);
 730
 731
 732       if (val_lab1)
 733         tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF,
 734                   "%s%s", prefix[1], val_lab1);
 735       else
 736         tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF,
 737                   "%s%g", prefix[1], groups_values[1].f);
 738
 739       /* Fill in the group statistics */
 740       for ( g=0; g < 2 ; ++g )
 741         {
 742           struct group_statistics *gs = &cmd->v_variables[i]->p.t_t.gs[g];
 743
 744           tab_float(ssb->t, 2 ,i*2+g+1, TAB_RIGHT, gs->n, 2, 0);
 745           tab_float(ssb->t, 3 ,i*2+g+1, TAB_RIGHT, gs->mean, 8, 2);
 746           tab_float(ssb->t, 4 ,i*2+g+1, TAB_RIGHT, gs->std_dev, 8, 3);
 747           tab_float(ssb->t, 5 ,i*2+g+1, TAB_RIGHT, gs->se_mean, 8, 3);
 748         }
 749     }
 750 }
 751
 752
 753 void ssbox_paired_populate(struct ssbox *ssb,
 754                            struct cmd_t_test *cmd);
 755
 756 /* Initialize the paired values ssbox */
 757 void
 758 ssbox_paired_init(struct ssbox *this, struct cmd_t_test *cmd UNUSED)
 759 {
 760   int hsize=6;
 761
 762   int vsize = n_pairs*2+1;
 763
 764   this->populate = ssbox_paired_populate;
 765
 766   ssbox_base_init(this, hsize,vsize);
 767   tab_title (this->t, 0, _("Paired Sample Statistics"));
 768   tab_vline(this->t,TAL_0,1,0,vsize-1);
 769   tab_vline(this->t,TAL_2,2,0,vsize-1);
 770   tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
 771   tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N"));
 772   tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
 773   tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean"));
 774 }
 775
 776
 777 /* Populate the ssbox for paired values */
 778 void
 779 ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd UNUSED)
 780 {
 781   int i;
 782
 783   assert(ssb->t);
 784
 785   for (i=0; i < n_pairs; ++i)
 786     {
 787       int j;
 788
 789       tab_text (ssb->t, 0, i*2+1, TAB_LEFT | TAT_PRINTF , _("Pair %d"),i);
 790
 791       for (j=0 ; j < 2 ; ++j)
 792         {
 793           struct group_statistics *gs;
 794
 795           gs=&pairs[i].v[j]->p.t_t.ugs;
 796
 797           /* Titles */
 798
 799           tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, pairs[i].v[j]->name);
 800
 801           /* Values */
 802           tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], 8, 2);
 803           tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, 2, 0);
 804           tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], 8, 3);
 805           tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j]/sqrt(pairs[i].n), 8, 3);
 806
 807         }
 808     }
 809 }
 810
 811 /* Populate the one sample ssbox */
 812 void
 813 ssbox_one_sample_populate(struct ssbox *ssb, struct cmd_t_test *cmd)
 814 {
 815   int i;
 816
 817   assert(ssb->t);
 818
 819   for (i=0; i < cmd->n_variables; ++i)
 820     {
 821       struct group_statistics *gs;
 822       gs= &cmd->v_variables[i]->p.t_t.ugs;
 823
 824       tab_text (ssb->t, 0, i+1, TAB_LEFT, cmd->v_variables[i]->name);
 825       tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 2, 0);
 826       tab_float (ssb->t,2, i+1, TAB_RIGHT, gs->mean, 8, 2);
 827       tab_float (ssb->t,3, i+1, TAB_RIGHT, gs->std_dev, 8, 2);
 828       tab_float (ssb->t,4, i+1, TAB_RIGHT, gs->se_mean, 8, 3);
 829     }
 830
 831 }
 832
 833
 834
 835 /* Implementation of the Test Results box struct */
 836
 837 void trbox_base_init(struct trbox *self,int n_vars, int cols);
 838 void trbox_base_finalize(struct trbox *trb);
 839
 840 void trbox_independent_samples_init(struct trbox *trb,
 841                                     struct cmd_t_test *cmd );
 842
 843 void trbox_independent_samples_populate(struct trbox *trb,
 844                                         struct cmd_t_test *cmd);
 845
 846 void trbox_one_sample_init(struct trbox *self,
 847                       struct cmd_t_test *cmd );
 848
 849 void trbox_one_sample_populate(struct trbox *trb,
 850                                struct cmd_t_test *cmd);
 851
 852 void trbox_paired_init(struct trbox *self,
 853                        struct cmd_t_test *cmd );
 854
 855 void trbox_paired_populate(struct trbox *trb,
 856                       struct cmd_t_test *cmd);
 857
 858
 859
 860 /* Create a trbox according to mode*/
 861 void
 862 trbox_create(struct trbox *trb,
 863              struct cmd_t_test *cmd, int mode)
 864 {
 865     switch (mode)
 866       {
 867       case T_1_SAMPLE:
 868         trbox_one_sample_init(trb,cmd);
 869         break;
 870       case T_IND_SAMPLES:
 871         trbox_independent_samples_init(trb,cmd);
 872         break;
 873       case T_PAIRED:
 874         trbox_paired_init(trb,cmd);
 875         break;
 876       default:
 877         assert(0);
 878       }
 879 }
 880
 881 /* Populate a trbox according to cmd */
 882 void
 883 trbox_populate(struct trbox *trb, struct cmd_t_test *cmd)
 884 {
 885   trb->populate(trb,cmd);
 886 }
 887
 888 /* Submit and destroy a trbox */
 889 void
 890 trbox_finalize(struct trbox *trb)
 891 {
 892   trb->finalize(trb);
 893 }
 894
 895 /* Initialize the independent samples trbox */
 896 void
 897 trbox_independent_samples_init(struct trbox *self,
 898                            struct cmd_t_test *cmd UNUSED)
 899 {
 900   const int hsize=11;
 901   const int vsize=cmd->n_variables*2+3;
 902
 903   assert(self);
 904   self->populate = trbox_independent_samples_populate;
 905
 906   trbox_base_init(self,cmd->n_variables*2,hsize);
 907   tab_title(self->t,0,_("Independent Samples Test"));
 908   tab_hline(self->t,TAL_1,2,hsize-1,1);
 909   tab_vline(self->t,TAL_2,2,0,vsize-1);
 910   tab_vline(self->t,TAL_1,4,0,vsize-1);
 911   tab_box(self->t,-1,-1,-1,TAL_1, 2,1,hsize-2,vsize-1);
 912   tab_hline(self->t,TAL_1, hsize-2,hsize-1,2);
 913   tab_box(self->t,-1,-1,-1,TAL_1, hsize-2,2,hsize-1,vsize-1);
 914   tab_joint_text(self->t, 2, 0, 3, 0,
 915                  TAB_CENTER,_("Levene's Test for Equality of Variances"));
 916   tab_joint_text(self->t, 4,0,hsize-1,0,
 917                  TAB_CENTER,_("t-test for Equality of Means"));
 918
 919   tab_text(self->t,2,2, TAB_CENTER | TAT_TITLE,_("F"));
 920   tab_text(self->t,3,2, TAB_CENTER | TAT_TITLE,_("Sig."));
 921   tab_text(self->t,4,2, TAB_CENTER | TAT_TITLE,_("t"));
 922   tab_text(self->t,5,2, TAB_CENTER | TAT_TITLE,_("df"));
 923   tab_text(self->t,6,2, TAB_CENTER | TAT_TITLE,_("Sig. (2-tailed)"));
 924   tab_text(self->t,7,2, TAB_CENTER | TAT_TITLE,_("Mean Difference"));
 925   tab_text(self->t,8,2, TAB_CENTER | TAT_TITLE,_("Std. Error Difference"));
 926   tab_text(self->t,9,2, TAB_CENTER | TAT_TITLE,_("Lower"));
 927   tab_text(self->t,10,2, TAB_CENTER | TAT_TITLE,_("Upper"));
 928
 929   tab_joint_text(self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF,
 930                  _("%d%% Confidence Interval of the Difference"),
 931                  (int)round(cmd->criteria*100.0));
 932
 933 }
 934
 935 /* Populate the independent samples trbox */
 936 void
 937 trbox_independent_samples_populate(struct trbox *self,
 938                                    struct cmd_t_test *cmd )
 939 {
 940   int i;
 941
 942   assert(self);
 943   for (i=0; i < cmd->n_variables; ++i)
 944     {
 945       int which =1;
 946       double p,q;
 947       int status;
 948       double bound;
 949
 950       double t;
 951       double df;
 952
 953       double df1, df2;
 954
 955       double pooled_variance;
 956       double std_err_diff;
 957       double mean_diff;
 958
 959       struct group_statistics *gs0 = &cmd->v_variables[i]->p.t_t.gs[0];
 960       struct group_statistics *gs1 = &cmd->v_variables[i]->p.t_t.gs[1];
 961
 962       tab_text (self->t, 0, i*2+3, TAB_LEFT, cmd->v_variables[i]->name);
 963
 964       tab_text (self->t, 1, i*2+3, TAB_LEFT, _("Equal variances assumed"));
 965
 966
 967       tab_float(self->t, 2, i*2+3, TAB_CENTER,
 968                 cmd->v_variables[i]->p.t_t.levene, 8,3);
 969
 970
 971       /* Now work out the significance of the Levene test */
 972
 973       which=1; df1 = 1; df2 = cmd->v_variables[i]->p.t_t.ugs.n - 2;
 974       cdff(&which,&p,&q,&cmd->v_variables[i]->p.t_t.levene,
 975            &df1,&df2,&status,&bound);
 976
 977       if ( 0 != status )
 978         {
 979           msg( SE, _("Error calculating F statistic (cdff returned %d)."),status);
 980         }
 981
 982       tab_float(self->t, 3, i*2+3, TAB_CENTER, q, 8,3 );
 983
 984       df = gs0->n + gs1->n - 2.0 ;
 985       tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 2, 0);
 986
 987       pooled_variance = ( (gs0->n )*sqr(gs0->s_std_dev)
 988                           +
 989                           (gs1->n )*sqr(gs1->s_std_dev)
 990                         ) / df  ;
 991
 992       t = (gs0->mean - gs1->mean) / sqrt(pooled_variance) ;
 993       t /= sqrt((gs0->n + gs1->n)/(gs0->n*gs1->n));
 994
 995       tab_float (self->t, 4, i*2+3, TAB_RIGHT, t, 8, 3);
 996
 997
 998       which=1; /* get p & q from t & df */
 999       cdft(&which, &p, &q, &t, &df, &status, &bound);
1000       if ( 0 != status )
1001         {
1002           msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
1003         }
1004
1005       tab_float(self->t, 6, i*2+3, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3);
1006
1007       mean_diff = gs0->mean - gs1->mean;
1008       tab_float(self->t, 7, i*2+3, TAB_RIGHT, mean_diff, 8, 3);
1009
1010
1011       std_err_diff = sqrt( sqr(gs0->se_mean) + sqr(gs1->se_mean));
1012       tab_float(self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, 8, 3);
1013
1014
1015       /* Now work out the confidence interval */
1016       q = (1 - cmd->criteria)/2.0;  /* 2-tailed test */
1017       p = 1 - q ;
1018       which=2; /* Calc T from p,q and df */
1019       cdft(&which, &p, &q, &t, &df, &status, &bound);
1020       if ( 0 != status )
1021         {
1022           msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
1023         }
1024
1025       tab_float(self->t, 9, i*2+3, TAB_RIGHT,
1026                 mean_diff - t * std_err_diff, 8, 3);
1027
1028       tab_float(self->t, 10, i*2+3, TAB_RIGHT,
1029                 mean_diff + t * std_err_diff, 8, 3);
1030
1031
1032       {
1033         double se2;
1034       /* Now for the \sigma_1 != \sigma_2 case */
1035       tab_text (self->t, 1, i*2+3+1,
1036                 TAB_LEFT, _("Equal variances not assumed"));
1037
1038
1039       se2 = (sqr(gs0->s_std_dev)/(gs0->n -1) ) +
1040         (sqr(gs1->s_std_dev)/(gs1->n -1) );
1041
1042       t = mean_diff / sqrt(se2) ;
1043       tab_float (self->t, 4, i*2+3+1, TAB_RIGHT, t, 8, 3);
1044
1045       df = sqr(se2) / (
1046                        (sqr(sqr(gs0->s_std_dev)/(gs0->n - 1 ))
1047                         /(gs0->n -1 )
1048                         )
1049                        +
1050                        (sqr(sqr(gs1->s_std_dev)/(gs1->n - 1 ))
1051                         /(gs1->n -1 )
1052                         )
1053                        ) ;
1054       tab_float (self->t, 5, i*2+3+1, TAB_RIGHT, df, 8, 3);
1055
1056       which=1; /* get p & q from t & df */
1057       cdft(&which, &p, &q, &t, &df, &status, &bound);
1058       if ( 0 != status )
1059         {
1060           msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
1061         }
1062
1063       tab_float(self->t, 6, i*2+3+1, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3);
1064
1065       /* Now work out the confidence interval */
1066       q = (1 - cmd->criteria)/2.0;  /* 2-tailed test */
1067       p = 1 - q ;
1068       which=2; /* Calc T from p,q and df */
1069       cdft(&which, &p, &q, &t, &df, &status, &bound);
1070       if ( 0 != status )
1071         {
1072           msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
1073         }
1074
1075
1076       tab_float(self->t, 7, i*2+3+1, TAB_RIGHT, mean_diff, 8, 3);
1077
1078
1079       tab_float(self->t, 8, i*2+3+1, TAB_RIGHT, std_err_diff, 8, 3);
1080
1081
1082       tab_float(self->t, 9, i*2+3+1, TAB_RIGHT,
1083                 mean_diff - t * std_err_diff, 8, 3);
1084
1085       tab_float(self->t, 10, i*2+3+1, TAB_RIGHT,
1086                 mean_diff + t * std_err_diff, 8, 3);
1087
1088       }
1089     }
1090 }
1091
1092 /* Initialize the paired samples trbox */
1093 void
1094 trbox_paired_init(struct trbox *self,
1095                            struct cmd_t_test *cmd UNUSED)
1096 {
1097
1098   const int hsize=10;
1099   const int vsize=n_pairs+3;
1100
1101   self->populate = trbox_paired_populate;
1102
1103   trbox_base_init(self,n_pairs,hsize);
1104   tab_title (self->t, 0, _("Paired Samples Test"));
1105   tab_hline(self->t,TAL_1,2,6,1);
1106   tab_vline(self->t,TAL_2,2,0,vsize);
1107   tab_joint_text(self->t,2,0,6,0,TAB_CENTER,_("Paired Differences"));
1108   tab_box(self->t,-1,-1,-1,TAL_1, 2,1,6,vsize-1);
1109   tab_box(self->t,-1,-1,-1,TAL_1, 6,0,hsize-1,vsize-1);
1110   tab_hline(self->t,TAL_1,5,6, 2);
1111   tab_vline(self->t,TAL_0,6,0,1);
1112
1113   tab_joint_text(self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF,
1114                  _("%d%% Confidence Interval of the Difference"),
1115                  (int)round(cmd->criteria*100.0));
1116
1117   tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean"));
1118   tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
1119   tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Mean"));
1120   tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
1121   tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
1122   tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("t"));
1123   tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("df"));
1124   tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
1125 }
1126
1127 /* Populate the paired samples trbox */
1128 void
1129 trbox_paired_populate(struct trbox *trb,
1130                               struct cmd_t_test *cmd UNUSED)
1131 {
1132   int i;
1133
1134   for (i=0; i < n_pairs; ++i)
1135     {
1136       int which =1;
1137       double p,q;
1138       int status;
1139       double bound;
1140       double se_mean;
1141
1142       double n = pairs[i].n;
1143       double t;
1144       double df = n - 1;
1145
1146       tab_text (trb->t, 0, i+3, TAB_LEFT | TAT_PRINTF, _("Pair %d"),i);
1147
1148       tab_text (trb->t, 1, i+3, TAB_LEFT | TAT_PRINTF, "%s - %s",
1149                 pairs[i].v[0]->name, pairs[i].v[1]->name);
1150
1151       tab_float(trb->t, 2, i+3, TAB_RIGHT, pairs[i].mean_diff, 8, 4);
1152
1153       tab_float(trb->t, 3, i+3, TAB_RIGHT, pairs[i].std_dev_diff, 8, 5);
1154
1155       /* SE Mean */
1156       se_mean = pairs[i].std_dev_diff / sqrt(n) ;
1157       tab_float(trb->t, 4, i+3, TAB_RIGHT, se_mean, 8,5 );
1158
1159       /* Now work out the confidence interval */
1160       q = (1 - cmd->criteria)/2.0;  /* 2-tailed test */
1161       p = 1 - q ;
1162       which=2; /* Calc T from p,q and df */
1163       cdft(&which, &p, &q, &t, &df, &status, &bound);
1164
1165       if ( 0 != status )
1166         {
1167           msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
1168         }
1169
1170       tab_float(trb->t, 5, i+3, TAB_RIGHT,
1171                 pairs[i].mean_diff - t * se_mean , 8, 4);
1172
1173       tab_float(trb->t, 6, i+3, TAB_RIGHT,
1174                 pairs[i].mean_diff + t * se_mean , 8, 4);
1175
1176       t = (pairs[i].mean[0] - pairs[i].mean[1])
1177         / sqrt (
1178                 ( sqr (pairs[i].s_std_dev[0]) + sqr (pairs[i].s_std_dev[1]) -
1179                   2 * pairs[i].correlation *
1180                   pairs[i].s_std_dev[0] * pairs[i].s_std_dev[1] )
1181                 / (n - 1)
1182                 );
1183
1184       tab_float(trb->t, 7, i+3, TAB_RIGHT, t , 8,3 );
1185
1186       /* Degrees of freedom */
1187       tab_float(trb->t, 8, i+3, TAB_RIGHT, df , 2, 0 );
1188
1189       which=1;
1190       cdft(&which, &p, &q, &t, &df, &status, &bound);
1191       if ( 0 != status )
1192         {
1193           msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
1194         }
1195
1196
1197       tab_float(trb->t, 9, i+3, TAB_RIGHT, 2.0*(t>0?q:p) , 8, 3);
1198
1199     }
1200 }
1201
1202 /* Initialize the one sample trbox */
1203 void
1204 trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd )
1205 {
1206   const int hsize=7;
1207   const int vsize=cmd->n_variables+3;
1208
1209   self->populate = trbox_one_sample_populate;
1210
1211   trbox_base_init(self, cmd->n_variables,hsize);
1212   tab_title (self->t, 0, _("One-Sample Test"));
1213   tab_hline(self->t, TAL_1, 1, hsize - 1, 1);
1214   tab_vline(self->t, TAL_2, 1, 0, vsize);
1215
1216   tab_joint_text(self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF,
1217                  _("Test Value = %f"),cmd->n_testval);
1218
1219   tab_box(self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1);
1220
1221
1222   tab_joint_text(self->t,5,1,6,1,TAB_CENTER  | TAT_PRINTF,
1223                  _("%d%% Confidence Interval of the Difference"),
1224                  (int)round(cmd->criteria*100.0));
1225
1226   tab_vline(self->t,TAL_0,6,1,1);
1227   tab_hline(self->t,TAL_1,5,6,2);
1228   tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _("t"));
1229   tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("df"));
1230   tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
1231   tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference"));
1232   tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
1233   tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
1234
1235 }
1236
1237
1238 /* Populate the one sample trbox */
1239 void
1240 trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd)
1241 {
1242   int i;
1243
1244   assert(trb->t);
1245
1246   for (i=0; i < cmd->n_variables; ++i)
1247     {
1248       int which =1;
1249       double t;
1250       double p,q;
1251       double df;
1252       int status;
1253       double bound;
1254       struct group_statistics *gs;
1255       gs= &cmd->v_variables[i]->p.t_t.ugs;
1256
1257
1258       tab_text (trb->t, 0, i+3, TAB_LEFT, cmd->v_variables[i]->name);
1259
1260       t = (gs->mean - cmd->n_testval ) * sqrt(gs->n) / gs->std_dev ;
1261
1262       tab_float (trb->t, 1, i+3, TAB_RIGHT, t, 8,3);
1263
1264       /* degrees of freedom */
1265       df = gs->n - 1;
1266
1267       tab_float (trb->t, 2, i+3, TAB_RIGHT, df, 8,0);
1268
1269       cdft(&which, &p, &q, &t, &df, &status, &bound);
1270
1271       if ( 0 != status )
1272         {
1273           msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
1274         }
1275
1276
1277       /* Multiply by 2 to get 2-tailed significance, makeing sure we've got
1278          the correct tail*/
1279       tab_float (trb->t, 3, i+3, TAB_RIGHT, 2.0*(t>0?q:p), 8,3);
1280
1281       tab_float (trb->t, 4, i+3, TAB_RIGHT, gs->mean_diff, 8,3);
1282
1283
1284       q = (1 - cmd->criteria)/2.0;  /* 2-tailed test */
1285       p = 1 - q ;
1286       which=2; /* Calc T from p,q and df */
1287       cdft(&which, &p, &q, &t, &df, &status, &bound);
1288       if ( 0 != status )
1289         {
1290           msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
1291         }
1292
1293       tab_float (trb->t, 5, i+3, TAB_RIGHT,
1294                  gs->mean_diff - t * gs->se_mean, 8,4);
1295
1296       tab_float (trb->t, 6, i+3, TAB_RIGHT,
1297                  gs->mean_diff + t * gs->se_mean, 8,4);
1298     }
1299 }
1300
1301 /* Base initializer for the generalized trbox */
1302 void
1303 trbox_base_init(struct trbox *self, int data_rows, int cols)
1304 {
1305   const int rows = 3 + data_rows;
1306
1307   self->finalize = trbox_base_finalize;
1308   self->t = tab_create (cols, rows, 0);
1309   tab_headers (self->t,0,0,3,0);
1310   tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols -1, rows -1);
1311   tab_hline(self->t, TAL_2,0,cols-1,3);
1312   tab_dim (self->t, tab_natural_dimensions);
1313 }
1314
1315
1316 /* Base finalizer for the trbox */
1317 void
1318 trbox_base_finalize(struct trbox *trb)
1319 {
1320   tab_submit(trb->t);
1321 }
1322
1323
1324 /* Create , populate and submit the Paired Samples Correlation box */
1325 void
1326 pscbox(void)
1327 {
1328   const int rows=1+n_pairs;
1329   const int cols=5;
1330   int i;
1331
1332   struct tab_table *table;
1333
1334   table = tab_create (cols,rows,0);
1335
1336   tab_columns (table, SOM_COL_DOWN, 1);
1337   tab_headers (table,0,0,1,0);
1338   tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 );
1339   tab_hline(table, TAL_2, 0, cols - 1, 1);
1340   tab_vline(table, TAL_2, 2, 0, rows - 1);
1341   tab_dim(table, tab_natural_dimensions);
1342   tab_title(table, 0, _("Paired Samples Correlations"));
1343
1344   /* column headings */
1345   tab_text(table, 2,0, TAB_CENTER | TAT_TITLE, _("N"));
1346   tab_text(table, 3,0, TAB_CENTER | TAT_TITLE, _("Correlation"));
1347   tab_text(table, 4,0, TAB_CENTER | TAT_TITLE, _("Sig."));
1348
1349   for (i=0; i < n_pairs; ++i)
1350     {
1351       int which =1;
1352       double p,q;
1353
1354       int status;
1355       double bound;
1356
1357       double df = pairs[i].n -2;
1358
1359       double correlation_t =
1360         pairs[i].correlation * sqrt(df) /
1361         sqrt(1 - sqr(pairs[i].correlation));
1362
1363
1364       /* row headings */
1365       tab_text(table, 0,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1366                _("Pair %d"), i);
1367
1368       tab_text(table, 1,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1369                _("%s & %s"), pairs[i].v[0]->name, pairs[i].v[1]->name);
1370
1371
1372       /* row data */
1373       tab_float(table, 2, i+1, TAB_RIGHT, pairs[i].n, 4, 0);
1374       tab_float(table, 3, i+1, TAB_RIGHT, pairs[i].correlation, 8, 3);
1375
1376       cdft(&which, &p, &q, &correlation_t, &df, &status, &bound);
1377       if ( 0 != status )
1378         {
1379           msg( SE, _("Error calculating T statistic (cdft returned %d)."),status);
1380         }
1381
1382       tab_float(table, 4, i+1, TAB_RIGHT, 2.0*(correlation_t>0?q:p), 8, 3);
1383     }
1384
1385   tab_submit(table);
1386 }
1387
1388
1389
1390 /* Calculation Implementation */
1391
1392 /* Per case calculations common to all variants of the T test */
1393 static int
1394 common_calc (struct ccase *c, void *aux UNUSED)
1395 {
1396   int i;
1397
1398   double weight = dict_get_case_weight(default_dict,c);
1399
1400
1401   /* Skip the entire case if /MISSING=LISTWISE is set */
1402   if ( cmd.miss == TTS_LISTWISE )
1403     {
1404       for(i=0; i< cmd.n_variables ; ++i)
1405         {
1406           struct variable *v = cmd.v_variables[i];
1407           union value *val = &c->data[v->fv];
1408
1409           if (value_is_missing(val,v) )
1410             {
1411               return 0;
1412             }
1413         }
1414     }
1415
1416   /* Listwise has to be implicit if the independent variable is missing ?? */
1417   if ( cmd.sbc_groups )
1418     {
1419       union value *gv = &c->data[indep_var->fv];
1420       if ( value_is_missing(gv,indep_var) )
1421         {
1422           return 0;
1423         }
1424     }
1425
1426
1427   for(i=0; i< cmd.n_variables ; ++i)
1428     {
1429       struct group_statistics *gs;
1430       struct variable *v = cmd.v_variables[i];
1431       union value *val = &c->data[v->fv];
1432
1433       gs= &cmd.v_variables[i]->p.t_t.ugs;
1434
1435       if (! value_is_missing(val,v) )
1436         {
1437           gs->n+=weight;
1438           gs->sum+=weight * val->f;
1439           gs->ssq+=weight * val->f * val->f;
1440         }
1441     }
1442   return 0;
1443 }
1444
1445 /* Pre calculations common to all variants of the T test */
1446 static void
1447 common_precalc (void *aux UNUSED)
1448 {
1449   int i=0;
1450
1451   for(i=0; i< cmd.n_variables ; ++i)
1452     {
1453       struct group_statistics *gs;
1454       gs= &cmd.v_variables[i]->p.t_t.ugs;
1455
1456       gs->sum=0;
1457       gs->n=0;
1458       gs->ssq=0;
1459       gs->sum_diff=0;
1460     }
1461 }
1462
1463 /* Post calculations common to all variants of the T test */
1464 void
1465 common_postcalc (void *aux UNUSED)
1466 {
1467   int i=0;
1468
1469   for(i=0; i< cmd.n_variables ; ++i)
1470     {
1471       struct group_statistics *gs;
1472       gs= &cmd.v_variables[i]->p.t_t.ugs;
1473
1474       gs->mean=gs->sum / gs->n;
1475       gs->s_std_dev= sqrt(
1476                          ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1477                          ) ;
1478
1479       gs->std_dev= sqrt(
1480                          gs->n/(gs->n-1) *
1481                          ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1482                          ) ;
1483
1484       gs->se_mean = gs->std_dev / sqrt(gs->n);
1485       gs->mean_diff= gs->sum_diff / gs->n;
1486     }
1487 }
1488
1489 /* Per case calculations for one sample t test  */
1490 static int
1491 one_sample_calc (struct ccase *c, void *aux UNUSED)
1492 {
1493   int i;
1494
1495   double weight = dict_get_case_weight(default_dict,c);
1496
1497   /* Skip the entire case if /MISSING=LISTWISE is set */
1498   if ( cmd.miss == TTS_LISTWISE )
1499     {
1500       for(i=0; i< cmd.n_variables ; ++i)
1501         {
1502           struct variable *v = cmd.v_variables[i];
1503           union value *val = &c->data[v->fv];
1504
1505           if (value_is_missing(val,v) )
1506             {
1507               return 0;
1508             }
1509         }
1510     }
1511
1512   for(i=0; i< cmd.n_variables ; ++i)
1513     {
1514       struct group_statistics *gs;
1515       struct variable *v = cmd.v_variables[i];
1516       union value *val = &c->data[v->fv];
1517
1518       gs= &cmd.v_variables[i]->p.t_t.ugs;
1519
1520       if ( ! value_is_missing(val,v))
1521         gs->sum_diff += weight * (val->f - cmd.n_testval);
1522     }
1523
1524   return 0;
1525 }
1526
1527 /* Pre calculations for one sample t test */
1528 static void
1529 one_sample_precalc (void *aux UNUSED)
1530 {
1531   int i=0;
1532
1533   for(i=0; i< cmd.n_variables ; ++i)
1534     {
1535       struct group_statistics *gs;
1536       gs= &cmd.v_variables[i]->p.t_t.ugs;
1537
1538       gs->sum_diff=0;
1539     }
1540 }
1541
1542 /* Post calculations for one sample t test */
1543 static void
1544 one_sample_postcalc (void *aux UNUSED)
1545 {
1546   int i=0;
1547
1548   for(i=0; i< cmd.n_variables ; ++i)
1549     {
1550       struct group_statistics *gs;
1551       gs= &cmd.v_variables[i]->p.t_t.ugs;
1552
1553
1554       gs->mean_diff = gs->sum_diff / gs->n ;
1555     }
1556 }
1557
1558
1559
1560 static int
1561 compare_var_name (const void *a_, const void *b_, void *v_ UNUSED)
1562 {
1563   const struct variable *a = a_;
1564   const struct variable *b = b_;
1565
1566   return strcmp(a->name,b->name);
1567 }
1568
1569 static unsigned
1570 hash_var_name (const void *a_, void *v_ UNUSED)
1571 {
1572   const struct variable *a = a_;
1573
1574   return hsh_hash_bytes (a->name, strlen(a->name));
1575 }
1576
1577
1578
1579 static void
1580 paired_precalc (void *aux UNUSED)
1581 {
1582   int i;
1583
1584   for(i=0; i < n_pairs ; ++i )
1585     {
1586       pairs[i].n = 0;
1587       pairs[i].sum[0] = 0;      pairs[i].sum[1] = 0;
1588       pairs[i].ssq[0] = 0;      pairs[i].ssq[1] = 0;
1589       pairs[i].sum_of_prod = 0;
1590       pairs[i].correlation = 0;
1591       pairs[i].sum_of_diffs = 0;
1592       pairs[i].ssq_diffs = 0;
1593     }
1594
1595 }
1596
1597
1598 static int
1599 paired_calc (struct ccase *c, void *aux UNUSED)
1600 {
1601   int i;
1602
1603   double weight = dict_get_case_weight(default_dict,c);
1604
1605   /* Skip the entire case if /MISSING=LISTWISE is set ,
1606    AND one member of a pair is missing */
1607   if ( cmd.miss == TTS_LISTWISE )
1608     {
1609       for(i=0; i < n_pairs ; ++i )
1610         {
1611           struct variable *v0 = pairs[i].v[0];
1612           struct variable *v1 = pairs[i].v[1];
1613
1614           union value *val0 = &c->data[v0->fv];
1615           union value *val1 = &c->data[v1->fv];
1616
1617           if ( value_is_missing(val0,v0) ||
1618                value_is_missing(val1,v1) )
1619             {
1620               return 0;
1621             }
1622         }
1623     }
1624
1625   for(i=0; i < n_pairs ; ++i )
1626     {
1627       struct variable *v0 = pairs[i].v[0];
1628       struct variable *v1 = pairs[i].v[1];
1629
1630       union value *val0 = &c->data[v0->fv];
1631       union value *val1 = &c->data[v1->fv];
1632
1633       if ( ( !value_is_missing(val0,v0) && !value_is_missing(val1,v1) ) )
1634       {
1635         pairs[i].n += weight;
1636         pairs[i].sum[0] += weight * val0->f;
1637         pairs[i].sum[1] += weight * val1->f;
1638
1639         pairs[i].ssq[0] += weight * sqr(val0->f);
1640         pairs[i].ssq[1] += weight * sqr(val1->f);
1641
1642 #if 0
1643         pairs[i].correlation += weight *
1644           ( val0->f - pairs[i].v[0]->p.t_t.ugs.mean )
1645           *
1646           ( val1->f - pairs[i].v[1]->p.t_t.ugs.mean );
1647 #endif
1648
1649         pairs[i].sum_of_prod += weight * val0->f * val1->f ;
1650
1651
1652         pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ;
1653         pairs[i].ssq_diffs += weight * sqr(val0->f - val1->f);
1654       }
1655     }
1656
1657   return 0;
1658 }
1659
1660 static void
1661 paired_postcalc (void *aux UNUSED)
1662 {
1663   int i;
1664
1665   for(i=0; i < n_pairs ; ++i )
1666     {
1667       int j;
1668       const double n = pairs[i].n;
1669
1670       for (j=0; j < 2 ; ++j)
1671         {
1672           pairs[i].mean[j] = pairs[i].sum[j] / n ;
1673           pairs[i].s_std_dev[j] = sqrt((pairs[i].ssq[j] / n -
1674                                               sqr(pairs[i].mean[j]))
1675                                      );
1676
1677           pairs[i].std_dev[j] = sqrt(n/(n-1)*(pairs[i].ssq[j] / n -
1678                                               sqr(pairs[i].mean[j]))
1679                                      );
1680         }
1681
1682       pairs[i].correlation = pairs[i].sum_of_prod / pairs[i].n -
1683         pairs[i].mean[0] * pairs[i].mean[1] ;
1684       /* correlation now actually contains the covariance */
1685
1686       pairs[i].correlation /= pairs[i].std_dev[0] * pairs[i].std_dev[1];
1687       pairs[i].correlation *= pairs[i].n / ( pairs[i].n - 1 );
1688
1689 #if 0
1690       pairs[i].correlation /= pairs[i].v[0]->p.t_t.ugs.std_dev *
1691                               pairs[i].v[1]->p.t_t.ugs.std_dev ;
1692       pairs[i].correlation /= n - 1;
1693 #endif
1694
1695
1696       pairs[i].mean_diff = pairs[i].sum_of_diffs / n ;
1697
1698
1699       pairs[i].std_dev_diff = sqrt (  n / (n - 1) * (
1700                                     ( pairs[i].ssq_diffs / n )
1701                                     -
1702                                     sqr(pairs[i].mean_diff )
1703                                     ) );
1704     }
1705 }
1706
1707 /* Return the group # corresponding to the
1708    independent variable with the value val
1709 */
1710 static int
1711 get_group(const union value *val, struct variable *indep)
1712 {
1713   int i;
1714
1715   for (i = 0; i < 2  ; ++i )
1716     {
1717       const int cmp = compare_values(val,&groups_values[i],indep->width) ;
1718       switch ( criteria[i])
1719         {
1720         case CMP_EQ:
1721           if ( 0 == cmp )   return i;
1722           break;
1723         case CMP_LT:
1724           if ( 0 >  cmp )  return i;
1725           break;
1726         case CMP_LE:
1727           if ( cmp <= 0 )   return i;
1728           break;
1729         case CMP_GT:
1730           if ( cmp > 0 ) return i;
1731           break;
1732         case CMP_GE:
1733           if ( cmp >= 0 ) return i;
1734           break;
1735         default:
1736           assert(0);
1737         };
1738     }
1739
1740   /* No groups matched */
1741   return -1;
1742 }
1743
1744
1745 static void
1746 group_precalc (void *aux UNUSED)
1747 {
1748   int i;
1749   int j;
1750
1751   for(i=0; i< cmd.n_variables ; ++i)
1752     {
1753       struct t_test_proc *ttpr = &cmd.v_variables[i]->p.t_t;
1754
1755       /* There's always 2 groups for a T - TEST */
1756       ttpr->n_groups = 2;
1757       ttpr->gs = xmalloc(sizeof(struct group_statistics) * 2) ;
1758
1759       for (j=0 ; j < 2 ; ++j)
1760         {
1761           ttpr->gs[j].sum = 0;
1762           ttpr->gs[j].n = 0;
1763           ttpr->gs[j].ssq = 0;
1764
1765           if ( n_group_values == 2 )
1766             ttpr->gs[j].id = groups_values[j];
1767           else
1768             ttpr->gs[j].id = groups_values[0];
1769           ttpr->gs[j].criterion = criteria[j];
1770         }
1771     }
1772
1773 }
1774
1775 static int
1776 group_calc (struct ccase *c, void *aux UNUSED)
1777 {
1778   int i;
1779   int g;
1780   union value *gv = &c->data[indep_var->fv];
1781
1782   double weight = dict_get_case_weight(default_dict,c);
1783
1784   if ( value_is_missing(gv,indep_var) )
1785     {
1786       return 0;
1787     }
1788
1789   if ( cmd.miss == TTS_LISTWISE )
1790     {
1791       for(i=0; i< cmd.n_variables ; ++i)
1792         {
1793           struct variable *v = cmd.v_variables[i];
1794           union value *val = &c->data[v->fv];
1795
1796           if (value_is_missing(val,v) )
1797             {
1798               return 0;
1799             }
1800         }
1801     }
1802
1803
1804   gv = &c->data[indep_var->fv];
1805
1806   g = get_group(gv,indep_var);
1807
1808   /* If the independent variable doesn't match either of the values
1809      for this case then move on to the next case */
1810   if (g == -1 )
1811     return 0;
1812
1813   for(i=0; i< cmd.n_variables ; ++i)
1814     {
1815       struct variable *var = cmd.v_variables[i];
1816
1817       struct group_statistics *gs = &var->p.t_t.gs[g];
1818
1819       union value *val=&c->data[var->fv];
1820
1821       if ( !value_is_missing(val,var) )
1822         {
1823           gs->n+=weight;
1824           gs->sum+=weight * val->f;
1825           gs->ssq+=weight * sqr(val->f);
1826         }
1827     }
1828
1829   return 0;
1830 }
1831
1832
1833 static void
1834 group_postcalc (void *aux UNUSED)
1835 {
1836   int i;
1837   int j;
1838
1839   for(i=0; i< cmd.n_variables ; ++i)
1840     {
1841       for (j=0 ; j < 2 ; ++j)
1842         {
1843           struct group_statistics *gs;
1844           gs=&cmd.v_variables[i]->p.t_t.gs[j];
1845
1846           gs->mean = gs->sum / gs->n;
1847
1848           gs->s_std_dev= sqrt(
1849                          ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1850                          ) ;
1851
1852           gs->std_dev= sqrt(
1853                          gs->n/(gs->n-1) *
1854                          ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1855                          ) ;
1856
1857           gs->se_mean = gs->std_dev / sqrt(gs->n);
1858         }
1859     }
1860 }
1861