src/t-test.q

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   3    Written by John Williams <johnr.williams@stonebow.otago.ac.nz>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  18    02111-1307, USA. */
  19
  20 #include <config.h>
  21 #include <assert.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24 #include <math.h>
  25 #include "alloc.h"
  26 #include "str.h"
  27 #include "dcdflib/cdflib.h"
  28 #include "command.h"
  29 #include "lexer.h"
  30 #include "error.h"
  31 #include "magic.h"
  32 #include "value-labels.h"
  33 #include "var.h"
  34 #include "vfm.h"
  35
  36 /* (specification)
  37    "T-TEST" (tts_):
  38      groups=custom;
  39      variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
  40      *+pairs=custom;
  41      +missing=miss:!analysis/listwise,
  42              incl:include/!exclude;
  43      +format=fmt:!labels/nolabels;
  44      +criteria=:ci(d:criteria,"%s > 0. && %s < 1.").
  45 */
  46 /* (declarations) */
  47 /* (functions) */
  48
  49 #include "debug-print.h"
  50
  51 /* Command parsing information. */
  52 static struct cmd_t_test cmd;
  53
  54 /* Variable for the GROUPS subcommand, if given. */
  55 static struct variable *groups;
  56
  57 /* GROUPS: Number of values specified by the user; the values
  58    specified if any. */
  59 static int n_groups_values;
  60 static union value groups_values[2];
  61
  62 /* PAIRED: Number of pairs; each pair. */
  63 static int n_pairs;
  64 static struct variable *(*pairs)[2];
  65
  66 /* Routines to scan data and perform t-tests */
  67 static void precalc (void);
  68 static void postcalc (void);
  69 static void g_postcalc (void);
  70 static void t_pairs (void);
  71 static void t_groups (void);
  72 static int groups_calc (struct ccase *);
  73 static int pairs_calc (struct ccase *);
  74 static int z_calc (struct ccase *);
  75
  76 struct value_list
  77   {
  78     double sum;
  79     double ss;
  80     double n;
  81     struct value_list *next;
  82   };
  83
  84 /* general workhorses - should  move these to a separate library... */
  85 double variance (double n, double ss, double sum);
  86
  87 double covariance (double x_sum, double x_n,
  88                    double y_sum, double y_n, double ss);
  89
  90 double pooled_variance (double n_1, double var_1,
  91                         double n_2, double var_2);
  92
  93 double oneway (double *f, double *p, struct value_list *list);
  94
  95 double pearson_r (double c_xy, double c_xx, double c_yy);
  96
  97 double f_sig (double f, double dfn, double dfd);
  98 double t_crt (double df, double q);
  99 double t_sig (double t, double df);
 100
 101 /* massive function simply to remove any responsibility for output
 102    from the function which does the actual t-test calculations */
 103 void print_t_groups (struct variable * grps, union value * g1, union value * g2,
 104                      double n1, double n2, double mean1, double mean2,
 105                      double sd1, double sd2, double se1, double se2,
 106                      double diff, double l_f, double l_p,
 107                      double p_t, double p_sig, double p_df, double p_sed,
 108                      double p_l, double p_h,
 109                      double s_t, double s_sig, double s_df, double s_sed,
 110                      double s_l, double s_h);
 111
 112 /* Global variables to communicate between calc() and postcalc()
 113    should move to a structure in the p union of variable... */
 114 static double v1_n, v1_ss, v1_sum, v1_se, v1_var, v1_mean;
 115 static double v2_n, v2_ss, v2_sum, v2_se, v2_var, v2_mean;
 116 static double v1_z_sum, v1_z_ss;
 117 static double v2_z_sum, v2_z_ss;
 118 static double diff, se_diff, sp, xy_sum, xy_diff, xy_ss;
 119 static int cur_var;
 120
 121 /* some defines for CDFlib */
 122 #define FIND_P 1
 123 #define FIND_CRITICAL_VALUE 2
 124 #define ERROR_SIG -1
 125
 126 #ifdef DEBUGGING
 127 static void debug_print (void);
 128 #endif
 129
 130 /* Parses and executes the T-TEST procedure. */
 131 int
 132 cmd_t_test (void)
 133 {
 134   struct cmd_t_test cmd;
 135
 136   if (!lex_force_match_id ("T"))
 137     return CMD_FAILURE;
 138   lex_match ('-');
 139   lex_match_id ("TEST");
 140
 141   if (!parse_t_test (&cmd))
 142     return CMD_FAILURE;
 143
 144 #if DEBUGGING
 145   debug_print ();
 146 #endif
 147
 148   if (n_pairs > 0)
 149     procedure (precalc, pairs_calc, postcalc);
 150   else
 151     /* probably groups then... */
 152     {
 153       printf ("\n\n  t-tests for independent samples of %s %s\n",
 154               groups->name, groups->label);
 155
 156       for (cur_var = 0; cur_var < cmd.n_variables; cur_var++)
 157         {
 158           v1_n = v1_ss = v1_sum = v1_se = v1_var = v1_mean = 0.0;
 159           v2_n = v2_ss = v2_sum = v2_se = v2_var = v2_mean = 0.0;
 160           v1_z_sum = v1_z_ss = v2_z_sum = v2_z_ss = 0.0;
 161           diff = se_diff = sp = xy_diff = xy_ss = xy_sum = 0.0;
 162
 163           procedure (precalc, groups_calc, g_postcalc);
 164           procedure (precalc, z_calc, postcalc);
 165         }
 166     }
 167
 168   return CMD_SUCCESS;
 169 }
 170
 171 void
 172 precalc (void)
 173 {
 174   return;                       /* rilly void... */
 175 }
 176
 177 int
 178 groups_calc (struct ccase * c)
 179 {
 180   int bad_weight;
 181   double group, w;
 182   struct variable *v = cmd.v_variables[cur_var];
 183   double X = c->data[v->fv].f;
 184
 185   /* Get the weight for this case. */
 186   if (default_dict.weight_index == -1)
 187     w = 1.0;
 188   else
 189     {
 190       w = c->data[default_dict.weight_index].f;
 191       if (w <= 0.0 || w == SYSMIS)
 192         {
 193           w = 0.0;
 194           bad_weight = 1;
 195           printf ("Bad weight\n");
 196         }
 197     }
 198
 199   if (X == SYSMIS || X == 0.0)  /* FIXME: should be USER_MISSING? */
 200     {
 201       /* printf("Missing value\n"); */
 202       return 1;
 203     }
 204   else
 205     {
 206       X = X * w;
 207       group = c->data[groups->fv].f;
 208
 209       if (group == groups_values[0].f)
 210         {
 211           v1_sum += X;
 212           v1_ss += X * X;
 213           v1_n += w;
 214         }
 215       else if (group == groups_values[1].f)
 216         {
 217           v2_sum += X;
 218           v2_ss += X * X;
 219           v2_n += w;
 220         }
 221     }
 222
 223   return 1;
 224 }
 225
 226 void
 227 g_postcalc (void)
 228 {
 229   v1_mean = v1_sum / v1_n;
 230   v2_mean = v2_sum / v2_n;
 231   return;
 232 }
 233
 234 int                             /* this pass generates the z-zcores */
 235 z_calc (struct ccase * c)
 236 {
 237   int bad_weight;
 238   double group, z, w;
 239   struct variable *v = cmd.v_variables[cur_var];
 240   double X = c->data[v->fv].f;
 241
 242   z = 0.0;
 243
 244   /* Get the weight for this case. */
 245   if (default_dict.weight_index == -1)
 246     w = 1.0;
 247   else
 248     {
 249       w = c->data[default_dict.weight_index].f;
 250       if (w <= 0.0 || w == SYSMIS)
 251         {
 252           w = 0.0;
 253           bad_weight = 1;
 254         }
 255     }
 256
 257   if (X == SYSMIS || X == 0.0)  /* FIXME: how to specify user missing? */
 258     {
 259       return 1;
 260     }
 261   else
 262     {
 263       group = c->data[groups->fv].f;
 264       X = w * X;
 265
 266       if (group == groups_values[0].f)
 267         {
 268           z = fabs (X - v1_mean);
 269           v1_z_sum += z;
 270           v1_z_ss += pow (z, 2);
 271         }
 272       else if (group == groups_values[1].f)
 273         {
 274           z = fabs (X - v2_mean);
 275           v2_z_ss += pow (z, 2);
 276           v2_z_sum += z;
 277         }
 278     }
 279
 280   return 1;
 281 }
 282
 283
 284 int
 285 pairs_calc (struct ccase * c)
 286 {
 287   int i;
 288   struct variable *v1, *v2;
 289   double X, Y;
 290
 291   for (i = 0; i < n_pairs; i++)
 292     {
 293
 294       v1 = pairs[i][0];
 295       v2 = pairs[i][1];
 296       X = c->data[v1->fv].f;
 297       Y = c->data[v2->fv].f;
 298
 299       if (X == SYSMIS || Y == SYSMIS)
 300         {
 301           printf ("Missing value\n");
 302         }
 303       else
 304         {
 305           xy_sum += X * Y;
 306           xy_diff += (X - Y);
 307           xy_ss += pow ((X - Y), 2);
 308           v1_sum += X;
 309           v2_sum += Y;
 310           v1_n++;
 311           v2_n++;
 312           v1_ss += (X * X);
 313           v2_ss += (Y * Y);
 314         }
 315     }
 316
 317   return 1;
 318 }
 319
 320 void
 321 postcalc (void)
 322 {
 323   /* Calculate basic statistics */
 324   v1_var = variance (v1_n, v1_ss, v1_sum);      /* variances */
 325   v2_var = variance (v2_n, v2_ss, v2_sum);
 326   v1_se = sqrt (v1_var / v1_n); /* standard errors */
 327   v2_se = sqrt (v2_var / v2_n);
 328   diff = v1_mean - v2_mean;
 329
 330   if (n_pairs > 0)
 331     {
 332       t_pairs ();
 333     }
 334   else
 335     {
 336       t_groups ();
 337     }
 338
 339   return;
 340 }
 341
 342 void
 343 t_groups (void)
 344 {
 345   double df_pooled, t_pooled, t_sep, p_pooled, p_sep;
 346   double crt_t_p, crt_t_s, tmp, v1_z, v2_z, f_levene, p_levene;
 347   double df_sep, se_diff_s, se_diff_p;
 348   struct value_list *val_1, *val_2;
 349
 350   /* Levene's test */
 351   val_1 = malloc (sizeof (struct value_list));
 352   val_1->sum = v1_z_sum;
 353   val_1->ss = v1_z_ss;
 354   val_1->n = v1_n;
 355   val_2 = malloc (sizeof (struct value_list));
 356   val_2->sum = v2_z_sum;
 357   val_2->ss = v2_z_ss;
 358   val_2->n = v2_n;
 359
 360   val_1->next = val_2;
 361   val_2->next = NULL;
 362
 363   f_levene = oneway (&f_levene, &p_levene, val_1);
 364
 365   /* T test results for pooled variances */
 366   se_diff_p = sqrt (pooled_variance (v1_n, v1_var, v2_n, v2_var));
 367   df_pooled = v1_n + v2_n - 2.0;
 368   t_pooled = diff / se_diff_p;
 369   p_pooled = t_sig (t_pooled, df_pooled);
 370   crt_t_p = t_crt (df_pooled, 0.025);
 371
 372   if ((2.0 * p_pooled) >= 1.0)
 373     p_pooled = 1.0 - p_pooled;
 374
 375   /* oh god, the separate variance calculations... */
 376   t_sep = diff / sqrt ((v1_var / v1_n) + (v2_var / v2_n));
 377
 378   tmp = (v1_var / v1_n) + (v2_var / v2_n);
 379   tmp = (v1_var / v1_n) / tmp;
 380   tmp = pow (tmp, 2);
 381   tmp = tmp / (v1_n - 1.0);
 382   v1_z = tmp;
 383
 384   tmp = (v1_var / v1_n) + (v2_var / v2_n);
 385   tmp = (v2_var / v2_n) / tmp;
 386   tmp = pow (tmp, 2);
 387   tmp = tmp / (v2_n - 1.0);
 388   v2_z = tmp;
 389
 390   tmp = 1.0 / (v1_z + v2_z);
 391
 392   df_sep = tmp;
 393   p_sep = t_sig (t_sep, df_sep);
 394   if ((2.0 * p_sep) >= 1.0)
 395     p_sep = 1.0 - p_sep;
 396   crt_t_s = t_crt (df_sep, 0.025);
 397   se_diff_s = sqrt ((v1_var / v1_n) + (v2_var / v2_n));
 398
 399   /* FIXME: convert to a proper PSPP output call */
 400   print_t_groups (groups, &groups_values[0], &groups_values[1],
 401                   v1_n, v2_n, v1_mean, v2_mean,
 402                   sqrt (v1_var), sqrt (v2_var), v1_se, v2_se,
 403                   diff, f_levene, p_levene,
 404                   t_pooled, 2.0 * p_pooled, df_pooled, se_diff_p,
 405                   diff - (crt_t_p * se_diff_p), diff + (crt_t_p * se_diff_p),
 406                   t_sep, 2.0 * p_sep, df_sep, se_diff_s,
 407                 diff - (crt_t_s * se_diff_s), diff + (crt_t_s * se_diff_s));
 408   return;
 409 }
 410
 411 void
 412 t_pairs (void)
 413 {
 414   double cov12, cov11, cov22, r, t, p, crt_t, sp, r_t, r_p;
 415   struct variable *v1, *v2;
 416
 417   v1 = pairs[0][0];
 418   v2 = pairs[0][1];
 419   cov12 = covariance (v1_sum, v1_n, v2_sum, v2_n, xy_sum);
 420   cov11 = covariance (v1_sum, v1_n, v1_sum, v1_n, v1_ss);
 421   cov22 = covariance (v2_sum, v2_n, v2_sum, v2_n, v2_ss);
 422   r = pearson_r (cov12, cov11, cov22);
 423   /* this t and it's associated p is a significance test for the pearson's r */
 424   r_t = r * sqrt ((v1_n - 2.0) / (1.0 - (r * r)));
 425   r_p = t_sig (r_t, v1_n - 2.0);
 426
 427   /* now we move to the t test for the difference in means */
 428   diff = xy_diff / v1_n;
 429   sp = sqrt (variance (v1_n, xy_ss, xy_diff));
 430   se_diff = sp / sqrt (v1_n);
 431   t = diff / se_diff;
 432   crt_t = t_crt (v1_n - 1.0, 0.025);
 433   p = t_sig (t, v1_n - 1.0);
 434
 435
 436   printf ("             Number of        2-tail\n");
 437   printf (" Variable      pairs    Corr   Sig      Mean    SD   SE of Mean\n");
 438   printf ("---------------------------------------------------------------\n");
 439   printf ("%s                                  %8.4f %8.4f %8.4f\n",
 440           v1->name, v1_mean, sqrt (v1_var), v1_se);
 441   printf ("           %8.4f  %0.4f  %0.4f\n", v1_n, r, r_p);
 442   printf ("%s                                  %8.4f %8.4f %8.4f\n",
 443           v2->name, v2_mean, sqrt (v2_var), v2_se);
 444   printf ("---------------------------------------------------------------\n");
 445
 446   printf ("\n\n\n");
 447   printf ("      Paired Differences              |\n");
 448   printf (" Mean          SD         SE of Mean  |  t-value   df   2-tail Sig\n");
 449   printf ("--------------------------------------|---------------------------\n");
 450
 451   printf ("%8.4f    %8.4f    %8.4f      | %8.4f %8.4f %8.4f\n",
 452           diff, sp, se_diff, t, v1_n - 1.0, 2.0 * (1.0 - p));
 453
 454   printf ("95pc CI (%8.4f, %8.4f)          |\n\n",
 455           diff - (se_diff * crt_t), diff + (se_diff * crt_t));
 456
 457   return;
 458 }
 459
 460 static int parse_value (union value *);
 461
 462 /* Parses the GROUPS subcommand. */
 463 int
 464 tts_custom_groups (struct cmd_t_test *cmd unused)
 465 {
 466   groups = parse_variable ();
 467   if (!groups)
 468     {
 469       lex_error (_("expecting variable name in GROUPS subcommand"));
 470       return 0;
 471     }
 472   if (groups->type == T_STRING && groups->width > MAX_SHORT_STRING)
 473     {
 474       msg (SE, _("Long string variable %s is not valid here."),
 475            groups->name);
 476       return 0;
 477     }
 478
 479   if (!lex_match ('('))
 480     {
 481       if (groups->type == NUMERIC)
 482         {
 483           n_groups_values = 2;
 484           groups_values[0].f = 1;
 485           groups_values[1].f = 2;
 486           return 1;
 487         }
 488       else
 489         {
 490           msg (SE, _("When applying GROUPS to a string variable, at "
 491                      "least one value must be specified."));
 492           return 0;
 493         }
 494     }
 495
 496   if (!parse_value (&groups_values[0]))
 497     return 0;
 498   n_groups_values = 1;
 499
 500   lex_match (',');
 501
 502   if (lex_match (')'))
 503     return 1;
 504
 505   if (!parse_value (&groups_values[1]))
 506     return 0;
 507   n_groups_values = 2;
 508
 509   if (!lex_force_match (')'))
 510     return 0;
 511
 512   return 1;
 513 }
 514
 515 /* Parses the current token (numeric or string, depending on the
 516    variable in `groups') into value V and returns success. */
 517 static int
 518 parse_value (union value * v)
 519 {
 520   if (groups->type == NUMERIC)
 521     {
 522       if (!lex_force_num ())
 523         return 0;
 524       v->f = tokval;
 525     }
 526   else
 527     {
 528       if (!lex_force_string ())
 529         return 0;
 530       strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr));
 531     }
 532
 533   lex_get ();
 534
 535   return 1;
 536 }
 537
 538 /* Parses the PAIRS subcommand. */
 539 static int
 540 tts_custom_pairs (struct cmd_t_test *cmd unused)
 541 {
 542   struct variable **vars;
 543   int n_before_WITH;
 544   int n_vars;
 545   int paired;
 546   int extra;
 547 #if DEBUGGING
 548   int n_predicted;
 549 #endif
 550
 551   if ((token != T_ID || !is_varname (tokid)) && token != T_ALL)
 552     return 2;
 553   if (!parse_variables (&default_dict, &vars, &n_vars,
 554                         PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
 555     return 0;
 556
 557   assert (n_vars);
 558   if (lex_match (T_WITH))
 559     {
 560       n_before_WITH = n_vars;
 561
 562       if (!parse_variables (&default_dict, &vars, &n_vars,
 563                             PV_DUPLICATE | PV_APPEND
 564                             | PV_NUMERIC | PV_NO_SCRATCH))
 565         {
 566           free (vars);
 567           return 0;
 568         }
 569     }
 570   else
 571     n_before_WITH = 0;
 572
 573   paired = (lex_match ('(') && lex_match_id ("PAIRED") && lex_match (')'));
 574
 575   if (paired)
 576     {
 577       if (n_before_WITH * 2 != n_vars)
 578         {
 579           free (vars);
 580           msg (SE, _("PAIRED was specified but the number of variables "
 581                      "preceding WITH (%d) did not match the number "
 582                      "following (%d)."),
 583                n_before_WITH, n_vars - n_before_WITH);
 584           return 0;
 585         }
 586
 587       extra = n_before_WITH;
 588     }
 589   else if (n_before_WITH)
 590     extra = n_before_WITH * (n_vars - n_before_WITH);
 591   else
 592     {
 593       if (n_vars < 2)
 594         {
 595           free (vars);
 596           msg (SE, _("At least two variables must be specified "
 597                      "on PAIRS."));
 598           return 0;
 599         }
 600
 601       extra = n_vars * (n_vars - 1) / 2;
 602     }
 603
 604 #if DEBUGGING
 605   n_predicted = n_pairs + extra;
 606 #endif
 607
 608   pairs = xrealloc (pairs, sizeof (struct variable *[2]) * (n_pairs + extra));
 609
 610   if (paired)
 611     {
 612       int i;
 613
 614       for (i = 0; i < extra; i++)
 615         {
 616           pairs[n_pairs][0] = vars[i];
 617           pairs[n_pairs++][1] = vars[i + extra];
 618         }
 619     }
 620   else if (n_before_WITH)
 621     {
 622       int i;
 623
 624       for (i = 0; i < n_before_WITH; i++)
 625         {
 626           int j;
 627
 628           for (j = n_before_WITH; j < n_vars; j++)
 629             {
 630               pairs[n_pairs][0] = vars[i];
 631               pairs[n_pairs++][1] = vars[j];
 632             }
 633         }
 634     }
 635   else
 636     {
 637       int i;
 638
 639       for (i = 0; i < n_vars; i++)
 640         {
 641           int j;
 642
 643           for (j = i + 1; j < n_vars; j++)
 644             {
 645               pairs[n_pairs][0] = vars[i];
 646               pairs[n_pairs++][1] = vars[j];
 647             }
 648         }
 649     }
 650
 651 #if DEBUGGING
 652   assert (n_pairs == n_predicted);
 653 #endif
 654
 655   free (vars);
 656   return 1;
 657 }
 658
 659 #if DEBUGGING
 660 static void
 661 debug_print (void)
 662 {
 663   printf ("T-TEST\n");
 664   if (groups)
 665     {
 666       printf ("  GROUPS=%s", groups->name);
 667       if (n_groups_values)
 668         {
 669           int i;
 670
 671           printf (" (");
 672           for (i = 0; i < n_groups_values; i++)
 673             if (groups->type == NUMERIC)
 674               printf ("%g%s", groups_values[i].f, i ? " " : "");
 675             else
 676               printf ("%.*s%s", groups->width, groups_values[i].s,
 677                       i ? " " : "");
 678           printf (")");
 679         }
 680       printf ("\n");
 681     }
 682   if (cmd.n_variables)
 683     {
 684       int i;
 685
 686       printf ("  VARIABLES=");
 687       for (i = 0; i < cmd.n_variables; i++)
 688         printf ("%s ", cmd.v_variables[i]->name);
 689       printf ("\n");
 690     }
 691   if (cmd.sbc_pairs)
 692     {
 693       int i;
 694
 695       printf ("  PAIRS=");
 696       for (i = 0; i < n_pairs; i++)
 697         printf ("%s ", pairs[i][0]->name);
 698       printf ("WITH");
 699       for (i = 0; i < n_pairs; i++)
 700         printf (" %s", pairs[i][1]->name);
 701       printf (" (PAIRED)\n");
 702     }
 703   printf ("  MISSING=%s %s\n",
 704           cmd.miss == TTS_ANALYSIS ? "ANALYSIS" : "LISTWISE",
 705           cmd.miss == TTS_INCLUDE ? "INCLUDE" : "EXCLUDE");
 706   printf ("  FORMAT=%s\n",
 707           cmd.fmt == TTS_LABELS ? "LABELS" : "NOLABELS");
 708   if (cmd.criteria != NOT_LONG)
 709     printf ("  CRITERIA=%f\n", cmd.criteria);
 710 }
 711
 712 #endif /* DEBUGGING */
 713
 714 /* Here are some general routines tha should probably be moved into
 715    a separate library and documented as part of the PSPP "API"   */
 716 double
 717 variance (double n, double ss, double sum)
 718 {
 719   return ((ss - ((sum * sum) / n)) / (n - 1.0));
 720 }
 721
 722 double
 723 pooled_variance (double n_1, double var_1, double n_2, double var_2)
 724 {
 725   double tmp;
 726
 727   tmp = n_1 + n_2 - 2.0;
 728   tmp = (((n_1 - 1.0) * var_1) + ((n_2 - 1.0) * var_2)) / tmp;
 729   tmp = tmp * ((n_1 + n_2) / (n_1 * n_2));
 730   return tmp;
 731 }
 732
 733 double
 734 oneway (double *f, double *p, struct value_list *levels)
 735 {
 736   double k, SSTR, SSE, SSTO, N, MSTR, MSE, sum, dftr, dfe, print;
 737   struct value_list *g;
 738
 739   k = 0.0;
 740
 741   for (g = levels; g != NULL; g = g->next)
 742     {
 743       k++;
 744       sum += g->sum;
 745       N += g->n;
 746       SSTR += g->ss - (pow (g->sum, 2) / g->n);
 747       SSTO += g->ss;
 748     }
 749
 750   SSTO = SSTO - (pow (sum, 2) / N);
 751   SSE = SSTO - SSTR;
 752
 753   dftr = N - k;
 754   dfe = k - 1.0;
 755   MSTR = SSTR / dftr;
 756   MSE = SSE / dfe;
 757
 758   *f = (MSE / MSTR);
 759   *p = f_sig (*f, dfe, dftr);
 760
 761   print = 1.0;
 762   if (print == 1.0)
 763     {
 764       printf ("sum1 %f, sum2 %f, ss1 %f, ss2 %f\n",
 765               levels->sum, levels->next->sum, levels->ss, levels->next->ss);
 766       printf ("                - - - - - - O N E W A Y - - - - - -\n\n");
 767       printf ("   Variable %s %s\n",
 768               cmd.v_variables[0]->name, cmd.v_variables[0]->label);
 769       printf ("By Variable %s %s\n", groups->name, groups->label);
 770       printf ("\n             Analysis of Variance\n\n");
 771       printf ("                    Sum of    Mean     F       F\n");
 772       printf ("Source       D.F.  Squares  Squares  Ratio   Prob\n\n");
 773       printf ("Between   %8.0f %8.4f %8.4f %8.4f %8.4f\n",
 774               dfe, SSE, MSE, *f, *p);
 775       printf ("Within    %8.0f %8.4f %8.4f\n", dftr, SSTR, MSTR);
 776       printf ("Total     %8.0f %8.4f\n\n\n", N - 1.0, SSTO);
 777     }
 778   return (*f);
 779 }
 780
 781 double
 782 f_sig (double f, double dfn, double dfd)
 783 {
 784   int which, status;
 785   double p, q, bound;
 786
 787   which = FIND_P;
 788   status = 1;
 789   p = q = bound = 0.0;
 790   cdff (&which, &p, &q, &f, &dfn, &dfd, &status, &bound);
 791
 792   switch (status)
 793     {
 794     case -1:
 795       {
 796         printf ("Parameter 1 is out of range\n");
 797         break;
 798       }
 799     case -2:
 800       {
 801         printf ("Parameter 2 is out of range\n");
 802         break;
 803       }
 804     case -3:
 805       {
 806         printf ("Parameter 3 is out of range\n");
 807         break;
 808       }
 809     case -4:
 810       {
 811         printf ("Parameter 4 is out of range\n");
 812         break;
 813       }
 814     case -5:
 815       {
 816         printf ("Parameter 5 is out of range\n");
 817         break;
 818       }
 819     case -6:
 820       {
 821         printf ("Parameter 6 is out of range\n");
 822         break;
 823       }
 824     case -7:
 825       {
 826         printf ("Parameter 7 is out of range\n");
 827         break;
 828       }
 829     case -8:
 830       {
 831         printf ("Parameter 8 is out of range\n");
 832         break;
 833       }
 834     case 0:
 835       {
 836         /* printf( "Command completed successfully\n" ); */
 837         break;
 838       }
 839     case 1:
 840       {
 841         printf ("Answer appears to be lower than the lowest search bound\n");
 842         break;
 843       }
 844     case 2:
 845       {
 846         printf ("Answer appears to be higher than the greatest search bound\n");
 847         break;
 848       }
 849     case 3:
 850       {
 851         printf ("P - Q NE 1\n");
 852         break;
 853       }
 854     }
 855
 856   if (status)
 857     {
 858       return (double) ERROR_SIG;
 859     }
 860   else
 861     {
 862       return q;
 863     }
 864 }
 865
 866 double
 867 t_crt (double df, double q)
 868 {
 869   int which, status;
 870   double p, bound, t;
 871
 872   which = FIND_CRITICAL_VALUE;
 873   bound = 0.0;
 874   p = 1.0 - q;
 875   t = 0.0;
 876
 877   cdft (&which, &p, &q, &t, &df, &status, &bound);
 878
 879   switch (status)
 880     {
 881     case -1:
 882       {
 883         printf ("t_crt: Parameter 1 is out of range\n");
 884         break;
 885       }
 886     case -2:
 887       {
 888         printf ("t_crt: value of p (%f) is out of range\n", p);
 889         break;
 890       }
 891     case -3:
 892       {
 893         printf ("t_crt: value of q (%f) is out of range\n", q);
 894         break;
 895       }
 896     case -4:
 897       {
 898         printf ("t_crt: value of df (%f) is out of range\n", df);
 899         break;
 900       }
 901     case -5:
 902       {
 903         printf ("t_crt: Parameter 5 is out of range\n");
 904         break;
 905       }
 906     case -6:
 907       {
 908         printf ("t_crt: Parameter 6 is out of range\n");
 909         break;
 910       }
 911     case -7:
 912       {
 913         printf ("t_crt: Parameter 7 is out of range\n");
 914         break;
 915       }
 916     case 0:
 917       {
 918         /* printf( "Command completed successfully\n" ); */
 919         break;
 920       }
 921     case 1:
 922       {
 923         printf ("t_crt: Answer appears to be lower than the lowest search bound\n");
 924         break;
 925       }
 926     case 2:
 927       {
 928         printf ("t_crt: Answer appears to be higher than the greatest search bound\n");
 929         break;
 930       }
 931     case 3:
 932       {
 933         printf ("t_crt: P - Q NE 1\n");
 934         break;
 935       }
 936     }
 937
 938   if (status)
 939     {
 940       return (double) ERROR_SIG;
 941     }
 942   else
 943     {
 944       return t;
 945     }
 946 }
 947
 948 double
 949 t_sig (double t, double df)
 950 {
 951   int which, status;
 952   double p, q, bound;
 953
 954   which = FIND_P;
 955   q = 0.0;
 956   p = 0.0;
 957   bound = 0.0;
 958
 959   cdft (&which, &p, &q, &t, &df, &status, &bound);
 960
 961   switch (status)
 962     {
 963     case -1:
 964       {
 965         printf ("t-sig: Parameter 1 is out of range\n");
 966         break;
 967       }
 968     case -2:
 969       {
 970         printf ("t-sig: Parameter 2 is out of range\n");
 971         break;
 972       }
 973     case -3:
 974       {
 975         printf ("t-sig: Parameter 3 is out of range\n");
 976         break;
 977       }
 978     case -4:
 979       {
 980         printf ("t-sig: Parameter 4 is out of range\n");
 981         break;
 982       }
 983     case -5:
 984       {
 985         printf ("t-sig: Parameter 5 is out of range\n");
 986         break;
 987       }
 988     case -6:
 989       {
 990         printf ("t-sig: Parameter 6 is out of range\n");
 991         break;
 992       }
 993     case -7:
 994       {
 995         printf ("t-sig: Parameter 7 is out of range\n");
 996         break;
 997       }
 998     case 0:
 999       {
1000         /* printf( "Command completed successfully\n" ); */
1001         break;
1002       }
1003     case 1:
1004       {
1005         printf ("t-sig: Answer appears to be lower than the lowest search bound\n");
1006         break;
1007       }
1008     case 2:
1009       {
1010         printf ("t-sig: Answer appears to be higher than the greatest search bound\n");
1011         break;
1012       }
1013     case 3:
1014       {
1015         printf ("t-sig: P - Q NE 1\n");
1016         break;
1017       }
1018     }
1019
1020   if (status)
1021     {
1022       return (double) ERROR_SIG;
1023     }
1024   else
1025     {
1026       return q;
1027     }
1028 }
1029
1030 double
1031 covariance (double x_sum, double x_n, double y_sum, double y_n, double ss)
1032 {
1033   double tmp;
1034
1035   tmp = x_sum * y_sum;
1036   tmp = tmp / x_n;
1037   tmp = ss - tmp;
1038   tmp = (tmp / (x_n + y_n - 1.0));
1039   return tmp;
1040 }
1041
1042 double
1043 pearson_r (double c_xy, double c_xx, double c_yy)
1044 {
1045   return (c_xy / (sqrt (c_xx * c_yy)));
1046 }
1047
1048 void
1049 print_t_groups (struct variable * grps, union value * g1, union value * g2,
1050                 double n1, double n2, double mean1, double mean2,
1051                 double sd1, double sd2, double se1, double se2,
1052                 double diff, double l_f, double l_p,
1053                 double p_t, double p_sig, double p_df, double p_sed,
1054                 double p_l, double p_h,
1055                 double s_t, double s_sig, double s_df, double s_sed,
1056                 double s_l, double s_h)
1057 {
1058
1059   /* Display all this shit as SPSS 6.0 does (roughly) */
1060   printf ("\n\n                 Number                                 \n");
1061   printf ("   Variable     of Cases    Mean      SD      SE of Mean\n");
1062   printf ("-----------------------------------------------------------\n");
1063   printf ("   %s %s\n\n", cmd.v_variables[cur_var]->name, cmd.v_variables[cur_var]->label);
1064   printf ("%s %8.4f %8.0f    %8.4f  %8.3f    %8.3f\n",
1065           val_labs_find (grps->val_labs, *g1), g1->f, n1, mean1, sd1, se1);
1066   printf ("%s %8.4f %8.0f    %8.4f  %8.3f    %8.3f\n",
1067           val_labs_find (grps->val_labs, *g2), g2->f, n2, mean2, sd2, se2);
1068   printf ("-----------------------------------------------------------\n");
1069   printf ("\n   Mean Difference = %8.4f\n", diff);
1070   printf ("\n   Levene's Test for Equality of Variances: F= %.3f  P= %.3f\n",
1071           l_f, l_p);
1072   printf ("\n\n   t-test for Equality of Means                         95pc     \n");
1073   printf ("Variances   t-value    df   2-Tail Sig SE of Diff    CI for Diff  \n");
1074   printf ("-----------------------------------------------------------------\n");
1075   printf ("Equal     %8.2f %8.0f %8.3f %8.3f (%8.3f, %8.3f)\n",
1076           p_t, p_df, p_sig, p_sed, p_l, p_h);
1077   printf ("Unequal   %8.2f %8.2f %8.3f %8.3f (%8.3f, %8.3f)\n",
1078           s_t, s_df, s_sig, s_sed, s_l, s_h);
1079   printf ("-----------------------------------------------------------------\n");
1080 }
1081
1082 /*
1083    Local Variables:
1084    mode: c
1085    End:
1086 */