src/t-test.q

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   3    Written by Ben Pfaff <blp@gnu.org>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  18    02111-1307, USA. */
  19
  20 #include <config.h>
  21 #include <assert.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24 #include <math.h>
  25 #include "alloc.h"
  26 #include "str.h"
  27 #include "dcdflib/cdflib.h"
  28 #include "command.h"
  29 #include "lexer.h"
  30 #include "error.h"
  31 #include "magic.h"
  32 #include "var.h"
  33 #include "vfm.h"
  34
  35 /* (specification)
  36    "T-TEST" (tts_):
  37      groups=custom;
  38      variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
  39      *+pairs=custom;
  40      +missing=miss:!analysis/listwise,
  41              incl:include/!exclude;
  42      +format=fmt:!labels/nolabels;
  43      +criteria=:ci(d:criteria,"%s > 0. && %s < 1.").
  44 */
  45 /* (declarations) */
  46 /* (functions) */
  47
  48 #undef DEBUGGING
  49 #define DEBUGGING 1
  50 #include "debug-print.h"
  51
  52 /* Command parsing information. */
  53 static struct cmd_t_test cmd;
  54
  55 /* Variable for the GROUPS subcommand, if given. */
  56 static struct variable *groups;
  57
  58 /* GROUPS: Number of values specified by the user; the values
  59    specified if any. */
  60 static int n_groups_values;
  61 static union value groups_values[2];
  62
  63 /* PAIRED: Number of pairs; each pair. */
  64 static int n_pairs;
  65 static struct variable *(*pairs)[2];
  66
  67 /* Routines to scan data and perform t-tests */
  68 static void precalc (void);
  69 static void postcalc (void);
  70 static void g_postcalc (void);
  71 static void t_pairs (void);
  72 static void t_groups (void);
  73 static int groups_calc (struct ccase *);
  74 static int pairs_calc (struct ccase *);
  75 static int z_calc (struct ccase *);
  76
  77 struct value_list
  78   {
  79     double sum;
  80     double ss;
  81     double n;
  82     struct value_list *next;
  83   };
  84
  85 /* general workhorses - should  move these to a separate library... */
  86 double variance (double n, double ss, double sum);
  87
  88 double covariance (double x_sum, double x_n,
  89                    double y_sum, double y_n, double ss);
  90
  91 double pooled_variance (double n_1, double var_1,
  92                         double n_2, double var_2);
  93
  94 double oneway (double *f, double *p, struct value_list *list);
  95
  96 double pearson_r (double c_xy, double c_xx, double c_yy);
  97
  98 double f_sig (double f, double dfn, double dfd);
  99 double t_crt (double df, double q);
 100 double t_sig (double t, double df);
 101
 102 /* massive function simply to remove any responsibility for output
 103    from the function which does the actual t-test calculations */
 104 void print_t_groups (struct variable * grps, union value * g1, union value * g2,
 105                      double n1, double n2, double mean1, double mean2,
 106                      double sd1, double sd2, double se1, double se2,
 107                      double diff, double l_f, double l_p,
 108                      double p_t, double p_sig, double p_df, double p_sed,
 109                      double p_l, double p_h,
 110                      double s_t, double s_sig, double s_df, double s_sed,
 111                      double s_l, double s_h);
 112
 113 /* Global variables to communicate between calc() and postcalc()
 114    should move to a structure in the p union of variable... */
 115 static double v1_n, v1_ss, v1_sum, v1_se, v1_var, v1_mean;
 116 static double v2_n, v2_ss, v2_sum, v2_se, v2_var, v2_mean;
 117 static double v1_z_sum, v1_z_ss;
 118 static double v2_z_sum, v2_z_ss;
 119 static double diff, se_diff, sp, xy_sum, xy_diff, xy_ss;
 120 static int cur_var;
 121
 122 /* some defines for CDFlib */
 123 #define FIND_P 1
 124 #define FIND_CRITICAL_VALUE 2
 125 #define ERROR_SIG -1
 126
 127 #ifdef DEBUGGING
 128 static void debug_print (void);
 129 #endif
 130
 131 /* Parses and executes the T-TEST procedure. */
 132 int
 133 cmd_t_test (void)
 134 {
 135   struct cmd_t_test cmd;
 136
 137   if (!lex_force_match_id ("T"))
 138     return CMD_FAILURE;
 139   lex_match ('-');
 140   lex_match_id ("TEST");
 141
 142   if (!parse_t_test (&cmd))
 143     return CMD_FAILURE;
 144
 145 #if DEBUGGING
 146   debug_print ();
 147 #endif
 148
 149   if (n_pairs > 0)
 150     procedure (precalc, pairs_calc, postcalc);
 151   else
 152     /* probably groups then... */
 153     {
 154       printf ("\n\n  t-tests for independent samples of %s %s\n",
 155               groups->name, groups->label);
 156
 157       for (cur_var = 0; cur_var < cmd.n_variables; cur_var++)
 158         {
 159           v1_n = v1_ss = v1_sum = v1_se = v1_var = v1_mean = 0.0;
 160           v2_n = v2_ss = v2_sum = v2_se = v2_var = v2_mean = 0.0;
 161           v1_z_sum = v1_z_ss = v2_z_sum = v2_z_ss = 0.0;
 162           diff = se_diff = sp = xy_diff = xy_ss = xy_sum = 0.0;
 163
 164           procedure (precalc, groups_calc, g_postcalc);
 165           procedure (precalc, z_calc, postcalc);
 166         }
 167     }
 168
 169   return CMD_SUCCESS;
 170 }
 171
 172 void
 173 precalc (void)
 174 {
 175   return;                       /* rilly void... */
 176 }
 177
 178 int
 179 groups_calc (struct ccase * c)
 180 {
 181   int bad_weight;
 182   double group, w;
 183   struct variable *v = cmd.v_variables[cur_var];
 184   double X = c->data[v->fv].f;
 185
 186   /* Get the weight for this case. */
 187   if (default_dict.weight_index == -1)
 188     w = 1.0;
 189   else
 190     {
 191       w = c->data[default_dict.weight_index].f;
 192       if (w <= 0.0 || w == SYSMIS)
 193         {
 194           w = 0.0;
 195           bad_weight = 1;
 196           printf ("Bad weight\n");
 197         }
 198     }
 199
 200   if (X == SYSMIS || X == 0.0)  /* FIXME: should be USER_MISSING? */
 201     {
 202       /* printf("Missing value\n"); */
 203       return 1;
 204     }
 205   else
 206     {
 207       X = X * w;
 208       group = c->data[groups->fv].f;
 209
 210       if (group == groups_values[0].f)
 211         {
 212           v1_sum += X;
 213           v1_ss += X * X;
 214           v1_n += w;
 215         }
 216       else if (group == groups_values[1].f)
 217         {
 218           v2_sum += X;
 219           v2_ss += X * X;
 220           v2_n += w;
 221         }
 222     }
 223
 224   return 1;
 225 }
 226
 227 void
 228 g_postcalc (void)
 229 {
 230   v1_mean = v1_sum / v1_n;
 231   v2_mean = v2_sum / v2_n;
 232   return;
 233 }
 234
 235 int                             /* this pass generates the z-zcores */
 236 z_calc (struct ccase * c)
 237 {
 238   int bad_weight;
 239   double group, z, w;
 240   struct variable *v = cmd.v_variables[cur_var];
 241   double X = c->data[v->fv].f;
 242
 243   z = 0.0;
 244
 245   /* Get the weight for this case. */
 246   if (default_dict.weight_index == -1)
 247     w = 1.0;
 248   else
 249     {
 250       w = c->data[default_dict.weight_index].f;
 251       if (w <= 0.0 || w == SYSMIS)
 252         {
 253           w = 0.0;
 254           bad_weight = 1;
 255         }
 256     }
 257
 258   if (X == SYSMIS || X == 0.0)  /* FIXME: how to specify user missing? */
 259     {
 260       return 1;
 261     }
 262   else
 263     {
 264       group = c->data[groups->fv].f;
 265       X = w * X;
 266
 267       if (group == groups_values[0].f)
 268         {
 269           z = fabs (X - v1_mean);
 270           v1_z_sum += z;
 271           v1_z_ss += pow (z, 2);
 272         }
 273       else if (group == groups_values[1].f)
 274         {
 275           z = fabs (X - v2_mean);
 276           v2_z_ss += pow (z, 2);
 277           v2_z_sum += z;
 278         }
 279     }
 280
 281   return 1;
 282 }
 283
 284
 285 int
 286 pairs_calc (struct ccase * c)
 287 {
 288   int i;
 289   struct variable *v1, *v2;
 290   double X, Y;
 291
 292   for (i = 0; i < n_pairs; i++)
 293     {
 294
 295       v1 = pairs[i][0];
 296       v2 = pairs[i][1];
 297       X = c->data[v1->fv].f;
 298       Y = c->data[v2->fv].f;
 299
 300       if (X == SYSMIS || Y == SYSMIS)
 301         {
 302           printf ("Missing value\n");
 303         }
 304       else
 305         {
 306           xy_sum += X * Y;
 307           xy_diff += (X - Y);
 308           xy_ss += pow ((X - Y), 2);
 309           v1_sum += X;
 310           v2_sum += Y;
 311           v1_n++;
 312           v2_n++;
 313           v1_ss += (X * X);
 314           v2_ss += (Y * Y);
 315         }
 316     }
 317
 318   return 1;
 319 }
 320
 321 void
 322 postcalc (void)
 323 {
 324   /* Calculate basic statistics */
 325   v1_var = variance (v1_n, v1_ss, v1_sum);      /* variances */
 326   v2_var = variance (v2_n, v2_ss, v2_sum);
 327   v1_se = sqrt (v1_var / v1_n); /* standard errors */
 328   v2_se = sqrt (v2_var / v2_n);
 329   diff = v1_mean - v2_mean;
 330
 331   if (n_pairs > 0)
 332     {
 333       t_pairs ();
 334     }
 335   else
 336     {
 337       t_groups ();
 338     }
 339
 340   return;
 341 }
 342
 343 void
 344 t_groups (void)
 345 {
 346   double df_pooled, t_pooled, t_sep, p_pooled, p_sep;
 347   double crt_t_p, crt_t_s, tmp, v1_z, v2_z, f_levene, p_levene;
 348   double df_sep, se_diff_s, se_diff_p;
 349   struct value_list *val_1, *val_2;
 350
 351   /* Levene's test */
 352   val_1 = malloc (sizeof (struct value_list));
 353   val_1->sum = v1_z_sum;
 354   val_1->ss = v1_z_ss;
 355   val_1->n = v1_n;
 356   val_2 = malloc (sizeof (struct value_list));
 357   val_2->sum = v2_z_sum;
 358   val_2->ss = v2_z_ss;
 359   val_2->n = v2_n;
 360
 361   val_1->next = val_2;
 362   val_2->next = NULL;
 363
 364   f_levene = oneway (&f_levene, &p_levene, val_1);
 365
 366   /* T test results for pooled variances */
 367   se_diff_p = sqrt (pooled_variance (v1_n, v1_var, v2_n, v2_var));
 368   df_pooled = v1_n + v2_n - 2.0;
 369   t_pooled = diff / se_diff_p;
 370   p_pooled = t_sig (t_pooled, df_pooled);
 371   crt_t_p = t_crt (df_pooled, 0.025);
 372
 373   if ((2.0 * p_pooled) >= 1.0)
 374     p_pooled = 1.0 - p_pooled;
 375
 376   /* oh god, the separate variance calculations... */
 377   t_sep = diff / sqrt ((v1_var / v1_n) + (v2_var / v2_n));
 378
 379   tmp = (v1_var / v1_n) + (v2_var / v2_n);
 380   tmp = (v1_var / v1_n) / tmp;
 381   tmp = pow (tmp, 2);
 382   tmp = tmp / (v1_n - 1.0);
 383   v1_z = tmp;
 384
 385   tmp = (v1_var / v1_n) + (v2_var / v2_n);
 386   tmp = (v2_var / v2_n) / tmp;
 387   tmp = pow (tmp, 2);
 388   tmp = tmp / (v2_n - 1.0);
 389   v2_z = tmp;
 390
 391   tmp = 1.0 / (v1_z + v2_z);
 392
 393   df_sep = tmp;
 394   p_sep = t_sig (t_sep, df_sep);
 395   if ((2.0 * p_sep) >= 1.0)
 396     p_sep = 1.0 - p_sep;
 397   crt_t_s = t_crt (df_sep, 0.025);
 398   se_diff_s = sqrt ((v1_var / v1_n) + (v2_var / v2_n));
 399
 400   /* FIXME: convert to a proper PSPP output call */
 401   print_t_groups (groups, &groups_values[0], &groups_values[1],
 402                   v1_n, v2_n, v1_mean, v2_mean,
 403                   sqrt (v1_var), sqrt (v2_var), v1_se, v2_se,
 404                   diff, f_levene, p_levene,
 405                   t_pooled, 2.0 * p_pooled, df_pooled, se_diff_p,
 406                   diff - (crt_t_p * se_diff_p), diff + (crt_t_p * se_diff_p),
 407                   t_sep, 2.0 * p_sep, df_sep, se_diff_s,
 408                 diff - (crt_t_s * se_diff_s), diff + (crt_t_s * se_diff_s));
 409   return;
 410 }
 411
 412 void
 413 t_pairs (void)
 414 {
 415   double cov12, cov11, cov22, r, t, p, crt_t, sp, r_t, r_p;
 416   struct variable *v1, *v2;
 417
 418   v1 = pairs[0][0];
 419   v2 = pairs[0][1];
 420   cov12 = covariance (v1_sum, v1_n, v2_sum, v2_n, xy_sum);
 421   cov11 = covariance (v1_sum, v1_n, v1_sum, v1_n, v1_ss);
 422   cov22 = covariance (v2_sum, v2_n, v2_sum, v2_n, v2_ss);
 423   r = pearson_r (cov12, cov11, cov22);
 424   /* this t and it's associated p is a significance test for the pearson's r */
 425   r_t = r * sqrt ((v1_n - 2.0) / (1.0 - (r * r)));
 426   r_p = t_sig (r_t, v1_n - 2.0);
 427
 428   /* now we move to the t test for the difference in means */
 429   diff = xy_diff / v1_n;
 430   sp = sqrt (variance (v1_n, xy_ss, xy_diff));
 431   se_diff = sp / sqrt (v1_n);
 432   t = diff / se_diff;
 433   crt_t = t_crt (v1_n - 1.0, 0.025);
 434   p = t_sig (t, v1_n - 1.0);
 435
 436
 437   printf ("             Number of        2-tail\n");
 438   printf (" Variable      pairs    Corr   Sig      Mean    SD   SE of Mean\n");
 439   printf ("---------------------------------------------------------------\n");
 440   printf ("%s                                  %8.4f %8.4f %8.4f\n",
 441           v1->name, v1_mean, sqrt (v1_var), v1_se);
 442   printf ("           %8.4f  %0.4f  %0.4f\n", v1_n, r, r_p);
 443   printf ("%s                                  %8.4f %8.4f %8.4f\n",
 444           v2->name, v2_mean, sqrt (v2_var), v2_se);
 445   printf ("---------------------------------------------------------------\n");
 446
 447   printf ("\n\n\n");
 448   printf ("      Paired Differences              |\n");
 449   printf (" Mean          SD         SE of Mean  |  t-value   df   2-tail Sig\n");
 450   printf ("--------------------------------------|---------------------------\n");
 451
 452   printf ("%8.4f    %8.4f    %8.4f      | %8.4f %8.4f %8.4f\n",
 453           diff, sp, se_diff, t, v1_n - 1.0, 2.0 * (1.0 - p));
 454
 455   printf ("95pc CI (%8.4f, %8.4f)          |\n\n",
 456           diff - (se_diff * crt_t), diff + (se_diff * crt_t));
 457
 458   return;
 459 }
 460
 461 static int parse_value (union value *);
 462
 463 /* Parses the GROUPS subcommand. */
 464 int
 465 tts_custom_groups (struct cmd_t_test *cmd unused)
 466 {
 467   groups = parse_variable ();
 468   if (!groups)
 469     {
 470       lex_error (_("expecting variable name in GROUPS subcommand"));
 471       return 0;
 472     }
 473   if (groups->type == T_STRING && groups->width > MAX_SHORT_STRING)
 474     {
 475       msg (SE, _("Long string variable %s is not valid here."),
 476            groups->name);
 477       return 0;
 478     }
 479
 480   if (!lex_match ('('))
 481     {
 482       if (groups->type == NUMERIC)
 483         {
 484           n_groups_values = 2;
 485           groups_values[0].f = 1;
 486           groups_values[1].f = 2;
 487           return 1;
 488         }
 489       else
 490         {
 491           msg (SE, _("When applying GROUPS to a string variable, at "
 492                      "least one value must be specified."));
 493           return 0;
 494         }
 495     }
 496
 497   if (!parse_value (&groups_values[0]))
 498     return 0;
 499   n_groups_values = 1;
 500
 501   lex_match (',');
 502
 503   if (lex_match (')'))
 504     return 1;
 505
 506   if (!parse_value (&groups_values[1]))
 507     return 0;
 508   n_groups_values = 2;
 509
 510   if (!lex_force_match (')'))
 511     return 0;
 512
 513   return 1;
 514 }
 515
 516 /* Parses the current token (numeric or string, depending on the
 517    variable in `groups') into value V and returns success. */
 518 static int
 519 parse_value (union value * v)
 520 {
 521   if (groups->type == NUMERIC)
 522     {
 523       if (!lex_force_num ())
 524         return 0;
 525       v->f = tokval;
 526     }
 527   else
 528     {
 529       if (!lex_force_string ())
 530         return 0;
 531       strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr));
 532     }
 533
 534   lex_get ();
 535
 536   return 1;
 537 }
 538
 539 /* Parses the PAIRS subcommand. */
 540 static int
 541 tts_custom_pairs (struct cmd_t_test *cmd unused)
 542 {
 543   struct variable **vars;
 544   int n_before_WITH;
 545   int n_vars;
 546   int paired;
 547   int extra;
 548 #if DEBUGGING
 549   int n_predicted;
 550 #endif
 551
 552   if ((token != T_ID || !is_varname (tokid)) && token != T_ALL)
 553     return 2;
 554   if (!parse_variables (&default_dict, &vars, &n_vars,
 555                         PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
 556     return 0;
 557
 558   assert (n_vars);
 559   if (lex_match (T_WITH))
 560     {
 561       n_before_WITH = n_vars;
 562
 563       if (!parse_variables (&default_dict, &vars, &n_vars,
 564                             PV_DUPLICATE | PV_APPEND
 565                             | PV_NUMERIC | PV_NO_SCRATCH))
 566         {
 567           free (vars);
 568           return 0;
 569         }
 570     }
 571   else
 572     n_before_WITH = 0;
 573
 574   paired = (lex_match ('(') && lex_match_id ("PAIRED") && lex_match (')'));
 575
 576   if (paired)
 577     {
 578       if (n_before_WITH * 2 != n_vars)
 579         {
 580           free (vars);
 581           msg (SE, _("PAIRED was specified but the number of variables "
 582                      "preceding WITH (%d) did not match the number "
 583                      "following (%d)."),
 584                n_before_WITH, n_vars - n_before_WITH);
 585           return 0;
 586         }
 587
 588       extra = n_before_WITH;
 589     }
 590   else if (n_before_WITH)
 591     extra = n_before_WITH * (n_vars - n_before_WITH);
 592   else
 593     {
 594       if (n_vars < 2)
 595         {
 596           free (vars);
 597           msg (SE, _("At least two variables must be specified "
 598                      "on PAIRS."));
 599           return 0;
 600         }
 601
 602       extra = n_vars * (n_vars - 1) / 2;
 603     }
 604
 605 #if DEBUGGING
 606   n_predicted = n_pairs + extra;
 607 #endif
 608
 609   pairs = xrealloc (pairs, sizeof (struct variable *[2]) * (n_pairs + extra));
 610
 611   if (paired)
 612     {
 613       int i;
 614
 615       for (i = 0; i < extra; i++)
 616         {
 617           pairs[n_pairs][0] = vars[i];
 618           pairs[n_pairs++][1] = vars[i + extra];
 619         }
 620     }
 621   else if (n_before_WITH)
 622     {
 623       int i;
 624
 625       for (i = 0; i < n_before_WITH; i++)
 626         {
 627           int j;
 628
 629           for (j = n_before_WITH; j < n_vars; j++)
 630             {
 631               pairs[n_pairs][0] = vars[i];
 632               pairs[n_pairs++][1] = vars[j];
 633             }
 634         }
 635     }
 636   else
 637     {
 638       int i;
 639
 640       for (i = 0; i < n_vars; i++)
 641         {
 642           int j;
 643
 644           for (j = i + 1; j < n_vars; j++)
 645             {
 646               pairs[n_pairs][0] = vars[i];
 647               pairs[n_pairs++][1] = vars[j];
 648             }
 649         }
 650     }
 651
 652 #if DEBUGGING
 653   assert (n_pairs == n_predicted);
 654 #endif
 655
 656   free (vars);
 657   return 1;
 658 }
 659
 660 #if DEBUGGING
 661 static void
 662 debug_print (void)
 663 {
 664   printf ("T-TEST\n");
 665   if (groups)
 666     {
 667       printf ("  GROUPS=%s", groups->name);
 668       if (n_groups_values)
 669         {
 670           int i;
 671
 672           printf (" (");
 673           for (i = 0; i < n_groups_values; i++)
 674             if (groups->type == NUMERIC)
 675               printf ("%g%s", groups_values[i].f, i ? " " : "");
 676             else
 677               printf ("%.*s%s", groups->width, groups_values[i].s,
 678                       i ? " " : "");
 679           printf (")");
 680         }
 681       printf ("\n");
 682     }
 683   if (cmd.n_variables)
 684     {
 685       int i;
 686
 687       printf ("  VARIABLES=");
 688       for (i = 0; i < cmd.n_variables; i++)
 689         printf ("%s ", cmd.v_variables[i]->name);
 690       printf ("\n");
 691     }
 692   if (cmd.sbc_pairs)
 693     {
 694       int i;
 695
 696       printf ("  PAIRS=");
 697       for (i = 0; i < n_pairs; i++)
 698         printf ("%s ", pairs[i][0]->name);
 699       printf ("WITH");
 700       for (i = 0; i < n_pairs; i++)
 701         printf (" %s", pairs[i][1]->name);
 702       printf (" (PAIRED)\n");
 703     }
 704   printf ("  MISSING=%s %s\n",
 705           cmd.miss == TTS_ANALYSIS ? "ANALYSIS" : "LISTWISE",
 706           cmd.miss == TTS_INCLUDE ? "INCLUDE" : "EXCLUDE");
 707   printf ("  FORMAT=%s\n",
 708           cmd.fmt == TTS_LABELS ? "LABELS" : "NOLABELS");
 709   if (cmd.criteria != NOT_LONG)
 710     printf ("  CRITERIA=%f\n", cmd.criteria);
 711 }
 712
 713 #endif /* DEBUGGING */
 714
 715 /* Here are some general routines tha should probably be moved into
 716    a separate library and documented as part of the PSPP "API"   */
 717 double
 718 variance (double n, double ss, double sum)
 719 {
 720   return ((ss - ((sum * sum) / n)) / (n - 1.0));
 721 }
 722
 723 double
 724 pooled_variance (double n_1, double var_1, double n_2, double var_2)
 725 {
 726   double tmp;
 727
 728   tmp = n_1 + n_2 - 2.0;
 729   tmp = (((n_1 - 1.0) * var_1) + ((n_2 - 1.0) * var_2)) / tmp;
 730   tmp = tmp * ((n_1 + n_2) / (n_1 * n_2));
 731   return tmp;
 732 }
 733
 734 double
 735 oneway (double *f, double *p, struct value_list *levels)
 736 {
 737   double k, SSTR, SSE, SSTO, N, MSTR, MSE, sum, dftr, dfe, print;
 738   struct value_list *g;
 739
 740   k = 0.0;
 741
 742   for (g = levels; g != NULL; g = g->next)
 743     {
 744       k++;
 745       sum += g->sum;
 746       N += g->n;
 747       SSTR += g->ss - (pow (g->sum, 2) / g->n);
 748       SSTO += g->ss;
 749     }
 750
 751   SSTO = SSTO - (pow (sum, 2) / N);
 752   SSE = SSTO - SSTR;
 753
 754   dftr = N - k;
 755   dfe = k - 1.0;
 756   MSTR = SSTR / dftr;
 757   MSE = SSE / dfe;
 758
 759   *f = (MSE / MSTR);
 760   *p = f_sig (*f, dfe, dftr);
 761
 762   print = 1.0;
 763   if (print == 1.0)
 764     {
 765       printf ("sum1 %f, sum2 %f, ss1 %f, ss2 %f\n",
 766               levels->sum, levels->next->sum, levels->ss, levels->next->ss);
 767       printf ("                - - - - - - O N E W A Y - - - - - -\n\n");
 768       printf ("   Variable %s %s\n",
 769               cmd.v_variables[0]->name, cmd.v_variables[0]->label);
 770       printf ("By Variable %s %s\n", groups->name, groups->label);
 771       printf ("\n             Analysis of Variance\n\n");
 772       printf ("                    Sum of    Mean     F       F\n");
 773       printf ("Source       D.F.  Squares  Squares  Ratio   Prob\n\n");
 774       printf ("Between   %8.0f %8.4f %8.4f %8.4f %8.4f\n",
 775               dfe, SSE, MSE, *f, *p);
 776       printf ("Within    %8.0f %8.4f %8.4f\n", dftr, SSTR, MSTR);
 777       printf ("Total     %8.0f %8.4f\n\n\n", N - 1.0, SSTO);
 778     }
 779   return (*f);
 780 }
 781
 782 double
 783 f_sig (double f, double dfn, double dfd)
 784 {
 785   int which, status;
 786   double p, q, bound;
 787
 788   which = FIND_P;
 789   status = 1;
 790   p = q = bound = 0.0;
 791   cdff (&which, &p, &q, &f, &dfn, &dfd, &status, &bound);
 792
 793   switch (status)
 794     {
 795     case -1:
 796       {
 797         printf ("Parameter 1 is out of range\n");
 798         break;
 799       }
 800     case -2:
 801       {
 802         printf ("Parameter 2 is out of range\n");
 803         break;
 804       }
 805     case -3:
 806       {
 807         printf ("Parameter 3 is out of range\n");
 808         break;
 809       }
 810     case -4:
 811       {
 812         printf ("Parameter 4 is out of range\n");
 813         break;
 814       }
 815     case -5:
 816       {
 817         printf ("Parameter 5 is out of range\n");
 818         break;
 819       }
 820     case -6:
 821       {
 822         printf ("Parameter 6 is out of range\n");
 823         break;
 824       }
 825     case -7:
 826       {
 827         printf ("Parameter 7 is out of range\n");
 828         break;
 829       }
 830     case -8:
 831       {
 832         printf ("Parameter 8 is out of range\n");
 833         break;
 834       }
 835     case 0:
 836       {
 837         /* printf( "Command completed successfully\n" ); */
 838         break;
 839       }
 840     case 1:
 841       {
 842         printf ("Answer appears to be lower than the lowest search bound\n");
 843         break;
 844       }
 845     case 2:
 846       {
 847         printf ("Answer appears to be higher than the greatest search bound\n");
 848         break;
 849       }
 850     case 3:
 851       {
 852         printf ("P - Q NE 1\n");
 853         break;
 854       }
 855     }
 856
 857   if (status)
 858     {
 859       return (double) ERROR_SIG;
 860     }
 861   else
 862     {
 863       return q;
 864     }
 865 }
 866
 867 double
 868 t_crt (double df, double q)
 869 {
 870   int which, status;
 871   double p, bound, t;
 872
 873   which = FIND_CRITICAL_VALUE;
 874   bound = 0.0;
 875   p = 1.0 - q;
 876   t = 0.0;
 877
 878   cdft (&which, &p, &q, &t, &df, &status, &bound);
 879
 880   switch (status)
 881     {
 882     case -1:
 883       {
 884         printf ("t_crt: Parameter 1 is out of range\n");
 885         break;
 886       }
 887     case -2:
 888       {
 889         printf ("t_crt: value of p (%f) is out of range\n", p);
 890         break;
 891       }
 892     case -3:
 893       {
 894         printf ("t_crt: value of q (%f) is out of range\n", q);
 895         break;
 896       }
 897     case -4:
 898       {
 899         printf ("t_crt: value of df (%f) is out of range\n", df);
 900         break;
 901       }
 902     case -5:
 903       {
 904         printf ("t_crt: Parameter 5 is out of range\n");
 905         break;
 906       }
 907     case -6:
 908       {
 909         printf ("t_crt: Parameter 6 is out of range\n");
 910         break;
 911       }
 912     case -7:
 913       {
 914         printf ("t_crt: Parameter 7 is out of range\n");
 915         break;
 916       }
 917     case 0:
 918       {
 919         /* printf( "Command completed successfully\n" ); */
 920         break;
 921       }
 922     case 1:
 923       {
 924         printf ("t_crt: Answer appears to be lower than the lowest search bound\n");
 925         break;
 926       }
 927     case 2:
 928       {
 929         printf ("t_crt: Answer appears to be higher than the greatest search bound\n");
 930         break;
 931       }
 932     case 3:
 933       {
 934         printf ("t_crt: P - Q NE 1\n");
 935         break;
 936       }
 937     }
 938
 939   if (status)
 940     {
 941       return (double) ERROR_SIG;
 942     }
 943   else
 944     {
 945       return t;
 946     }
 947 }
 948
 949 double
 950 t_sig (double t, double df)
 951 {
 952   int which, status;
 953   double p, q, bound;
 954
 955   which = FIND_P;
 956   q = 0.0;
 957   p = 0.0;
 958   bound = 0.0;
 959
 960   cdft (&which, &p, &q, &t, &df, &status, &bound);
 961
 962   switch (status)
 963     {
 964     case -1:
 965       {
 966         printf ("t-sig: Parameter 1 is out of range\n");
 967         break;
 968       }
 969     case -2:
 970       {
 971         printf ("t-sig: Parameter 2 is out of range\n");
 972         break;
 973       }
 974     case -3:
 975       {
 976         printf ("t-sig: Parameter 3 is out of range\n");
 977         break;
 978       }
 979     case -4:
 980       {
 981         printf ("t-sig: Parameter 4 is out of range\n");
 982         break;
 983       }
 984     case -5:
 985       {
 986         printf ("t-sig: Parameter 5 is out of range\n");
 987         break;
 988       }
 989     case -6:
 990       {
 991         printf ("t-sig: Parameter 6 is out of range\n");
 992         break;
 993       }
 994     case -7:
 995       {
 996         printf ("t-sig: Parameter 7 is out of range\n");
 997         break;
 998       }
 999     case 0:
1000       {
1001         /* printf( "Command completed successfully\n" ); */
1002         break;
1003       }
1004     case 1:
1005       {
1006         printf ("t-sig: Answer appears to be lower than the lowest search bound\n");
1007         break;
1008       }
1009     case 2:
1010       {
1011         printf ("t-sig: Answer appears to be higher than the greatest search bound\n");
1012         break;
1013       }
1014     case 3:
1015       {
1016         printf ("t-sig: P - Q NE 1\n");
1017         break;
1018       }
1019     }
1020
1021   if (status)
1022     {
1023       return (double) ERROR_SIG;
1024     }
1025   else
1026     {
1027       return q;
1028     }
1029 }
1030
1031 double
1032 covariance (double x_sum, double x_n, double y_sum, double y_n, double ss)
1033 {
1034   double tmp;
1035
1036   tmp = x_sum * y_sum;
1037   tmp = tmp / x_n;
1038   tmp = ss - tmp;
1039   tmp = (tmp / (x_n + y_n - 1.0));
1040   return tmp;
1041 }
1042
1043 double
1044 pearson_r (double c_xy, double c_xx, double c_yy)
1045 {
1046   return (c_xy / (sqrt (c_xx * c_yy)));
1047 }
1048
1049 void
1050 print_t_groups (struct variable * grps, union value * g1, union value * g2,
1051                 double n1, double n2, double mean1, double mean2,
1052                 double sd1, double sd2, double se1, double se2,
1053                 double diff, double l_f, double l_p,
1054                 double p_t, double p_sig, double p_df, double p_sed,
1055                 double p_l, double p_h,
1056                 double s_t, double s_sig, double s_df, double s_sed,
1057                 double s_l, double s_h)
1058 {
1059
1060   /* Display all this shit as SPSS 6.0 does (roughly) */
1061   printf ("\n\n                 Number                                 \n");
1062   printf ("   Variable     of Cases    Mean      SD      SE of Mean\n");
1063   printf ("-----------------------------------------------------------\n");
1064   printf ("   %s %s\n\n", cmd.v_variables[cur_var]->name, cmd.v_variables[cur_var]->label);
1065   printf ("%s %8.4f %8.0f    %8.4f  %8.3f    %8.3f\n",
1066           get_val_lab (grps, *g1, 0), g1->f, n1, mean1, sd1, se1);
1067   printf ("%s %8.4f %8.0f    %8.4f  %8.3f    %8.3f\n",
1068           get_val_lab (grps, *g2, 0), g2->f, n2, mean2, sd2, se2);
1069   printf ("-----------------------------------------------------------\n");
1070   printf ("\n   Mean Difference = %8.4f\n", diff);
1071   printf ("\n   Levene's Test for Equality of Variances: F= %.3f  P= %.3f\n",
1072           l_f, l_p);
1073   printf ("\n\n   t-test for Equality of Means                         95pc     \n");
1074   printf ("Variances   t-value    df   2-Tail Sig SE of Diff    CI for Diff  \n");
1075   printf ("-----------------------------------------------------------------\n");
1076   printf ("Equal     %8.2f %8.0f %8.3f %8.3f (%8.3f, %8.3f)\n",
1077           p_t, p_df, p_sig, p_sed, p_l, p_h);
1078   printf ("Unequal   %8.2f %8.2f %8.3f %8.3f (%8.3f, %8.3f)\n",
1079           s_t, s_df, s_sig, s_sed, s_l, s_h);
1080   printf ("-----------------------------------------------------------------\n");
1081 }
1082
1083 /*
1084    Local Variables:
1085    mode: c
1086    End:
1087 */