src/t-test.q

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   3    Written by John Williams <johnr.williams@stonebow.otago.ac.nz>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  18    02111-1307, USA. */
  19
  20 #include <config.h>
  21 #include <assert.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24 #include <math.h>
  25 #include "alloc.h"
  26 #include "str.h"
  27 #include "dcdflib/cdflib.h"
  28 #include "command.h"
  29 #include "lexer.h"
  30 #include "error.h"
  31 #include "magic.h"
  32 #include "value-labels.h"
  33 #include "var.h"
  34 #include "vfm.h"
  35
  36 /* (specification)
  37    "T-TEST" (tts_):
  38      groups=custom;
  39      variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
  40      *+pairs=custom;
  41      +missing=miss:!analysis/listwise,
  42              incl:include/!exclude;
  43      +format=fmt:!labels/nolabels;
  44      +criteria=:ci(d:criteria,"%s > 0. && %s < 1.").
  45 */
  46 /* (declarations) */
  47 /* (functions) */
  48
  49 #include "debug-print.h"
  50
  51 /* Command parsing information. */
  52 static struct cmd_t_test cmd;
  53
  54 /* Variable for the GROUPS subcommand, if given. */
  55 static struct variable *groups;
  56
  57 /* GROUPS: Number of values specified by the user; the values
  58    specified if any. */
  59 static int n_groups_values;
  60 static union value groups_values[2];
  61
  62 /* PAIRED: Number of pairs; each pair. */
  63 static int n_pairs;
  64 static struct variable *(*pairs)[2];
  65
  66 /* Routines to scan data and perform t-tests */
  67 static void precalc (void);
  68 static void postcalc (void);
  69 static void g_postcalc (void);
  70 static void t_pairs (void);
  71 static void t_groups (void);
  72 static int groups_calc (struct ccase *);
  73 static int pairs_calc (struct ccase *);
  74 static int z_calc (struct ccase *);
  75
  76 struct value_list
  77   {
  78     double sum;
  79     double ss;
  80     double n;
  81     struct value_list *next;
  82   };
  83
  84 /* general workhorses - should  move these to a separate library... */
  85 double variance (double n, double ss, double sum);
  86
  87 double covariance (double x_sum, double x_n,
  88                    double y_sum, double y_n, double ss);
  89
  90 double pooled_variance (double n_1, double var_1,
  91                         double n_2, double var_2);
  92
  93 double oneway (double *f, double *p, struct value_list *list);
  94
  95 double pearson_r (double c_xy, double c_xx, double c_yy);
  96
  97 double f_sig (double f, double dfn, double dfd);
  98 double t_crt (double df, double q);
  99 double t_sig (double t, double df);
 100
 101 /* massive function simply to remove any responsibility for output
 102    from the function which does the actual t-test calculations */
 103 void print_t_groups (struct variable * grps, union value * g1, union value * g2,
 104                      double n1, double n2, double mean1, double mean2,
 105                      double sd1, double sd2, double se1, double se2,
 106                      double diff, double l_f, double l_p,
 107                      double p_t, double p_sig, double p_df, double p_sed,
 108                      double p_l, double p_h,
 109                      double s_t, double s_sig, double s_df, double s_sed,
 110                      double s_l, double s_h);
 111
 112 /* Global variables to communicate between calc() and postcalc()
 113    should move to a structure in the p union of variable... */
 114 static double v1_n, v1_ss, v1_sum, v1_se, v1_var, v1_mean;
 115 static double v2_n, v2_ss, v2_sum, v2_se, v2_var, v2_mean;
 116 static double v1_z_sum, v1_z_ss;
 117 static double v2_z_sum, v2_z_ss;
 118 static double diff, se_diff, sp, xy_sum, xy_diff, xy_ss;
 119 static int cur_var;
 120
 121 /* some defines for CDFlib */
 122 #define FIND_P 1
 123 #define FIND_CRITICAL_VALUE 2
 124 #define ERROR_SIG -1
 125
 126 #ifdef DEBUGGING
 127 static void debug_print (void);
 128 #endif
 129
 130 /* Parses and executes the T-TEST procedure. */
 131 int
 132 cmd_t_test (void)
 133 {
 134   struct cmd_t_test cmd;
 135
 136   if (!lex_force_match_id ("T"))
 137     return CMD_FAILURE;
 138   lex_match ('-');
 139   lex_match_id ("TEST");
 140
 141   if (!parse_t_test (&cmd))
 142     return CMD_FAILURE;
 143
 144 #if DEBUGGING
 145   debug_print ();
 146 #endif
 147
 148   if (n_pairs > 0)
 149     procedure (precalc, pairs_calc, postcalc);
 150   else
 151     /* probably groups then... */
 152     {
 153       printf ("\n\n  t-tests for independent samples of %s %s\n",
 154               groups->name, groups->label);
 155
 156       for (cur_var = 0; cur_var < cmd.n_variables; cur_var++)
 157         {
 158           v1_n = v1_ss = v1_sum = v1_se = v1_var = v1_mean = 0.0;
 159           v2_n = v2_ss = v2_sum = v2_se = v2_var = v2_mean = 0.0;
 160           v1_z_sum = v1_z_ss = v2_z_sum = v2_z_ss = 0.0;
 161           diff = se_diff = sp = xy_diff = xy_ss = xy_sum = 0.0;
 162
 163           procedure (precalc, groups_calc, g_postcalc);
 164           procedure (precalc, z_calc, postcalc);
 165         }
 166     }
 167
 168   return CMD_SUCCESS;
 169 }
 170
 171 void
 172 precalc (void)
 173 {
 174   return;                       /* rilly void... */
 175 }
 176
 177 int
 178 groups_calc (struct ccase * c)
 179 {
 180   int bad_weight;
 181   double group, w;
 182   struct variable *v = cmd.v_variables[cur_var];
 183   double X = c->data[v->fv].f;
 184
 185   /* Get the weight for this case. */
 186   w = dict_get_case_weight (default_dict, c);
 187   if (w <= 0.0 || w == SYSMIS)
 188     {
 189       w = 0.0;
 190       bad_weight = 1;
 191       printf ("Bad weight\n");
 192     }
 193
 194   if (X == SYSMIS || X == 0.0)  /* FIXME: should be USER_MISSING? */
 195     {
 196       /* printf("Missing value\n"); */
 197       return 1;
 198     }
 199   else
 200     {
 201       X = X * w;
 202       group = c->data[groups->fv].f;
 203
 204       if (group == groups_values[0].f)
 205         {
 206           v1_sum += X;
 207           v1_ss += X * X;
 208           v1_n += w;
 209         }
 210       else if (group == groups_values[1].f)
 211         {
 212           v2_sum += X;
 213           v2_ss += X * X;
 214           v2_n += w;
 215         }
 216     }
 217
 218   return 1;
 219 }
 220
 221 void
 222 g_postcalc (void)
 223 {
 224   v1_mean = v1_sum / v1_n;
 225   v2_mean = v2_sum / v2_n;
 226   return;
 227 }
 228
 229 int                             /* this pass generates the z-zcores */
 230 z_calc (struct ccase * c)
 231 {
 232   double group, z, w;
 233   struct variable *v = cmd.v_variables[cur_var];
 234   double X = c->data[v->fv].f;
 235
 236   z = 0.0;
 237
 238   /* Get the weight for this case. */
 239   w = dict_get_case_weight (default_dict, c);
 240
 241   if (X == SYSMIS || X == 0.0)  /* FIXME: how to specify user missing? */
 242     {
 243       return 1;
 244     }
 245   else
 246     {
 247       group = c->data[groups->fv].f;
 248       X = w * X;
 249
 250       if (group == groups_values[0].f)
 251         {
 252           z = fabs (X - v1_mean);
 253           v1_z_sum += z;
 254           v1_z_ss += pow (z, 2);
 255         }
 256       else if (group == groups_values[1].f)
 257         {
 258           z = fabs (X - v2_mean);
 259           v2_z_ss += pow (z, 2);
 260           v2_z_sum += z;
 261         }
 262     }
 263
 264   return 1;
 265 }
 266
 267
 268 int
 269 pairs_calc (struct ccase * c)
 270 {
 271   int i;
 272   struct variable *v1, *v2;
 273   double X, Y;
 274
 275   for (i = 0; i < n_pairs; i++)
 276     {
 277
 278       v1 = pairs[i][0];
 279       v2 = pairs[i][1];
 280       X = c->data[v1->fv].f;
 281       Y = c->data[v2->fv].f;
 282
 283       if (X == SYSMIS || Y == SYSMIS)
 284         {
 285           printf ("Missing value\n");
 286         }
 287       else
 288         {
 289           xy_sum += X * Y;
 290           xy_diff += (X - Y);
 291           xy_ss += pow ((X - Y), 2);
 292           v1_sum += X;
 293           v2_sum += Y;
 294           v1_n++;
 295           v2_n++;
 296           v1_ss += (X * X);
 297           v2_ss += (Y * Y);
 298         }
 299     }
 300
 301   return 1;
 302 }
 303
 304 void
 305 postcalc (void)
 306 {
 307   /* Calculate basic statistics */
 308   v1_var = variance (v1_n, v1_ss, v1_sum);      /* variances */
 309   v2_var = variance (v2_n, v2_ss, v2_sum);
 310   v1_se = sqrt (v1_var / v1_n); /* standard errors */
 311   v2_se = sqrt (v2_var / v2_n);
 312   diff = v1_mean - v2_mean;
 313
 314   if (n_pairs > 0)
 315     {
 316       t_pairs ();
 317     }
 318   else
 319     {
 320       t_groups ();
 321     }
 322
 323   return;
 324 }
 325
 326 void
 327 t_groups (void)
 328 {
 329   double df_pooled, t_pooled, t_sep, p_pooled, p_sep;
 330   double crt_t_p, crt_t_s, tmp, v1_z, v2_z, f_levene, p_levene;
 331   double df_sep, se_diff_s, se_diff_p;
 332   struct value_list *val_1, *val_2;
 333
 334   /* Levene's test */
 335   val_1 = malloc (sizeof (struct value_list));
 336   val_1->sum = v1_z_sum;
 337   val_1->ss = v1_z_ss;
 338   val_1->n = v1_n;
 339   val_2 = malloc (sizeof (struct value_list));
 340   val_2->sum = v2_z_sum;
 341   val_2->ss = v2_z_ss;
 342   val_2->n = v2_n;
 343
 344   val_1->next = val_2;
 345   val_2->next = NULL;
 346
 347   f_levene = oneway (&f_levene, &p_levene, val_1);
 348
 349   /* T test results for pooled variances */
 350   se_diff_p = sqrt (pooled_variance (v1_n, v1_var, v2_n, v2_var));
 351   df_pooled = v1_n + v2_n - 2.0;
 352   t_pooled = diff / se_diff_p;
 353   p_pooled = t_sig (t_pooled, df_pooled);
 354   crt_t_p = t_crt (df_pooled, 0.025);
 355
 356   if ((2.0 * p_pooled) >= 1.0)
 357     p_pooled = 1.0 - p_pooled;
 358
 359   /* oh god, the separate variance calculations... */
 360   t_sep = diff / sqrt ((v1_var / v1_n) + (v2_var / v2_n));
 361
 362   tmp = (v1_var / v1_n) + (v2_var / v2_n);
 363   tmp = (v1_var / v1_n) / tmp;
 364   tmp = pow (tmp, 2);
 365   tmp = tmp / (v1_n - 1.0);
 366   v1_z = tmp;
 367
 368   tmp = (v1_var / v1_n) + (v2_var / v2_n);
 369   tmp = (v2_var / v2_n) / tmp;
 370   tmp = pow (tmp, 2);
 371   tmp = tmp / (v2_n - 1.0);
 372   v2_z = tmp;
 373
 374   tmp = 1.0 / (v1_z + v2_z);
 375
 376   df_sep = tmp;
 377   p_sep = t_sig (t_sep, df_sep);
 378   if ((2.0 * p_sep) >= 1.0)
 379     p_sep = 1.0 - p_sep;
 380   crt_t_s = t_crt (df_sep, 0.025);
 381   se_diff_s = sqrt ((v1_var / v1_n) + (v2_var / v2_n));
 382
 383   /* FIXME: convert to a proper PSPP output call */
 384   print_t_groups (groups, &groups_values[0], &groups_values[1],
 385                   v1_n, v2_n, v1_mean, v2_mean,
 386                   sqrt (v1_var), sqrt (v2_var), v1_se, v2_se,
 387                   diff, f_levene, p_levene,
 388                   t_pooled, 2.0 * p_pooled, df_pooled, se_diff_p,
 389                   diff - (crt_t_p * se_diff_p), diff + (crt_t_p * se_diff_p),
 390                   t_sep, 2.0 * p_sep, df_sep, se_diff_s,
 391                 diff - (crt_t_s * se_diff_s), diff + (crt_t_s * se_diff_s));
 392   return;
 393 }
 394
 395 void
 396 t_pairs (void)
 397 {
 398   double cov12, cov11, cov22, r, t, p, crt_t, sp, r_t, r_p;
 399   struct variable *v1, *v2;
 400
 401   v1 = pairs[0][0];
 402   v2 = pairs[0][1];
 403   cov12 = covariance (v1_sum, v1_n, v2_sum, v2_n, xy_sum);
 404   cov11 = covariance (v1_sum, v1_n, v1_sum, v1_n, v1_ss);
 405   cov22 = covariance (v2_sum, v2_n, v2_sum, v2_n, v2_ss);
 406   r = pearson_r (cov12, cov11, cov22);
 407   /* this t and it's associated p is a significance test for the pearson's r */
 408   r_t = r * sqrt ((v1_n - 2.0) / (1.0 - (r * r)));
 409   r_p = t_sig (r_t, v1_n - 2.0);
 410
 411   /* now we move to the t test for the difference in means */
 412   diff = xy_diff / v1_n;
 413   sp = sqrt (variance (v1_n, xy_ss, xy_diff));
 414   se_diff = sp / sqrt (v1_n);
 415   t = diff / se_diff;
 416   crt_t = t_crt (v1_n - 1.0, 0.025);
 417   p = t_sig (t, v1_n - 1.0);
 418
 419
 420   printf ("             Number of        2-tail\n");
 421   printf (" Variable      pairs    Corr   Sig      Mean    SD   SE of Mean\n");
 422   printf ("---------------------------------------------------------------\n");
 423   printf ("%s                                  %8.4f %8.4f %8.4f\n",
 424           v1->name, v1_mean, sqrt (v1_var), v1_se);
 425   printf ("           %8.4f  %0.4f  %0.4f\n", v1_n, r, r_p);
 426   printf ("%s                                  %8.4f %8.4f %8.4f\n",
 427           v2->name, v2_mean, sqrt (v2_var), v2_se);
 428   printf ("---------------------------------------------------------------\n");
 429
 430   printf ("\n\n\n");
 431   printf ("      Paired Differences              |\n");
 432   printf (" Mean          SD         SE of Mean  |  t-value   df   2-tail Sig\n");
 433   printf ("--------------------------------------|---------------------------\n");
 434
 435   printf ("%8.4f    %8.4f    %8.4f      | %8.4f %8.4f %8.4f\n",
 436           diff, sp, se_diff, t, v1_n - 1.0, 2.0 * (1.0 - p));
 437
 438   printf ("95pc CI (%8.4f, %8.4f)          |\n\n",
 439           diff - (se_diff * crt_t), diff + (se_diff * crt_t));
 440
 441   return;
 442 }
 443
 444 static int parse_value (union value *);
 445
 446 /* Parses the GROUPS subcommand. */
 447 int
 448 tts_custom_groups (struct cmd_t_test *cmd unused)
 449 {
 450   groups = parse_variable ();
 451   if (!groups)
 452     {
 453       lex_error (_("expecting variable name in GROUPS subcommand"));
 454       return 0;
 455     }
 456   if (groups->type == T_STRING && groups->width > MAX_SHORT_STRING)
 457     {
 458       msg (SE, _("Long string variable %s is not valid here."),
 459            groups->name);
 460       return 0;
 461     }
 462
 463   if (!lex_match ('('))
 464     {
 465       if (groups->type == NUMERIC)
 466         {
 467           n_groups_values = 2;
 468           groups_values[0].f = 1;
 469           groups_values[1].f = 2;
 470           return 1;
 471         }
 472       else
 473         {
 474           msg (SE, _("When applying GROUPS to a string variable, at "
 475                      "least one value must be specified."));
 476           return 0;
 477         }
 478     }
 479
 480   if (!parse_value (&groups_values[0]))
 481     return 0;
 482   n_groups_values = 1;
 483
 484   lex_match (',');
 485
 486   if (lex_match (')'))
 487     return 1;
 488
 489   if (!parse_value (&groups_values[1]))
 490     return 0;
 491   n_groups_values = 2;
 492
 493   if (!lex_force_match (')'))
 494     return 0;
 495
 496   return 1;
 497 }
 498
 499 /* Parses the current token (numeric or string, depending on the
 500    variable in `groups') into value V and returns success. */
 501 static int
 502 parse_value (union value * v)
 503 {
 504   if (groups->type == NUMERIC)
 505     {
 506       if (!lex_force_num ())
 507         return 0;
 508       v->f = tokval;
 509     }
 510   else
 511     {
 512       if (!lex_force_string ())
 513         return 0;
 514       strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr));
 515     }
 516
 517   lex_get ();
 518
 519   return 1;
 520 }
 521
 522 /* Parses the PAIRS subcommand. */
 523 static int
 524 tts_custom_pairs (struct cmd_t_test *cmd unused)
 525 {
 526   struct variable **vars;
 527   int n_before_WITH;
 528   int n_vars;
 529   int paired;
 530   int extra;
 531 #if DEBUGGING
 532   int n_predicted;
 533 #endif
 534
 535   if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
 536       && token != T_ALL)
 537     return 2;
 538   if (!parse_variables (default_dict, &vars, &n_vars,
 539                         PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
 540     return 0;
 541
 542   assert (n_vars);
 543   if (lex_match (T_WITH))
 544     {
 545       n_before_WITH = n_vars;
 546
 547       if (!parse_variables (default_dict, &vars, &n_vars,
 548                             PV_DUPLICATE | PV_APPEND
 549                             | PV_NUMERIC | PV_NO_SCRATCH))
 550         {
 551           free (vars);
 552           return 0;
 553         }
 554     }
 555   else
 556     n_before_WITH = 0;
 557
 558   paired = (lex_match ('(') && lex_match_id ("PAIRED") && lex_match (')'));
 559
 560   if (paired)
 561     {
 562       if (n_before_WITH * 2 != n_vars)
 563         {
 564           free (vars);
 565           msg (SE, _("PAIRED was specified but the number of variables "
 566                      "preceding WITH (%d) did not match the number "
 567                      "following (%d)."),
 568                n_before_WITH, n_vars - n_before_WITH);
 569           return 0;
 570         }
 571
 572       extra = n_before_WITH;
 573     }
 574   else if (n_before_WITH)
 575     extra = n_before_WITH * (n_vars - n_before_WITH);
 576   else
 577     {
 578       if (n_vars < 2)
 579         {
 580           free (vars);
 581           msg (SE, _("At least two variables must be specified "
 582                      "on PAIRS."));
 583           return 0;
 584         }
 585
 586       extra = n_vars * (n_vars - 1) / 2;
 587     }
 588
 589 #if DEBUGGING
 590   n_predicted = n_pairs + extra;
 591 #endif
 592
 593   pairs = xrealloc (pairs, sizeof (struct variable *[2]) * (n_pairs + extra));
 594
 595   if (paired)
 596     {
 597       int i;
 598
 599       for (i = 0; i < extra; i++)
 600         {
 601           pairs[n_pairs][0] = vars[i];
 602           pairs[n_pairs++][1] = vars[i + extra];
 603         }
 604     }
 605   else if (n_before_WITH)
 606     {
 607       int i;
 608
 609       for (i = 0; i < n_before_WITH; i++)
 610         {
 611           int j;
 612
 613           for (j = n_before_WITH; j < n_vars; j++)
 614             {
 615               pairs[n_pairs][0] = vars[i];
 616               pairs[n_pairs++][1] = vars[j];
 617             }
 618         }
 619     }
 620   else
 621     {
 622       int i;
 623
 624       for (i = 0; i < n_vars; i++)
 625         {
 626           int j;
 627
 628           for (j = i + 1; j < n_vars; j++)
 629             {
 630               pairs[n_pairs][0] = vars[i];
 631               pairs[n_pairs++][1] = vars[j];
 632             }
 633         }
 634     }
 635
 636 #if DEBUGGING
 637   assert (n_pairs == n_predicted);
 638 #endif
 639
 640   free (vars);
 641   return 1;
 642 }
 643
 644 #if DEBUGGING
 645 static void
 646 debug_print (void)
 647 {
 648   printf ("T-TEST\n");
 649   if (groups)
 650     {
 651       printf ("  GROUPS=%s", groups->name);
 652       if (n_groups_values)
 653         {
 654           int i;
 655
 656           printf (" (");
 657           for (i = 0; i < n_groups_values; i++)
 658             if (groups->type == NUMERIC)
 659               printf ("%g%s", groups_values[i].f, i ? " " : "");
 660             else
 661               printf ("%.*s%s", groups->width, groups_values[i].s,
 662                       i ? " " : "");
 663           printf (")");
 664         }
 665       printf ("\n");
 666     }
 667   if (cmd.n_variables)
 668     {
 669       int i;
 670
 671       printf ("  VARIABLES=");
 672       for (i = 0; i < cmd.n_variables; i++)
 673         printf ("%s ", cmd.v_variables[i]->name);
 674       printf ("\n");
 675     }
 676   if (cmd.sbc_pairs)
 677     {
 678       int i;
 679
 680       printf ("  PAIRS=");
 681       for (i = 0; i < n_pairs; i++)
 682         printf ("%s ", pairs[i][0]->name);
 683       printf ("WITH");
 684       for (i = 0; i < n_pairs; i++)
 685         printf (" %s", pairs[i][1]->name);
 686       printf (" (PAIRED)\n");
 687     }
 688   printf ("  MISSING=%s %s\n",
 689           cmd.miss == TTS_ANALYSIS ? "ANALYSIS" : "LISTWISE",
 690           cmd.miss == TTS_INCLUDE ? "INCLUDE" : "EXCLUDE");
 691   printf ("  FORMAT=%s\n",
 692           cmd.fmt == TTS_LABELS ? "LABELS" : "NOLABELS");
 693   if (cmd.criteria != NOT_LONG)
 694     printf ("  CRITERIA=%f\n", cmd.criteria);
 695 }
 696
 697 #endif /* DEBUGGING */
 698
 699 /* Here are some general routines tha should probably be moved into
 700    a separate library and documented as part of the PSPP "API"   */
 701 double
 702 variance (double n, double ss, double sum)
 703 {
 704   return ((ss - ((sum * sum) / n)) / (n - 1.0));
 705 }
 706
 707 double
 708 pooled_variance (double n_1, double var_1, double n_2, double var_2)
 709 {
 710   double tmp;
 711
 712   tmp = n_1 + n_2 - 2.0;
 713   tmp = (((n_1 - 1.0) * var_1) + ((n_2 - 1.0) * var_2)) / tmp;
 714   tmp = tmp * ((n_1 + n_2) / (n_1 * n_2));
 715   return tmp;
 716 }
 717
 718 double
 719 oneway (double *f, double *p, struct value_list *levels)
 720 {
 721   double k, SSTR, SSE, SSTO, N, MSTR, MSE, sum, dftr, dfe, print;
 722   struct value_list *g;
 723
 724   k = 0.0;
 725
 726   for (g = levels; g != NULL; g = g->next)
 727     {
 728       k++;
 729       sum += g->sum;
 730       N += g->n;
 731       SSTR += g->ss - (pow (g->sum, 2) / g->n);
 732       SSTO += g->ss;
 733     }
 734
 735   SSTO = SSTO - (pow (sum, 2) / N);
 736   SSE = SSTO - SSTR;
 737
 738   dftr = N - k;
 739   dfe = k - 1.0;
 740   MSTR = SSTR / dftr;
 741   MSE = SSE / dfe;
 742
 743   *f = (MSE / MSTR);
 744   *p = f_sig (*f, dfe, dftr);
 745
 746   print = 1.0;
 747   if (print == 1.0)
 748     {
 749       printf ("sum1 %f, sum2 %f, ss1 %f, ss2 %f\n",
 750               levels->sum, levels->next->sum, levels->ss, levels->next->ss);
 751       printf ("                - - - - - - O N E W A Y - - - - - -\n\n");
 752       printf ("   Variable %s %s\n",
 753               cmd.v_variables[0]->name, cmd.v_variables[0]->label);
 754       printf ("By Variable %s %s\n", groups->name, groups->label);
 755       printf ("\n             Analysis of Variance\n\n");
 756       printf ("                    Sum of    Mean     F       F\n");
 757       printf ("Source       D.F.  Squares  Squares  Ratio   Prob\n\n");
 758       printf ("Between   %8.0f %8.4f %8.4f %8.4f %8.4f\n",
 759               dfe, SSE, MSE, *f, *p);
 760       printf ("Within    %8.0f %8.4f %8.4f\n", dftr, SSTR, MSTR);
 761       printf ("Total     %8.0f %8.4f\n\n\n", N - 1.0, SSTO);
 762     }
 763   return (*f);
 764 }
 765
 766 double
 767 f_sig (double f, double dfn, double dfd)
 768 {
 769   int which, status;
 770   double p, q, bound;
 771
 772   which = FIND_P;
 773   status = 1;
 774   p = q = bound = 0.0;
 775   cdff (&which, &p, &q, &f, &dfn, &dfd, &status, &bound);
 776
 777   switch (status)
 778     {
 779     case -1:
 780       {
 781         printf ("Parameter 1 is out of range\n");
 782         break;
 783       }
 784     case -2:
 785       {
 786         printf ("Parameter 2 is out of range\n");
 787         break;
 788       }
 789     case -3:
 790       {
 791         printf ("Parameter 3 is out of range\n");
 792         break;
 793       }
 794     case -4:
 795       {
 796         printf ("Parameter 4 is out of range\n");
 797         break;
 798       }
 799     case -5:
 800       {
 801         printf ("Parameter 5 is out of range\n");
 802         break;
 803       }
 804     case -6:
 805       {
 806         printf ("Parameter 6 is out of range\n");
 807         break;
 808       }
 809     case -7:
 810       {
 811         printf ("Parameter 7 is out of range\n");
 812         break;
 813       }
 814     case -8:
 815       {
 816         printf ("Parameter 8 is out of range\n");
 817         break;
 818       }
 819     case 0:
 820       {
 821         /* printf( "Command completed successfully\n" ); */
 822         break;
 823       }
 824     case 1:
 825       {
 826         printf ("Answer appears to be lower than the lowest search bound\n");
 827         break;
 828       }
 829     case 2:
 830       {
 831         printf ("Answer appears to be higher than the greatest search bound\n");
 832         break;
 833       }
 834     case 3:
 835       {
 836         printf ("P - Q NE 1\n");
 837         break;
 838       }
 839     }
 840
 841   if (status)
 842     {
 843       return (double) ERROR_SIG;
 844     }
 845   else
 846     {
 847       return q;
 848     }
 849 }
 850
 851 double
 852 t_crt (double df, double q)
 853 {
 854   int which, status;
 855   double p, bound, t;
 856
 857   which = FIND_CRITICAL_VALUE;
 858   bound = 0.0;
 859   p = 1.0 - q;
 860   t = 0.0;
 861
 862   cdft (&which, &p, &q, &t, &df, &status, &bound);
 863
 864   switch (status)
 865     {
 866     case -1:
 867       {
 868         printf ("t_crt: Parameter 1 is out of range\n");
 869         break;
 870       }
 871     case -2:
 872       {
 873         printf ("t_crt: value of p (%f) is out of range\n", p);
 874         break;
 875       }
 876     case -3:
 877       {
 878         printf ("t_crt: value of q (%f) is out of range\n", q);
 879         break;
 880       }
 881     case -4:
 882       {
 883         printf ("t_crt: value of df (%f) is out of range\n", df);
 884         break;
 885       }
 886     case -5:
 887       {
 888         printf ("t_crt: Parameter 5 is out of range\n");
 889         break;
 890       }
 891     case -6:
 892       {
 893         printf ("t_crt: Parameter 6 is out of range\n");
 894         break;
 895       }
 896     case -7:
 897       {
 898         printf ("t_crt: Parameter 7 is out of range\n");
 899         break;
 900       }
 901     case 0:
 902       {
 903         /* printf( "Command completed successfully\n" ); */
 904         break;
 905       }
 906     case 1:
 907       {
 908         printf ("t_crt: Answer appears to be lower than the lowest search bound\n");
 909         break;
 910       }
 911     case 2:
 912       {
 913         printf ("t_crt: Answer appears to be higher than the greatest search bound\n");
 914         break;
 915       }
 916     case 3:
 917       {
 918         printf ("t_crt: P - Q NE 1\n");
 919         break;
 920       }
 921     }
 922
 923   if (status)
 924     {
 925       return (double) ERROR_SIG;
 926     }
 927   else
 928     {
 929       return t;
 930     }
 931 }
 932
 933 double
 934 t_sig (double t, double df)
 935 {
 936   int which, status;
 937   double p, q, bound;
 938
 939   which = FIND_P;
 940   q = 0.0;
 941   p = 0.0;
 942   bound = 0.0;
 943
 944   cdft (&which, &p, &q, &t, &df, &status, &bound);
 945
 946   switch (status)
 947     {
 948     case -1:
 949       {
 950         printf ("t-sig: Parameter 1 is out of range\n");
 951         break;
 952       }
 953     case -2:
 954       {
 955         printf ("t-sig: Parameter 2 is out of range\n");
 956         break;
 957       }
 958     case -3:
 959       {
 960         printf ("t-sig: Parameter 3 is out of range\n");
 961         break;
 962       }
 963     case -4:
 964       {
 965         printf ("t-sig: Parameter 4 is out of range\n");
 966         break;
 967       }
 968     case -5:
 969       {
 970         printf ("t-sig: Parameter 5 is out of range\n");
 971         break;
 972       }
 973     case -6:
 974       {
 975         printf ("t-sig: Parameter 6 is out of range\n");
 976         break;
 977       }
 978     case -7:
 979       {
 980         printf ("t-sig: Parameter 7 is out of range\n");
 981         break;
 982       }
 983     case 0:
 984       {
 985         /* printf( "Command completed successfully\n" ); */
 986         break;
 987       }
 988     case 1:
 989       {
 990         printf ("t-sig: Answer appears to be lower than the lowest search bound\n");
 991         break;
 992       }
 993     case 2:
 994       {
 995         printf ("t-sig: Answer appears to be higher than the greatest search bound\n");
 996         break;
 997       }
 998     case 3:
 999       {
1000         printf ("t-sig: P - Q NE 1\n");
1001         break;
1002       }
1003     }
1004
1005   if (status)
1006     {
1007       return (double) ERROR_SIG;
1008     }
1009   else
1010     {
1011       return q;
1012     }
1013 }
1014
1015 double
1016 covariance (double x_sum, double x_n, double y_sum, double y_n, double ss)
1017 {
1018   double tmp;
1019
1020   tmp = x_sum * y_sum;
1021   tmp = tmp / x_n;
1022   tmp = ss - tmp;
1023   tmp = (tmp / (x_n + y_n - 1.0));
1024   return tmp;
1025 }
1026
1027 double
1028 pearson_r (double c_xy, double c_xx, double c_yy)
1029 {
1030   return (c_xy / (sqrt (c_xx * c_yy)));
1031 }
1032
1033 void
1034 print_t_groups (struct variable * grps, union value * g1, union value * g2,
1035                 double n1, double n2, double mean1, double mean2,
1036                 double sd1, double sd2, double se1, double se2,
1037                 double diff, double l_f, double l_p,
1038                 double p_t, double p_sig, double p_df, double p_sed,
1039                 double p_l, double p_h,
1040                 double s_t, double s_sig, double s_df, double s_sed,
1041                 double s_l, double s_h)
1042 {
1043
1044   /* Display all this shit as SPSS 6.0 does (roughly) */
1045   printf ("\n\n                 Number                                 \n");
1046   printf ("   Variable     of Cases    Mean      SD      SE of Mean\n");
1047   printf ("-----------------------------------------------------------\n");
1048   printf ("   %s %s\n\n", cmd.v_variables[cur_var]->name, cmd.v_variables[cur_var]->label);
1049   printf ("%s %8.4f %8.0f    %8.4f  %8.3f    %8.3f\n",
1050           val_labs_find (grps->val_labs, *g1), g1->f, n1, mean1, sd1, se1);
1051   printf ("%s %8.4f %8.0f    %8.4f  %8.3f    %8.3f\n",
1052           val_labs_find (grps->val_labs, *g2), g2->f, n2, mean2, sd2, se2);
1053   printf ("-----------------------------------------------------------\n");
1054   printf ("\n   Mean Difference = %8.4f\n", diff);
1055   printf ("\n   Levene's Test for Equality of Variances: F= %.3f  P= %.3f\n",
1056           l_f, l_p);
1057   printf ("\n\n   t-test for Equality of Means                         95pc     \n");
1058   printf ("Variances   t-value    df   2-Tail Sig SE of Diff    CI for Diff  \n");
1059   printf ("-----------------------------------------------------------------\n");
1060   printf ("Equal     %8.2f %8.0f %8.3f %8.3f (%8.3f, %8.3f)\n",
1061           p_t, p_df, p_sig, p_sed, p_l, p_h);
1062   printf ("Unequal   %8.2f %8.2f %8.3f %8.3f (%8.3f, %8.3f)\n",
1063           s_t, s_df, s_sig, s_sed, s_l, s_h);
1064   printf ("-----------------------------------------------------------------\n");
1065 }
1066
1067 /*
1068    Local Variables:
1069    mode: c
1070    End:
1071 */