src/t-test.q

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   3    Written by Ben Pfaff <blp@gnu.org>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  18    02111-1307, USA. */
  19
  20 #include <config.h>
  21 #include <assert.h>
  22 #include <stdio.h>
  23 #include <stdlib.h>
  24 #include <math.h>
  25 #include "alloc.h"
  26 #include "str.h"
  27 #include "dcdflib/cdflib.h"
  28 #include "command.h"
  29 #include "lexer.h"
  30 #include "error.h"
  31 #include "magic.h"
  32 #include "var.h"
  33 #include "vfm.h"
  34
  35 /* (specification)
  36    "T-TEST" (tts_):
  37      groups=custom;
  38      variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
  39      *+pairs=custom;
  40      +missing=miss:!analysis/listwise,
  41              incl:include/!exclude;
  42      +format=fmt:!labels/nolabels;
  43      +criteria=:ci(d:criteria,"%s > 0. && %s < 1.").
  44 */
  45 /* (declarations) */
  46 /* (functions) */
  47
  48 #include "debug-print.h"
  49
  50 /* Command parsing information. */
  51 static struct cmd_t_test cmd;
  52
  53 /* Variable for the GROUPS subcommand, if given. */
  54 static struct variable *groups;
  55
  56 /* GROUPS: Number of values specified by the user; the values
  57    specified if any. */
  58 static int n_groups_values;
  59 static union value groups_values[2];
  60
  61 /* PAIRED: Number of pairs; each pair. */
  62 static int n_pairs;
  63 static struct variable *(*pairs)[2];
  64
  65 /* Routines to scan data and perform t-tests */
  66 static void precalc (void);
  67 static void postcalc (void);
  68 static void g_postcalc (void);
  69 static void t_pairs (void);
  70 static void t_groups (void);
  71 static int groups_calc (struct ccase *);
  72 static int pairs_calc (struct ccase *);
  73 static int z_calc (struct ccase *);
  74
  75 struct value_list
  76   {
  77     double sum;
  78     double ss;
  79     double n;
  80     struct value_list *next;
  81   };
  82
  83 /* general workhorses - should  move these to a separate library... */
  84 double variance (double n, double ss, double sum);
  85
  86 double covariance (double x_sum, double x_n,
  87                    double y_sum, double y_n, double ss);
  88
  89 double pooled_variance (double n_1, double var_1,
  90                         double n_2, double var_2);
  91
  92 double oneway (double *f, double *p, struct value_list *list);
  93
  94 double pearson_r (double c_xy, double c_xx, double c_yy);
  95
  96 double f_sig (double f, double dfn, double dfd);
  97 double t_crt (double df, double q);
  98 double t_sig (double t, double df);
  99
 100 /* massive function simply to remove any responsibility for output
 101    from the function which does the actual t-test calculations */
 102 void print_t_groups (struct variable * grps, union value * g1, union value * g2,
 103                      double n1, double n2, double mean1, double mean2,
 104                      double sd1, double sd2, double se1, double se2,
 105                      double diff, double l_f, double l_p,
 106                      double p_t, double p_sig, double p_df, double p_sed,
 107                      double p_l, double p_h,
 108                      double s_t, double s_sig, double s_df, double s_sed,
 109                      double s_l, double s_h);
 110
 111 /* Global variables to communicate between calc() and postcalc()
 112    should move to a structure in the p union of variable... */
 113 static double v1_n, v1_ss, v1_sum, v1_se, v1_var, v1_mean;
 114 static double v2_n, v2_ss, v2_sum, v2_se, v2_var, v2_mean;
 115 static double v1_z_sum, v1_z_ss;
 116 static double v2_z_sum, v2_z_ss;
 117 static double diff, se_diff, sp, xy_sum, xy_diff, xy_ss;
 118 static int cur_var;
 119
 120 /* some defines for CDFlib */
 121 #define FIND_P 1
 122 #define FIND_CRITICAL_VALUE 2
 123 #define ERROR_SIG -1
 124
 125 #ifdef DEBUGGING
 126 static void debug_print (void);
 127 #endif
 128
 129 /* Parses and executes the T-TEST procedure. */
 130 int
 131 cmd_t_test (void)
 132 {
 133   struct cmd_t_test cmd;
 134
 135   if (!lex_force_match_id ("T"))
 136     return CMD_FAILURE;
 137   lex_match ('-');
 138   lex_match_id ("TEST");
 139
 140   if (!parse_t_test (&cmd))
 141     return CMD_FAILURE;
 142
 143 #if DEBUGGING
 144   debug_print ();
 145 #endif
 146
 147   if (n_pairs > 0)
 148     procedure (precalc, pairs_calc, postcalc);
 149   else
 150     /* probably groups then... */
 151     {
 152       printf ("\n\n  t-tests for independent samples of %s %s\n",
 153               groups->name, groups->label);
 154
 155       for (cur_var = 0; cur_var < cmd.n_variables; cur_var++)
 156         {
 157           v1_n = v1_ss = v1_sum = v1_se = v1_var = v1_mean = 0.0;
 158           v2_n = v2_ss = v2_sum = v2_se = v2_var = v2_mean = 0.0;
 159           v1_z_sum = v1_z_ss = v2_z_sum = v2_z_ss = 0.0;
 160           diff = se_diff = sp = xy_diff = xy_ss = xy_sum = 0.0;
 161
 162           procedure (precalc, groups_calc, g_postcalc);
 163           procedure (precalc, z_calc, postcalc);
 164         }
 165     }
 166
 167   return CMD_SUCCESS;
 168 }
 169
 170 void
 171 precalc (void)
 172 {
 173   return;                       /* rilly void... */
 174 }
 175
 176 int
 177 groups_calc (struct ccase * c)
 178 {
 179   int bad_weight;
 180   double group, w;
 181   struct variable *v = cmd.v_variables[cur_var];
 182   double X = c->data[v->fv].f;
 183
 184   /* Get the weight for this case. */
 185   if (default_dict.weight_index == -1)
 186     w = 1.0;
 187   else
 188     {
 189       w = c->data[default_dict.weight_index].f;
 190       if (w <= 0.0 || w == SYSMIS)
 191         {
 192           w = 0.0;
 193           bad_weight = 1;
 194           printf ("Bad weight\n");
 195         }
 196     }
 197
 198   if (X == SYSMIS || X == 0.0)  /* FIXME: should be USER_MISSING? */
 199     {
 200       /* printf("Missing value\n"); */
 201       return 1;
 202     }
 203   else
 204     {
 205       X = X * w;
 206       group = c->data[groups->fv].f;
 207
 208       if (group == groups_values[0].f)
 209         {
 210           v1_sum += X;
 211           v1_ss += X * X;
 212           v1_n += w;
 213         }
 214       else if (group == groups_values[1].f)
 215         {
 216           v2_sum += X;
 217           v2_ss += X * X;
 218           v2_n += w;
 219         }
 220     }
 221
 222   return 1;
 223 }
 224
 225 void
 226 g_postcalc (void)
 227 {
 228   v1_mean = v1_sum / v1_n;
 229   v2_mean = v2_sum / v2_n;
 230   return;
 231 }
 232
 233 int                             /* this pass generates the z-zcores */
 234 z_calc (struct ccase * c)
 235 {
 236   int bad_weight;
 237   double group, z, w;
 238   struct variable *v = cmd.v_variables[cur_var];
 239   double X = c->data[v->fv].f;
 240
 241   z = 0.0;
 242
 243   /* Get the weight for this case. */
 244   if (default_dict.weight_index == -1)
 245     w = 1.0;
 246   else
 247     {
 248       w = c->data[default_dict.weight_index].f;
 249       if (w <= 0.0 || w == SYSMIS)
 250         {
 251           w = 0.0;
 252           bad_weight = 1;
 253         }
 254     }
 255
 256   if (X == SYSMIS || X == 0.0)  /* FIXME: how to specify user missing? */
 257     {
 258       return 1;
 259     }
 260   else
 261     {
 262       group = c->data[groups->fv].f;
 263       X = w * X;
 264
 265       if (group == groups_values[0].f)
 266         {
 267           z = fabs (X - v1_mean);
 268           v1_z_sum += z;
 269           v1_z_ss += pow (z, 2);
 270         }
 271       else if (group == groups_values[1].f)
 272         {
 273           z = fabs (X - v2_mean);
 274           v2_z_ss += pow (z, 2);
 275           v2_z_sum += z;
 276         }
 277     }
 278
 279   return 1;
 280 }
 281
 282
 283 int
 284 pairs_calc (struct ccase * c)
 285 {
 286   int i;
 287   struct variable *v1, *v2;
 288   double X, Y;
 289
 290   for (i = 0; i < n_pairs; i++)
 291     {
 292
 293       v1 = pairs[i][0];
 294       v2 = pairs[i][1];
 295       X = c->data[v1->fv].f;
 296       Y = c->data[v2->fv].f;
 297
 298       if (X == SYSMIS || Y == SYSMIS)
 299         {
 300           printf ("Missing value\n");
 301         }
 302       else
 303         {
 304           xy_sum += X * Y;
 305           xy_diff += (X - Y);
 306           xy_ss += pow ((X - Y), 2);
 307           v1_sum += X;
 308           v2_sum += Y;
 309           v1_n++;
 310           v2_n++;
 311           v1_ss += (X * X);
 312           v2_ss += (Y * Y);
 313         }
 314     }
 315
 316   return 1;
 317 }
 318
 319 void
 320 postcalc (void)
 321 {
 322   /* Calculate basic statistics */
 323   v1_var = variance (v1_n, v1_ss, v1_sum);      /* variances */
 324   v2_var = variance (v2_n, v2_ss, v2_sum);
 325   v1_se = sqrt (v1_var / v1_n); /* standard errors */
 326   v2_se = sqrt (v2_var / v2_n);
 327   diff = v1_mean - v2_mean;
 328
 329   if (n_pairs > 0)
 330     {
 331       t_pairs ();
 332     }
 333   else
 334     {
 335       t_groups ();
 336     }
 337
 338   return;
 339 }
 340
 341 void
 342 t_groups (void)
 343 {
 344   double df_pooled, t_pooled, t_sep, p_pooled, p_sep;
 345   double crt_t_p, crt_t_s, tmp, v1_z, v2_z, f_levene, p_levene;
 346   double df_sep, se_diff_s, se_diff_p;
 347   struct value_list *val_1, *val_2;
 348
 349   /* Levene's test */
 350   val_1 = malloc (sizeof (struct value_list));
 351   val_1->sum = v1_z_sum;
 352   val_1->ss = v1_z_ss;
 353   val_1->n = v1_n;
 354   val_2 = malloc (sizeof (struct value_list));
 355   val_2->sum = v2_z_sum;
 356   val_2->ss = v2_z_ss;
 357   val_2->n = v2_n;
 358
 359   val_1->next = val_2;
 360   val_2->next = NULL;
 361
 362   f_levene = oneway (&f_levene, &p_levene, val_1);
 363
 364   /* T test results for pooled variances */
 365   se_diff_p = sqrt (pooled_variance (v1_n, v1_var, v2_n, v2_var));
 366   df_pooled = v1_n + v2_n - 2.0;
 367   t_pooled = diff / se_diff_p;
 368   p_pooled = t_sig (t_pooled, df_pooled);
 369   crt_t_p = t_crt (df_pooled, 0.025);
 370
 371   if ((2.0 * p_pooled) >= 1.0)
 372     p_pooled = 1.0 - p_pooled;
 373
 374   /* oh god, the separate variance calculations... */
 375   t_sep = diff / sqrt ((v1_var / v1_n) + (v2_var / v2_n));
 376
 377   tmp = (v1_var / v1_n) + (v2_var / v2_n);
 378   tmp = (v1_var / v1_n) / tmp;
 379   tmp = pow (tmp, 2);
 380   tmp = tmp / (v1_n - 1.0);
 381   v1_z = tmp;
 382
 383   tmp = (v1_var / v1_n) + (v2_var / v2_n);
 384   tmp = (v2_var / v2_n) / tmp;
 385   tmp = pow (tmp, 2);
 386   tmp = tmp / (v2_n - 1.0);
 387   v2_z = tmp;
 388
 389   tmp = 1.0 / (v1_z + v2_z);
 390
 391   df_sep = tmp;
 392   p_sep = t_sig (t_sep, df_sep);
 393   if ((2.0 * p_sep) >= 1.0)
 394     p_sep = 1.0 - p_sep;
 395   crt_t_s = t_crt (df_sep, 0.025);
 396   se_diff_s = sqrt ((v1_var / v1_n) + (v2_var / v2_n));
 397
 398   /* FIXME: convert to a proper PSPP output call */
 399   print_t_groups (groups, &groups_values[0], &groups_values[1],
 400                   v1_n, v2_n, v1_mean, v2_mean,
 401                   sqrt (v1_var), sqrt (v2_var), v1_se, v2_se,
 402                   diff, f_levene, p_levene,
 403                   t_pooled, 2.0 * p_pooled, df_pooled, se_diff_p,
 404                   diff - (crt_t_p * se_diff_p), diff + (crt_t_p * se_diff_p),
 405                   t_sep, 2.0 * p_sep, df_sep, se_diff_s,
 406                 diff - (crt_t_s * se_diff_s), diff + (crt_t_s * se_diff_s));
 407   return;
 408 }
 409
 410 void
 411 t_pairs (void)
 412 {
 413   double cov12, cov11, cov22, r, t, p, crt_t, sp, r_t, r_p;
 414   struct variable *v1, *v2;
 415
 416   v1 = pairs[0][0];
 417   v2 = pairs[0][1];
 418   cov12 = covariance (v1_sum, v1_n, v2_sum, v2_n, xy_sum);
 419   cov11 = covariance (v1_sum, v1_n, v1_sum, v1_n, v1_ss);
 420   cov22 = covariance (v2_sum, v2_n, v2_sum, v2_n, v2_ss);
 421   r = pearson_r (cov12, cov11, cov22);
 422   /* this t and it's associated p is a significance test for the pearson's r */
 423   r_t = r * sqrt ((v1_n - 2.0) / (1.0 - (r * r)));
 424   r_p = t_sig (r_t, v1_n - 2.0);
 425
 426   /* now we move to the t test for the difference in means */
 427   diff = xy_diff / v1_n;
 428   sp = sqrt (variance (v1_n, xy_ss, xy_diff));
 429   se_diff = sp / sqrt (v1_n);
 430   t = diff / se_diff;
 431   crt_t = t_crt (v1_n - 1.0, 0.025);
 432   p = t_sig (t, v1_n - 1.0);
 433
 434
 435   printf ("             Number of        2-tail\n");
 436   printf (" Variable      pairs    Corr   Sig      Mean    SD   SE of Mean\n");
 437   printf ("---------------------------------------------------------------\n");
 438   printf ("%s                                  %8.4f %8.4f %8.4f\n",
 439           v1->name, v1_mean, sqrt (v1_var), v1_se);
 440   printf ("           %8.4f  %0.4f  %0.4f\n", v1_n, r, r_p);
 441   printf ("%s                                  %8.4f %8.4f %8.4f\n",
 442           v2->name, v2_mean, sqrt (v2_var), v2_se);
 443   printf ("---------------------------------------------------------------\n");
 444
 445   printf ("\n\n\n");
 446   printf ("      Paired Differences              |\n");
 447   printf (" Mean          SD         SE of Mean  |  t-value   df   2-tail Sig\n");
 448   printf ("--------------------------------------|---------------------------\n");
 449
 450   printf ("%8.4f    %8.4f    %8.4f      | %8.4f %8.4f %8.4f\n",
 451           diff, sp, se_diff, t, v1_n - 1.0, 2.0 * (1.0 - p));
 452
 453   printf ("95pc CI (%8.4f, %8.4f)          |\n\n",
 454           diff - (se_diff * crt_t), diff + (se_diff * crt_t));
 455
 456   return;
 457 }
 458
 459 static int parse_value (union value *);
 460
 461 /* Parses the GROUPS subcommand. */
 462 int
 463 tts_custom_groups (struct cmd_t_test *cmd unused)
 464 {
 465   groups = parse_variable ();
 466   if (!groups)
 467     {
 468       lex_error (_("expecting variable name in GROUPS subcommand"));
 469       return 0;
 470     }
 471   if (groups->type == T_STRING && groups->width > MAX_SHORT_STRING)
 472     {
 473       msg (SE, _("Long string variable %s is not valid here."),
 474            groups->name);
 475       return 0;
 476     }
 477
 478   if (!lex_match ('('))
 479     {
 480       if (groups->type == NUMERIC)
 481         {
 482           n_groups_values = 2;
 483           groups_values[0].f = 1;
 484           groups_values[1].f = 2;
 485           return 1;
 486         }
 487       else
 488         {
 489           msg (SE, _("When applying GROUPS to a string variable, at "
 490                      "least one value must be specified."));
 491           return 0;
 492         }
 493     }
 494
 495   if (!parse_value (&groups_values[0]))
 496     return 0;
 497   n_groups_values = 1;
 498
 499   lex_match (',');
 500
 501   if (lex_match (')'))
 502     return 1;
 503
 504   if (!parse_value (&groups_values[1]))
 505     return 0;
 506   n_groups_values = 2;
 507
 508   if (!lex_force_match (')'))
 509     return 0;
 510
 511   return 1;
 512 }
 513
 514 /* Parses the current token (numeric or string, depending on the
 515    variable in `groups') into value V and returns success. */
 516 static int
 517 parse_value (union value * v)
 518 {
 519   if (groups->type == NUMERIC)
 520     {
 521       if (!lex_force_num ())
 522         return 0;
 523       v->f = tokval;
 524     }
 525   else
 526     {
 527       if (!lex_force_string ())
 528         return 0;
 529       strncpy (v->s, ds_value (&tokstr), ds_length (&tokstr));
 530     }
 531
 532   lex_get ();
 533
 534   return 1;
 535 }
 536
 537 /* Parses the PAIRS subcommand. */
 538 static int
 539 tts_custom_pairs (struct cmd_t_test *cmd unused)
 540 {
 541   struct variable **vars;
 542   int n_before_WITH;
 543   int n_vars;
 544   int paired;
 545   int extra;
 546 #if DEBUGGING
 547   int n_predicted;
 548 #endif
 549
 550   if ((token != T_ID || !is_varname (tokid)) && token != T_ALL)
 551     return 2;
 552   if (!parse_variables (&default_dict, &vars, &n_vars,
 553                         PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
 554     return 0;
 555
 556   assert (n_vars);
 557   if (lex_match (T_WITH))
 558     {
 559       n_before_WITH = n_vars;
 560
 561       if (!parse_variables (&default_dict, &vars, &n_vars,
 562                             PV_DUPLICATE | PV_APPEND
 563                             | PV_NUMERIC | PV_NO_SCRATCH))
 564         {
 565           free (vars);
 566           return 0;
 567         }
 568     }
 569   else
 570     n_before_WITH = 0;
 571
 572   paired = (lex_match ('(') && lex_match_id ("PAIRED") && lex_match (')'));
 573
 574   if (paired)
 575     {
 576       if (n_before_WITH * 2 != n_vars)
 577         {
 578           free (vars);
 579           msg (SE, _("PAIRED was specified but the number of variables "
 580                      "preceding WITH (%d) did not match the number "
 581                      "following (%d)."),
 582                n_before_WITH, n_vars - n_before_WITH);
 583           return 0;
 584         }
 585
 586       extra = n_before_WITH;
 587     }
 588   else if (n_before_WITH)
 589     extra = n_before_WITH * (n_vars - n_before_WITH);
 590   else
 591     {
 592       if (n_vars < 2)
 593         {
 594           free (vars);
 595           msg (SE, _("At least two variables must be specified "
 596                      "on PAIRS."));
 597           return 0;
 598         }
 599
 600       extra = n_vars * (n_vars - 1) / 2;
 601     }
 602
 603 #if DEBUGGING
 604   n_predicted = n_pairs + extra;
 605 #endif
 606
 607   pairs = xrealloc (pairs, sizeof (struct variable *[2]) * (n_pairs + extra));
 608
 609   if (paired)
 610     {
 611       int i;
 612
 613       for (i = 0; i < extra; i++)
 614         {
 615           pairs[n_pairs][0] = vars[i];
 616           pairs[n_pairs++][1] = vars[i + extra];
 617         }
 618     }
 619   else if (n_before_WITH)
 620     {
 621       int i;
 622
 623       for (i = 0; i < n_before_WITH; i++)
 624         {
 625           int j;
 626
 627           for (j = n_before_WITH; j < n_vars; j++)
 628             {
 629               pairs[n_pairs][0] = vars[i];
 630               pairs[n_pairs++][1] = vars[j];
 631             }
 632         }
 633     }
 634   else
 635     {
 636       int i;
 637
 638       for (i = 0; i < n_vars; i++)
 639         {
 640           int j;
 641
 642           for (j = i + 1; j < n_vars; j++)
 643             {
 644               pairs[n_pairs][0] = vars[i];
 645               pairs[n_pairs++][1] = vars[j];
 646             }
 647         }
 648     }
 649
 650 #if DEBUGGING
 651   assert (n_pairs == n_predicted);
 652 #endif
 653
 654   free (vars);
 655   return 1;
 656 }
 657
 658 #if DEBUGGING
 659 static void
 660 debug_print (void)
 661 {
 662   printf ("T-TEST\n");
 663   if (groups)
 664     {
 665       printf ("  GROUPS=%s", groups->name);
 666       if (n_groups_values)
 667         {
 668           int i;
 669
 670           printf (" (");
 671           for (i = 0; i < n_groups_values; i++)
 672             if (groups->type == NUMERIC)
 673               printf ("%g%s", groups_values[i].f, i ? " " : "");
 674             else
 675               printf ("%.*s%s", groups->width, groups_values[i].s,
 676                       i ? " " : "");
 677           printf (")");
 678         }
 679       printf ("\n");
 680     }
 681   if (cmd.n_variables)
 682     {
 683       int i;
 684
 685       printf ("  VARIABLES=");
 686       for (i = 0; i < cmd.n_variables; i++)
 687         printf ("%s ", cmd.v_variables[i]->name);
 688       printf ("\n");
 689     }
 690   if (cmd.sbc_pairs)
 691     {
 692       int i;
 693
 694       printf ("  PAIRS=");
 695       for (i = 0; i < n_pairs; i++)
 696         printf ("%s ", pairs[i][0]->name);
 697       printf ("WITH");
 698       for (i = 0; i < n_pairs; i++)
 699         printf (" %s", pairs[i][1]->name);
 700       printf (" (PAIRED)\n");
 701     }
 702   printf ("  MISSING=%s %s\n",
 703           cmd.miss == TTS_ANALYSIS ? "ANALYSIS" : "LISTWISE",
 704           cmd.miss == TTS_INCLUDE ? "INCLUDE" : "EXCLUDE");
 705   printf ("  FORMAT=%s\n",
 706           cmd.fmt == TTS_LABELS ? "LABELS" : "NOLABELS");
 707   if (cmd.criteria != NOT_LONG)
 708     printf ("  CRITERIA=%f\n", cmd.criteria);
 709 }
 710
 711 #endif /* DEBUGGING */
 712
 713 /* Here are some general routines tha should probably be moved into
 714    a separate library and documented as part of the PSPP "API"   */
 715 double
 716 variance (double n, double ss, double sum)
 717 {
 718   return ((ss - ((sum * sum) / n)) / (n - 1.0));
 719 }
 720
 721 double
 722 pooled_variance (double n_1, double var_1, double n_2, double var_2)
 723 {
 724   double tmp;
 725
 726   tmp = n_1 + n_2 - 2.0;
 727   tmp = (((n_1 - 1.0) * var_1) + ((n_2 - 1.0) * var_2)) / tmp;
 728   tmp = tmp * ((n_1 + n_2) / (n_1 * n_2));
 729   return tmp;
 730 }
 731
 732 double
 733 oneway (double *f, double *p, struct value_list *levels)
 734 {
 735   double k, SSTR, SSE, SSTO, N, MSTR, MSE, sum, dftr, dfe, print;
 736   struct value_list *g;
 737
 738   k = 0.0;
 739
 740   for (g = levels; g != NULL; g = g->next)
 741     {
 742       k++;
 743       sum += g->sum;
 744       N += g->n;
 745       SSTR += g->ss - (pow (g->sum, 2) / g->n);
 746       SSTO += g->ss;
 747     }
 748
 749   SSTO = SSTO - (pow (sum, 2) / N);
 750   SSE = SSTO - SSTR;
 751
 752   dftr = N - k;
 753   dfe = k - 1.0;
 754   MSTR = SSTR / dftr;
 755   MSE = SSE / dfe;
 756
 757   *f = (MSE / MSTR);
 758   *p = f_sig (*f, dfe, dftr);
 759
 760   print = 1.0;
 761   if (print == 1.0)
 762     {
 763       printf ("sum1 %f, sum2 %f, ss1 %f, ss2 %f\n",
 764               levels->sum, levels->next->sum, levels->ss, levels->next->ss);
 765       printf ("                - - - - - - O N E W A Y - - - - - -\n\n");
 766       printf ("   Variable %s %s\n",
 767               cmd.v_variables[0]->name, cmd.v_variables[0]->label);
 768       printf ("By Variable %s %s\n", groups->name, groups->label);
 769       printf ("\n             Analysis of Variance\n\n");
 770       printf ("                    Sum of    Mean     F       F\n");
 771       printf ("Source       D.F.  Squares  Squares  Ratio   Prob\n\n");
 772       printf ("Between   %8.0f %8.4f %8.4f %8.4f %8.4f\n",
 773               dfe, SSE, MSE, *f, *p);
 774       printf ("Within    %8.0f %8.4f %8.4f\n", dftr, SSTR, MSTR);
 775       printf ("Total     %8.0f %8.4f\n\n\n", N - 1.0, SSTO);
 776     }
 777   return (*f);
 778 }
 779
 780 double
 781 f_sig (double f, double dfn, double dfd)
 782 {
 783   int which, status;
 784   double p, q, bound;
 785
 786   which = FIND_P;
 787   status = 1;
 788   p = q = bound = 0.0;
 789   cdff (&which, &p, &q, &f, &dfn, &dfd, &status, &bound);
 790
 791   switch (status)
 792     {
 793     case -1:
 794       {
 795         printf ("Parameter 1 is out of range\n");
 796         break;
 797       }
 798     case -2:
 799       {
 800         printf ("Parameter 2 is out of range\n");
 801         break;
 802       }
 803     case -3:
 804       {
 805         printf ("Parameter 3 is out of range\n");
 806         break;
 807       }
 808     case -4:
 809       {
 810         printf ("Parameter 4 is out of range\n");
 811         break;
 812       }
 813     case -5:
 814       {
 815         printf ("Parameter 5 is out of range\n");
 816         break;
 817       }
 818     case -6:
 819       {
 820         printf ("Parameter 6 is out of range\n");
 821         break;
 822       }
 823     case -7:
 824       {
 825         printf ("Parameter 7 is out of range\n");
 826         break;
 827       }
 828     case -8:
 829       {
 830         printf ("Parameter 8 is out of range\n");
 831         break;
 832       }
 833     case 0:
 834       {
 835         /* printf( "Command completed successfully\n" ); */
 836         break;
 837       }
 838     case 1:
 839       {
 840         printf ("Answer appears to be lower than the lowest search bound\n");
 841         break;
 842       }
 843     case 2:
 844       {
 845         printf ("Answer appears to be higher than the greatest search bound\n");
 846         break;
 847       }
 848     case 3:
 849       {
 850         printf ("P - Q NE 1\n");
 851         break;
 852       }
 853     }
 854
 855   if (status)
 856     {
 857       return (double) ERROR_SIG;
 858     }
 859   else
 860     {
 861       return q;
 862     }
 863 }
 864
 865 double
 866 t_crt (double df, double q)
 867 {
 868   int which, status;
 869   double p, bound, t;
 870
 871   which = FIND_CRITICAL_VALUE;
 872   bound = 0.0;
 873   p = 1.0 - q;
 874   t = 0.0;
 875
 876   cdft (&which, &p, &q, &t, &df, &status, &bound);
 877
 878   switch (status)
 879     {
 880     case -1:
 881       {
 882         printf ("t_crt: Parameter 1 is out of range\n");
 883         break;
 884       }
 885     case -2:
 886       {
 887         printf ("t_crt: value of p (%f) is out of range\n", p);
 888         break;
 889       }
 890     case -3:
 891       {
 892         printf ("t_crt: value of q (%f) is out of range\n", q);
 893         break;
 894       }
 895     case -4:
 896       {
 897         printf ("t_crt: value of df (%f) is out of range\n", df);
 898         break;
 899       }
 900     case -5:
 901       {
 902         printf ("t_crt: Parameter 5 is out of range\n");
 903         break;
 904       }
 905     case -6:
 906       {
 907         printf ("t_crt: Parameter 6 is out of range\n");
 908         break;
 909       }
 910     case -7:
 911       {
 912         printf ("t_crt: Parameter 7 is out of range\n");
 913         break;
 914       }
 915     case 0:
 916       {
 917         /* printf( "Command completed successfully\n" ); */
 918         break;
 919       }
 920     case 1:
 921       {
 922         printf ("t_crt: Answer appears to be lower than the lowest search bound\n");
 923         break;
 924       }
 925     case 2:
 926       {
 927         printf ("t_crt: Answer appears to be higher than the greatest search bound\n");
 928         break;
 929       }
 930     case 3:
 931       {
 932         printf ("t_crt: P - Q NE 1\n");
 933         break;
 934       }
 935     }
 936
 937   if (status)
 938     {
 939       return (double) ERROR_SIG;
 940     }
 941   else
 942     {
 943       return t;
 944     }
 945 }
 946
 947 double
 948 t_sig (double t, double df)
 949 {
 950   int which, status;
 951   double p, q, bound;
 952
 953   which = FIND_P;
 954   q = 0.0;
 955   p = 0.0;
 956   bound = 0.0;
 957
 958   cdft (&which, &p, &q, &t, &df, &status, &bound);
 959
 960   switch (status)
 961     {
 962     case -1:
 963       {
 964         printf ("t-sig: Parameter 1 is out of range\n");
 965         break;
 966       }
 967     case -2:
 968       {
 969         printf ("t-sig: Parameter 2 is out of range\n");
 970         break;
 971       }
 972     case -3:
 973       {
 974         printf ("t-sig: Parameter 3 is out of range\n");
 975         break;
 976       }
 977     case -4:
 978       {
 979         printf ("t-sig: Parameter 4 is out of range\n");
 980         break;
 981       }
 982     case -5:
 983       {
 984         printf ("t-sig: Parameter 5 is out of range\n");
 985         break;
 986       }
 987     case -6:
 988       {
 989         printf ("t-sig: Parameter 6 is out of range\n");
 990         break;
 991       }
 992     case -7:
 993       {
 994         printf ("t-sig: Parameter 7 is out of range\n");
 995         break;
 996       }
 997     case 0:
 998       {
 999         /* printf( "Command completed successfully\n" ); */
1000         break;
1001       }
1002     case 1:
1003       {
1004         printf ("t-sig: Answer appears to be lower than the lowest search bound\n");
1005         break;
1006       }
1007     case 2:
1008       {
1009         printf ("t-sig: Answer appears to be higher than the greatest search bound\n");
1010         break;
1011       }
1012     case 3:
1013       {
1014         printf ("t-sig: P - Q NE 1\n");
1015         break;
1016       }
1017     }
1018
1019   if (status)
1020     {
1021       return (double) ERROR_SIG;
1022     }
1023   else
1024     {
1025       return q;
1026     }
1027 }
1028
1029 double
1030 covariance (double x_sum, double x_n, double y_sum, double y_n, double ss)
1031 {
1032   double tmp;
1033
1034   tmp = x_sum * y_sum;
1035   tmp = tmp / x_n;
1036   tmp = ss - tmp;
1037   tmp = (tmp / (x_n + y_n - 1.0));
1038   return tmp;
1039 }
1040
1041 double
1042 pearson_r (double c_xy, double c_xx, double c_yy)
1043 {
1044   return (c_xy / (sqrt (c_xx * c_yy)));
1045 }
1046
1047 void
1048 print_t_groups (struct variable * grps, union value * g1, union value * g2,
1049                 double n1, double n2, double mean1, double mean2,
1050                 double sd1, double sd2, double se1, double se2,
1051                 double diff, double l_f, double l_p,
1052                 double p_t, double p_sig, double p_df, double p_sed,
1053                 double p_l, double p_h,
1054                 double s_t, double s_sig, double s_df, double s_sed,
1055                 double s_l, double s_h)
1056 {
1057
1058   /* Display all this shit as SPSS 6.0 does (roughly) */
1059   printf ("\n\n                 Number                                 \n");
1060   printf ("   Variable     of Cases    Mean      SD      SE of Mean\n");
1061   printf ("-----------------------------------------------------------\n");
1062   printf ("   %s %s\n\n", cmd.v_variables[cur_var]->name, cmd.v_variables[cur_var]->label);
1063   printf ("%s %8.4f %8.0f    %8.4f  %8.3f    %8.3f\n",
1064           get_val_lab (grps, *g1, 0), g1->f, n1, mean1, sd1, se1);
1065   printf ("%s %8.4f %8.0f    %8.4f  %8.3f    %8.3f\n",
1066           get_val_lab (grps, *g2, 0), g2->f, n2, mean2, sd2, se2);
1067   printf ("-----------------------------------------------------------\n");
1068   printf ("\n   Mean Difference = %8.4f\n", diff);
1069   printf ("\n   Levene's Test for Equality of Variances: F= %.3f  P= %.3f\n",
1070           l_f, l_p);
1071   printf ("\n\n   t-test for Equality of Means                         95pc     \n");
1072   printf ("Variances   t-value    df   2-Tail Sig SE of Diff    CI for Diff  \n");
1073   printf ("-----------------------------------------------------------------\n");
1074   printf ("Equal     %8.2f %8.0f %8.3f %8.3f (%8.3f, %8.3f)\n",
1075           p_t, p_df, p_sig, p_sed, p_l, p_h);
1076   printf ("Unequal   %8.2f %8.2f %8.3f %8.3f (%8.3f, %8.3f)\n",
1077           s_t, s_df, s_sig, s_sed, s_l, s_h);
1078   printf ("-----------------------------------------------------------------\n");
1079 }
1080
1081 /*
1082    Local Variables:
1083    mode: c
1084    End:
1085 */