1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_cdf.h>
25 #include <data/case.h>
26 #include <data/casegrouper.h>
27 #include <data/casereader.h>
28 #include <data/dictionary.h>
29 #include <data/procedure.h>
30 #include <data/value-labels.h>
31 #include <data/variable.h>
32 #include <language/command.h>
33 #include <language/dictionary/split-file.h>
34 #include <language/lexer/lexer.h>
35 #include <libpspp/array.h>
36 #include <libpspp/assertion.h>
37 #include <libpspp/compiler.h>
38 #include <libpspp/hash.h>
39 #include <libpspp/message.h>
40 #include <libpspp/misc.h>
41 #include <libpspp/str.h>
42 #include <libpspp/taint.h>
43 #include <math/group-proc.h>
44 #include <math/levene.h>
45 #include <output/manager.h>
46 #include <output/table.h>
47 #include <data/format.h>
53 #define _(msgid) gettext (msgid)
61 +variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
63 missing=miss:!analysis/listwise,
64 incl:include/!exclude;
65 +format=fmt:!labels/nolabels;
66 criteria=:cin(d:criteria,"%s > 0. && %s < 1.").
77 /* A pair of variables to be compared. */
80 const struct variable *v[2]; /* The paired variables. */
81 double n; /* The number of valid variable pairs */
82 double sum[2]; /* The sum of the members */
83 double ssq[2]; /* sum of squares of the members */
84 double std_dev[2]; /* Std deviation of the members */
85 double s_std_dev[2]; /* Sample Std deviation of the members */
86 double mean[2]; /* The means of the members */
87 double correlation; /* Correlation coefficient between the variables. */
88 double sum_of_diffs; /* The sum of the differences */
89 double sum_of_prod; /* The sum of the products */
90 double mean_diff; /* The mean of the differences */
91 double ssq_diffs; /* The sum of the squares of the differences */
92 double std_dev_diff; /* The std deviation of the differences */
95 /* Which mode was T-TEST invoked */
97 T_1_SAMPLE, /* One-sample tests. */
98 T_IND_SAMPLES, /* Independent-sample tests. */
99 T_PAIRED /* Paired-sample tests. */
102 /* Total state of a T-TEST procedure. */
105 enum t_test_mode mode; /* Mode that T-TEST was invoked in. */
106 double criteria; /* Confidence interval in (0, 1). */
107 enum mv_class exclude; /* Classes of missing values to exclude. */
108 bool listwise_missing; /* Drop whole case if one missing var? */
109 struct fmt_spec weight_format; /* Format of weight variable. */
111 /* Dependent variables. */
112 const struct variable **vars;
115 /* For mode == T_1_SAMPLE. */
118 /* For mode == T_PAIRED only. */
122 /* For mode == T_IND_SAMPLES only. */
123 struct variable *indep_var; /* Independent variable. */
124 enum comparison criterion; /* Type of comparison. */
125 double critical_value; /* CMP_LE only: Grouping threshold value. */
126 union value g_value[2]; /* CMP_EQ only: Per-group indep var values. */
129 static int parse_value (struct lexer *, union value *, int width);
131 /* Statistics Summary Box */
135 void (*populate) (struct ssbox *, struct t_test_proc *);
136 void (*finalize) (struct ssbox *);
139 static void ssbox_create (struct ssbox *, struct t_test_proc *);
140 static void ssbox_populate (struct ssbox *, struct t_test_proc *);
141 static void ssbox_finalize (struct ssbox *);
143 /* Paired Samples Correlation box */
144 static void pscbox (struct t_test_proc *);
147 /* Test Results Box. */
150 void (*populate) (struct trbox *, struct t_test_proc *);
151 void (*finalize) (struct trbox *);
154 static void trbox_create (struct trbox *, struct t_test_proc *);
155 static void trbox_populate (struct trbox *, struct t_test_proc *);
156 static void trbox_finalize (struct trbox *);
158 static void calculate (struct t_test_proc *, struct casereader *,
159 const struct dataset *);
161 static int compare_group_binary (const struct group_statistics *a,
162 const struct group_statistics *b,
163 const struct t_test_proc *);
164 static unsigned hash_group_binary (const struct group_statistics *g,
165 const struct t_test_proc *p);
168 cmd_t_test (struct lexer *lexer, struct dataset *ds)
170 struct cmd_t_test cmd;
171 struct t_test_proc proc;
172 struct casegrouper *grouper;
173 struct casereader *group;
180 proc.indep_var = NULL;
181 if (!parse_t_test (lexer, ds, &cmd, &proc))
184 wv = dict_get_weight (dataset_dict (ds));
185 proc.weight_format = wv ? *var_get_print_format (wv) : F_8_0;
187 if ((cmd.sbc_testval != 0) + (cmd.sbc_groups != 0) + (cmd.sbc_pairs != 0)
190 msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands "
191 "must be specified."));
195 proc.mode = (cmd.sbc_testval ? T_1_SAMPLE
196 : cmd.sbc_groups ? T_IND_SAMPLES
198 proc.criteria = cmd.sbc_criteria ? cmd.criteria : 0.95;
199 proc.exclude = cmd.incl != TTS_INCLUDE ? MV_ANY : MV_SYSTEM;
200 proc.listwise_missing = cmd.miss == TTS_LISTWISE;
202 if (proc.mode == T_1_SAMPLE)
203 proc.testval = cmd.n_testval[0];
205 if (proc.mode == T_PAIRED)
209 if (cmd.sbc_variables)
211 msg (SE, _("VARIABLES subcommand may not be used with PAIRS."));
215 /* Fill proc.vars with the unique variables from pairs. */
216 proc.n_vars = proc.n_pairs * 2;
217 proc.vars = xmalloc (sizeof *proc.vars * proc.n_vars);
218 for (i = j = 0; i < proc.n_pairs; i++)
220 proc.vars[j++] = proc.pairs[i].v[0];
221 proc.vars[j++] = proc.pairs[i].v[1];
223 proc.n_vars = sort_unique (proc.vars, proc.n_vars, sizeof *proc.vars,
224 compare_var_ptrs_by_name, NULL);
228 if (!cmd.n_variables)
230 msg (SE, _("One or more VARIABLES must be specified."));
233 proc.n_vars = cmd.n_variables;
234 proc.vars = cmd.v_variables;
235 cmd.v_variables = NULL;
239 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
240 while (casegrouper_get_next_group (grouper, &group))
241 calculate (&proc, group, ds);
242 ok = casegrouper_destroy (grouper);
243 ok = proc_commit (ds) && ok;
245 if (proc.mode == T_IND_SAMPLES)
248 /* Destroy any group statistics we created */
249 for (v = 0; v < proc.n_vars; v++)
251 struct group_proc *grpp = group_proc_get (proc.vars[v]);
252 hsh_destroy (grpp->group_hash);
259 if (proc.indep_var != NULL)
261 int width = var_get_width (proc.indep_var);
262 value_destroy (&proc.g_value[0], width);
263 value_destroy (&proc.g_value[1], width);
267 return ok ? CMD_SUCCESS : CMD_FAILURE;
271 tts_custom_groups (struct lexer *lexer, struct dataset *ds,
272 struct cmd_t_test *cmd UNUSED, void *proc_)
274 struct t_test_proc *proc = proc_;
278 lex_match (lexer, '=');
280 proc->indep_var = parse_variable (lexer, dataset_dict (ds));
281 if (proc->indep_var == NULL)
283 lex_error (lexer, "expecting variable name in GROUPS subcommand");
286 width = var_get_width (proc->indep_var);
287 value_init (&proc->g_value[0], width);
288 value_init (&proc->g_value[1], width);
290 if (!lex_match (lexer, '('))
294 if (!parse_value (lexer, &proc->g_value[0], width))
296 lex_match (lexer, ',');
297 if (lex_match (lexer, ')'))
301 if (!parse_value (lexer, &proc->g_value[1], width)
302 || !lex_force_match (lexer, ')'))
308 if (var_is_numeric (proc->indep_var))
310 proc->criterion = n_values == 1 ? CMP_LE : CMP_EQ;
312 proc->critical_value = proc->g_value[0].f;
313 else if (n_values == 0)
315 proc->g_value[0].f = 1;
316 proc->g_value[1].f = 2;
321 proc->criterion = CMP_EQ;
324 msg (SE, _("When applying GROUPS to a string variable, two "
325 "values must be specified."));
333 add_pair (struct t_test_proc *proc,
334 const struct variable *v0, const struct variable *v1)
336 struct pair *p = &proc->pairs[proc->n_pairs++];
342 tts_custom_pairs (struct lexer *lexer, struct dataset *ds,
343 struct cmd_t_test *cmd UNUSED, void *proc_)
345 struct t_test_proc *proc = proc_;
347 const struct variable **vars1 = NULL;
350 const struct variable **vars2 = NULL;
355 size_t n_total_pairs;
358 lex_match (lexer, '=');
360 if (!parse_variables_const (lexer, dataset_dict (ds), &vars1, &n_vars1,
361 PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
364 if (lex_match (lexer, T_WITH))
366 if (!parse_variables_const (lexer, dataset_dict (ds), &vars2, &n_vars2,
367 PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
373 if (lex_match (lexer, '(')
374 && lex_match_id (lexer, "PAIRED")
375 && lex_match (lexer, ')'))
378 if (n_vars1 != n_vars2)
380 msg (SE, _("PAIRED was specified but the number of variables "
381 "preceding WITH (%zu) did not match the number "
395 msg (SE, _("At least two variables must be specified on PAIRS."));
400 /* Allocate storage for the new pairs. */
401 n_total_pairs = proc->n_pairs + (paired ? n_vars1
402 : n_vars2 > 0 ? n_vars1 * n_vars2
403 : n_vars1 * (n_vars1 - 1) / 2);
404 proc->pairs = xnrealloc (proc->pairs, n_total_pairs, sizeof *proc->pairs);
406 /* Populate the pairs with the appropriate variables. */
408 for (i = 0; i < n_vars1; i++)
409 add_pair (proc, vars1[i], vars2[i]);
410 else if (n_vars2 > 0)
411 for (i = 0; i < n_vars1; i++)
412 for (j = 0; j < n_vars2; j++)
413 add_pair (proc, vars1[i], vars2[j]);
415 for (i = 0; i < n_vars1; i++)
416 for (j = i + 1; j < n_vars1; j++)
417 add_pair (proc, vars1[i], vars1[j]);
418 assert (proc->n_pairs == n_total_pairs);
425 /* Parses the current token (numeric or string, depending on type)
426 value v and returns success. */
428 parse_value (struct lexer *lexer, union value *v, int width)
432 if (!lex_force_num (lexer))
434 v->f = lex_tokval (lexer);
438 if (!lex_force_string (lexer))
440 value_copy_str_rpad (v, width, ds_cstr (lex_tokstr (lexer)), ' ');
448 /* Implementation of the SSBOX object. */
450 static void ssbox_base_init (struct ssbox *, int cols, int rows);
451 static void ssbox_base_finalize (struct ssbox *);
452 static void ssbox_one_sample_init (struct ssbox *, struct t_test_proc *);
453 static void ssbox_independent_samples_init (struct ssbox *, struct t_test_proc *);
454 static void ssbox_paired_init (struct ssbox *, struct t_test_proc *);
456 /* Factory to create an ssbox. */
458 ssbox_create (struct ssbox *ssb, struct t_test_proc *proc)
463 ssbox_one_sample_init (ssb, proc);
466 ssbox_independent_samples_init (ssb, proc);
469 ssbox_paired_init (ssb, proc);
476 /* Despatcher for the populate method */
478 ssbox_populate (struct ssbox *ssb, struct t_test_proc *proc)
480 ssb->populate (ssb, proc);
483 /* Despatcher for finalize */
485 ssbox_finalize (struct ssbox *ssb)
490 /* Submit the box and clear up */
492 ssbox_base_finalize (struct ssbox *ssb)
497 /* Initialize a ssbox struct */
499 ssbox_base_init (struct ssbox *this, int cols, int rows)
501 this->finalize = ssbox_base_finalize;
502 this->t = tab_create (cols, rows, 0);
504 tab_columns (this->t, SOM_COL_DOWN, 1);
505 tab_headers (this->t, 0, 0, 1, 0);
506 tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
507 tab_hline (this->t, TAL_2, 0, cols- 1, 1);
508 tab_dim (this->t, tab_natural_dimensions, NULL);
511 /* ssbox implementations. */
513 static void ssbox_one_sample_populate (struct ssbox *, struct t_test_proc *);
514 static void ssbox_independent_samples_populate (struct ssbox *,
515 struct t_test_proc *);
516 static void ssbox_paired_populate (struct ssbox *, struct t_test_proc *);
518 /* Initialize the one_sample ssbox */
520 ssbox_one_sample_init (struct ssbox *this, struct t_test_proc *proc)
523 const int vsize = proc->n_vars + 1;
525 this->populate = ssbox_one_sample_populate;
527 ssbox_base_init (this, hsize, vsize);
528 tab_title (this->t, _("One-Sample Statistics"));
529 tab_vline (this->t, TAL_2, 1, 0, vsize - 1);
530 tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N"));
531 tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
532 tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
533 tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean"));
536 /* Initialize the independent samples ssbox */
538 ssbox_independent_samples_init (struct ssbox *this, struct t_test_proc *proc)
541 int vsize = proc->n_vars * 2 + 1;
543 this->populate = ssbox_independent_samples_populate;
545 ssbox_base_init (this, hsize, vsize);
546 tab_vline (this->t, TAL_GAP, 1, 0, vsize - 1);
547 tab_title (this->t, _("Group Statistics"));
548 tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE,
549 var_get_name (proc->indep_var));
550 tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
551 tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
552 tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
553 tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean"));
556 /* Populate the ssbox for independent samples */
558 ssbox_independent_samples_populate (struct ssbox *ssb,
559 struct t_test_proc *proc)
564 double indep_value[2];
568 for (i = 0; i < 2; i++)
570 union value *value = &proc->g_value[i];
571 int width = var_get_width (proc->indep_var);
573 indep_value[i] = (proc->criterion == CMP_LE ? proc->critical_value
576 if (val_type_from_width (width) == VAL_NUMERIC)
578 const char *s = var_lookup_value_label (proc->indep_var, value);
579 val_lab[i] = s ? xstrdup (s) : xasprintf ("%g", indep_value[i]);
582 val_lab[i] = xmemdup0 (value_str (value, width), width);
585 if (proc->criterion == CMP_LE)
587 strcpy (prefix[0], ">=");
588 strcpy (prefix[1], "<");
592 strcpy (prefix[0], "");
593 strcpy (prefix[1], "");
596 for (i = 0; i < proc->n_vars; i++)
598 const struct variable *var = proc->vars[i];
599 struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
602 tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT,
603 var_get_name (proc->vars[i]));
604 tab_text (ssb->t, 1, i * 2 + 1, TAB_LEFT | TAT_PRINTF,
605 "%s%s", prefix[0], val_lab[0]);
606 tab_text (ssb->t, 1, i * 2 + 1+ 1, TAB_LEFT | TAT_PRINTF,
607 "%s%s", prefix[1], val_lab[1]);
609 /* Fill in the group statistics */
610 for (count = 0; count < 2; count++)
612 union value search_val;
613 struct group_statistics *gs;
615 if (proc->criterion == CMP_LE)
616 search_val.f = proc->critical_value + (count == 0 ? 1.0 : -1.0);
618 search_val = proc->g_value[count];
620 gs = hsh_find (grp_hash, &search_val);
623 tab_double (ssb->t, 2, i * 2 + count+ 1, TAB_RIGHT, gs->n,
624 &proc->weight_format);
625 tab_double (ssb->t, 3, i * 2 + count+ 1, TAB_RIGHT, gs->mean, NULL);
626 tab_double (ssb->t, 4, i * 2 + count+ 1, TAB_RIGHT, gs->std_dev,
628 tab_double (ssb->t, 5, i * 2 + count+ 1, TAB_RIGHT, gs->se_mean,
636 /* Initialize the paired values ssbox */
638 ssbox_paired_init (struct ssbox *this, struct t_test_proc *proc)
641 int vsize = proc->n_pairs * 2 + 1;
643 this->populate = ssbox_paired_populate;
645 ssbox_base_init (this, hsize, vsize);
646 tab_title (this->t, _("Paired Sample Statistics"));
647 tab_vline (this->t, TAL_GAP, 1, 0, vsize - 1);
648 tab_vline (this->t, TAL_2, 2, 0, vsize - 1);
649 tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
650 tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N"));
651 tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
652 tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean"));
655 /* Populate the ssbox for paired values */
657 ssbox_paired_populate (struct ssbox *ssb, struct t_test_proc *proc)
661 for (i = 0; i < proc->n_pairs; i++)
663 struct pair *p = &proc->pairs[i];
666 tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT | TAT_PRINTF, _("Pair %d"), i);
667 for (j=0; j < 2; j++)
670 tab_text (ssb->t, 1, i * 2 + j + 1, TAB_LEFT,
671 var_get_name (p->v[j]));
674 tab_double (ssb->t, 2, i * 2 + j + 1, TAB_RIGHT, p->mean[j], NULL);
675 tab_double (ssb->t, 3, i * 2 + j + 1, TAB_RIGHT, p->n,
676 &proc->weight_format);
677 tab_double (ssb->t, 4, i * 2 + j + 1, TAB_RIGHT, p->std_dev[j],
679 tab_double (ssb->t, 5, i * 2 + j + 1, TAB_RIGHT,
680 p->std_dev[j] /sqrt (p->n), NULL);
685 /* Populate the one sample ssbox */
687 ssbox_one_sample_populate (struct ssbox *ssb, struct t_test_proc *proc)
691 for (i = 0; i < proc->n_vars; i++)
693 struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
695 tab_text (ssb->t, 0, i + 1, TAB_LEFT, var_get_name (proc->vars[i]));
696 tab_double (ssb->t, 1, i + 1, TAB_RIGHT, gs->n, &proc->weight_format);
697 tab_double (ssb->t, 2, i + 1, TAB_RIGHT, gs->mean, NULL);
698 tab_double (ssb->t, 3, i + 1, TAB_RIGHT, gs->std_dev, NULL);
699 tab_double (ssb->t, 4, i + 1, TAB_RIGHT, gs->se_mean, NULL);
703 /* Implementation of the Test Results box struct */
705 static void trbox_base_init (struct trbox *, size_t n_vars, int cols);
706 static void trbox_base_finalize (struct trbox *);
707 static void trbox_independent_samples_init (struct trbox *,
708 struct t_test_proc *);
709 static void trbox_independent_samples_populate (struct trbox *,
710 struct t_test_proc *);
711 static void trbox_one_sample_init (struct trbox *, struct t_test_proc *);
712 static void trbox_one_sample_populate (struct trbox *, struct t_test_proc *);
713 static void trbox_paired_init (struct trbox *, struct t_test_proc *);
714 static void trbox_paired_populate (struct trbox *, struct t_test_proc *);
716 /* Create a trbox according to mode*/
718 trbox_create (struct trbox *trb, struct t_test_proc *proc)
723 trbox_one_sample_init (trb, proc);
726 trbox_independent_samples_init (trb, proc);
729 trbox_paired_init (trb, proc);
736 /* Populate a trbox according to proc */
738 trbox_populate (struct trbox *trb, struct t_test_proc *proc)
740 trb->populate (trb, proc);
743 /* Submit and destroy a trbox */
745 trbox_finalize (struct trbox *trb)
750 /* Initialize the independent samples trbox */
752 trbox_independent_samples_init (struct trbox *self,
753 struct t_test_proc *proc)
755 const int hsize = 11;
756 const int vsize = proc->n_vars * 2 + 3;
759 self->populate = trbox_independent_samples_populate;
761 trbox_base_init (self, proc->n_vars * 2, hsize);
762 tab_title (self->t, _("Independent Samples Test"));
763 tab_hline (self->t, TAL_1, 2, hsize - 1, 1);
764 tab_vline (self->t, TAL_2, 2, 0, vsize - 1);
765 tab_vline (self->t, TAL_1, 4, 0, vsize - 1);
766 tab_box (self->t, -1, -1, -1, TAL_1, 2, 1, hsize - 2, vsize - 1);
767 tab_hline (self->t, TAL_1, hsize - 2, hsize - 1, 2);
768 tab_box (self->t, -1, -1, -1, TAL_1, hsize - 2, 2, hsize - 1, vsize - 1);
769 tab_joint_text (self->t, 2, 0, 3, 0,
770 TAB_CENTER, _("Levene's Test for Equality of Variances"));
771 tab_joint_text (self->t, 4, 0, hsize- 1, 0,
772 TAB_CENTER, _("t-test for Equality of Means"));
774 tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("F"));
775 tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig."));
776 tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("t"));
777 tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("df"));
778 tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
779 tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference"));
780 tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Difference"));
781 tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
782 tab_text (self->t, 10, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
784 tab_joint_text (self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF,
785 _("%g%% Confidence Interval of the Difference"),
786 proc->criteria * 100.0);
789 /* Populate the independent samples trbox */
791 trbox_independent_samples_populate (struct trbox *self,
792 struct t_test_proc *proc)
796 for (i = 0; i < proc->n_vars; i++)
805 double pooled_variance;
811 const struct variable *var = proc->vars[i];
812 struct group_proc *grp_data = group_proc_get (var);
814 struct hsh_table *grp_hash = grp_data->group_hash;
816 struct group_statistics *gs0;
817 struct group_statistics *gs1;
819 union value search_val;
821 if (proc->criterion == CMP_LE)
822 search_val.f = proc->critical_value - 1.0;
824 search_val = proc->g_value[0];
826 gs0 = hsh_find (grp_hash, &search_val);
829 if (proc->criterion == CMP_LE)
830 search_val.f = proc->critical_value + 1.0;
832 search_val = proc->g_value[1];
834 gs1 = hsh_find (grp_hash, &search_val);
838 tab_text (self->t, 0, i * 2 + 3, TAB_LEFT, var_get_name (proc->vars[i]));
839 tab_text (self->t, 1, i * 2 + 3, TAB_LEFT, _("Equal variances assumed"));
840 tab_double (self->t, 2, i * 2 + 3, TAB_CENTER, grp_data->levene, NULL);
842 /* Now work out the significance of the Levene test */
844 df2 = grp_data->ugs.n - 2;
845 q = gsl_cdf_fdist_Q (grp_data->levene, df1, df2);
846 tab_double (self->t, 3, i * 2 + 3, TAB_CENTER, q, NULL);
848 df = gs0->n + gs1->n - 2.0;
849 tab_double (self->t, 5, i * 2 + 3, TAB_RIGHT, df, NULL);
851 pooled_variance = (gs0->n * pow2 (gs0->s_std_dev)
852 + gs1->n *pow2 (gs1->s_std_dev)) / df ;
854 t = (gs0->mean - gs1->mean) / sqrt (pooled_variance);
855 t /= sqrt ((gs0->n + gs1->n) / (gs0->n * gs1->n));
857 tab_double (self->t, 4, i * 2 + 3, TAB_RIGHT, t, NULL);
859 p = gsl_cdf_tdist_P (t, df);
860 q = gsl_cdf_tdist_Q (t, df);
862 tab_double (self->t, 6, i * 2 + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p),
865 mean_diff = gs0->mean - gs1->mean;
866 tab_double (self->t, 7, i * 2 + 3, TAB_RIGHT, mean_diff, NULL);
869 std_err_diff = sqrt (pow2 (gs0->se_mean) + pow2 (gs1->se_mean));
870 tab_double (self->t, 8, i * 2 + 3, TAB_RIGHT, std_err_diff, NULL);
872 /* Now work out the confidence interval */
873 q = (1 - proc->criteria)/2.0; /* 2-tailed test */
875 t = gsl_cdf_tdist_Qinv (q, df);
876 tab_double (self->t, 9, i * 2 + 3, TAB_RIGHT,
877 mean_diff - t * std_err_diff, NULL);
879 tab_double (self->t, 10, i * 2 + 3, TAB_RIGHT,
880 mean_diff + t * std_err_diff, NULL);
883 /* Now for the \sigma_1 != \sigma_2 case */
884 tab_text (self->t, 1, i * 2 + 3 + 1,
885 TAB_LEFT, _("Equal variances not assumed"));
887 se2 = ((pow2 (gs0->s_std_dev) / (gs0->n - 1)) +
888 (pow2 (gs1->s_std_dev) / (gs1->n - 1)));
890 t = mean_diff / sqrt (se2);
891 tab_double (self->t, 4, i * 2 + 3 + 1, TAB_RIGHT, t, NULL);
893 df = pow2 (se2) / ((pow2 (pow2 (gs0->s_std_dev) / (gs0->n - 1))
895 + (pow2 (pow2 (gs1->s_std_dev) / (gs1->n - 1))
897 tab_double (self->t, 5, i * 2 + 3 + 1, TAB_RIGHT, df, NULL);
899 p = gsl_cdf_tdist_P (t, df);
900 q = gsl_cdf_tdist_Q (t, df);
902 tab_double (self->t, 6, i * 2 + 3 + 1, TAB_RIGHT, 2.0 * (t > 0 ? q : p),
905 /* Now work out the confidence interval */
906 q = (1 - proc->criteria) / 2.0; /* 2-tailed test */
908 t = gsl_cdf_tdist_Qinv (q, df);
910 tab_double (self->t, 7, i * 2 + 3 + 1, TAB_RIGHT, mean_diff, NULL);
911 tab_double (self->t, 8, i * 2 + 3 + 1, TAB_RIGHT, std_err_diff, NULL);
912 tab_double (self->t, 9, i * 2 + 3 + 1, TAB_RIGHT,
913 mean_diff - t * std_err_diff, NULL);
914 tab_double (self->t, 10, i * 2 + 3 + 1, TAB_RIGHT,
915 mean_diff + t * std_err_diff, NULL);
919 /* Initialize the paired samples trbox */
921 trbox_paired_init (struct trbox *self, struct t_test_proc *proc)
924 const int vsize=proc->n_pairs+ 3;
926 self->populate = trbox_paired_populate;
928 trbox_base_init (self, proc->n_pairs, hsize);
929 tab_title (self->t, _("Paired Samples Test"));
930 tab_hline (self->t, TAL_1, 2, 6, 1);
931 tab_vline (self->t, TAL_2, 2, 0, vsize - 1);
932 tab_joint_text (self->t, 2, 0, 6, 0, TAB_CENTER, _("Paired Differences"));
933 tab_box (self->t, -1, -1, -1, TAL_1, 2, 1, 6, vsize - 1);
934 tab_box (self->t, -1, -1, -1, TAL_1, 6, 0, hsize - 1, vsize - 1);
935 tab_hline (self->t, TAL_1, 5, 6, 2);
936 tab_vline (self->t, TAL_GAP, 6, 0, 1);
938 tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF,
939 _("%g%% Confidence Interval of the Difference"),
940 proc->criteria*100.0);
942 tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean"));
943 tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
944 tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Mean"));
945 tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
946 tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
947 tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("t"));
948 tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("df"));
949 tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
952 /* Populate the paired samples trbox */
954 trbox_paired_populate (struct trbox *trb,
955 struct t_test_proc *proc)
959 for (i = 0; i < proc->n_pairs; i++)
961 struct pair *pair = &proc->pairs[i];
969 tab_text (trb->t, 0, i + 3, TAB_LEFT | TAT_PRINTF, _("Pair %d"), i);
970 tab_text (trb->t, 1, i + 3, TAB_LEFT | TAT_PRINTF, "%s - %s",
971 var_get_name (pair->v[0]),
972 var_get_name (pair->v[1]));
973 tab_double (trb->t, 2, i + 3, TAB_RIGHT, pair->mean_diff, NULL);
974 tab_double (trb->t, 3, i + 3, TAB_RIGHT, pair->std_dev_diff, NULL);
977 se_mean = pair->std_dev_diff / sqrt (n);
978 tab_double (trb->t, 4, i + 3, TAB_RIGHT, se_mean, NULL);
980 /* Now work out the confidence interval */
981 q = (1 - proc->criteria) / 2.0; /* 2-tailed test */
983 t = gsl_cdf_tdist_Qinv (q, df);
985 tab_double (trb->t, 5, i + 3, TAB_RIGHT,
986 pair->mean_diff - t * se_mean, NULL);
987 tab_double (trb->t, 6, i + 3, TAB_RIGHT,
988 pair->mean_diff + t * se_mean, NULL);
990 t = ((pair->mean[0] - pair->mean[1])
991 / sqrt ((pow2 (pair->s_std_dev[0]) + pow2 (pair->s_std_dev[1])
992 - (2 * pair->correlation
993 * pair->s_std_dev[0] * pair->s_std_dev[1]))
996 tab_double (trb->t, 7, i + 3, TAB_RIGHT, t, NULL);
998 /* Degrees of freedom */
999 tab_double (trb->t, 8, i + 3, TAB_RIGHT, df, &proc->weight_format);
1001 p = gsl_cdf_tdist_P (t, df);
1002 q = gsl_cdf_tdist_P (t, df);
1004 tab_double (trb->t, 9, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL);
1008 /* Initialize the one sample trbox */
1010 trbox_one_sample_init (struct trbox *self, struct t_test_proc *proc)
1012 const int hsize = 7;
1013 const int vsize = proc->n_vars + 3;
1015 self->populate = trbox_one_sample_populate;
1017 trbox_base_init (self, proc->n_vars, hsize);
1018 tab_title (self->t, _("One-Sample Test"));
1019 tab_hline (self->t, TAL_1, 1, hsize - 1, 1);
1020 tab_vline (self->t, TAL_2, 1, 0, vsize - 1);
1022 tab_joint_text (self->t, 1, 0, hsize - 1, 0, TAB_CENTER | TAT_PRINTF,
1023 _("Test Value = %f"), proc->testval);
1025 tab_box (self->t, -1, -1, -1, TAL_1, 1, 1, hsize - 1, vsize - 1);
1028 tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF,
1029 _("%g%% Confidence Interval of the Difference"),
1030 proc->criteria * 100.0);
1032 tab_vline (self->t, TAL_GAP, 6, 1, 1);
1033 tab_hline (self->t, TAL_1, 5, 6, 2);
1034 tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _("t"));
1035 tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("df"));
1036 tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
1037 tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference"));
1038 tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
1039 tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
1042 /* Populate the one sample trbox */
1044 trbox_one_sample_populate (struct trbox *trb, struct t_test_proc *proc)
1050 for (i = 0; i < proc->n_vars; i++)
1055 struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
1057 tab_text (trb->t, 0, i + 3, TAB_LEFT, var_get_name (proc->vars[i]));
1059 t = (gs->mean - proc->testval) * sqrt (gs->n) / gs->std_dev;
1061 tab_double (trb->t, 1, i + 3, TAB_RIGHT, t, NULL);
1063 /* degrees of freedom */
1066 tab_double (trb->t, 2, i + 3, TAB_RIGHT, df, &proc->weight_format);
1068 p = gsl_cdf_tdist_P (t, df);
1069 q = gsl_cdf_tdist_Q (t, df);
1071 /* Multiply by 2 to get 2-tailed significance, makeing sure we've got
1073 tab_double (trb->t, 3, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL);
1074 tab_double (trb->t, 4, i + 3, TAB_RIGHT, gs->mean_diff, NULL);
1077 q = (1 - proc->criteria) / 2.0; /* 2-tailed test */
1078 t = gsl_cdf_tdist_Qinv (q, df);
1080 tab_double (trb->t, 5, i + 3, TAB_RIGHT,
1081 gs->mean_diff - t * gs->se_mean, NULL);
1082 tab_double (trb->t, 6, i + 3, TAB_RIGHT,
1083 gs->mean_diff + t * gs->se_mean, NULL);
1087 /* Base initializer for the generalized trbox */
1089 trbox_base_init (struct trbox *self, size_t data_rows, int cols)
1091 const size_t rows = 3 + data_rows;
1093 self->finalize = trbox_base_finalize;
1094 self->t = tab_create (cols, rows, 0);
1095 tab_headers (self->t, 0, 0, 3, 0);
1096 tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
1097 tab_hline (self->t, TAL_2, 0, cols- 1, 3);
1098 tab_dim (self->t, tab_natural_dimensions, NULL);
1101 /* Base finalizer for the trbox */
1103 trbox_base_finalize (struct trbox *trb)
1105 tab_submit (trb->t);
1108 /* Create, populate and submit the Paired Samples Correlation box */
1110 pscbox (struct t_test_proc *proc)
1112 const int rows=1+proc->n_pairs;
1116 struct tab_table *table;
1118 table = tab_create (cols, rows, 0);
1120 tab_columns (table, SOM_COL_DOWN, 1);
1121 tab_headers (table, 0, 0, 1, 0);
1122 tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
1123 tab_hline (table, TAL_2, 0, cols - 1, 1);
1124 tab_vline (table, TAL_2, 2, 0, rows - 1);
1125 tab_dim (table, tab_natural_dimensions, NULL);
1126 tab_title (table, _("Paired Samples Correlations"));
1128 /* column headings */
1129 tab_text (table, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
1130 tab_text (table, 3, 0, TAB_CENTER | TAT_TITLE, _("Correlation"));
1131 tab_text (table, 4, 0, TAB_CENTER | TAT_TITLE, _("Sig."));
1133 for (i = 0; i < proc->n_pairs; i++)
1135 struct pair *pair = &proc->pairs[i];
1137 double df = pair->n -2;
1138 double correlation_t = (pair->correlation * sqrt (df) /
1139 sqrt (1 - pow2 (pair->correlation)));
1142 tab_text (table, 0, i + 1, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1144 tab_text (table, 1, i + 1, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1146 var_get_name (pair->v[0]),
1147 var_get_name (pair->v[1]));
1150 tab_double (table, 2, i + 1, TAB_RIGHT, pair->n, &proc->weight_format);
1151 tab_double (table, 3, i + 1, TAB_RIGHT, pair->correlation, NULL);
1153 p = gsl_cdf_tdist_P (correlation_t, df);
1154 q = gsl_cdf_tdist_Q (correlation_t, df);
1155 tab_double (table, 4, i + 1, TAB_RIGHT,
1156 2.0 * (correlation_t > 0 ? q : p), NULL);
1162 /* Calculation Implementation */
1164 /* Calculations common to all variants of the T test. */
1166 common_calc (const struct dictionary *dict,
1167 struct t_test_proc *proc,
1168 struct casereader *reader)
1173 for (i = 0; i < proc->n_vars; i++)
1175 struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
1182 for (; (c = casereader_read (reader)) != NULL; case_unref (c))
1184 double weight = dict_get_case_weight (dict, c, NULL);
1186 /* Listwise has to be implicit if the independent variable
1188 if (proc->mode == T_IND_SAMPLES)
1190 if (var_is_value_missing (proc->indep_var,
1191 case_data (c, proc->indep_var),
1196 for (i = 0; i < proc->n_vars; i++)
1198 const struct variable *v = proc->vars[i];
1199 const union value *val = case_data (c, v);
1201 if (!var_is_value_missing (v, val, proc->exclude))
1203 struct group_statistics *gs;
1204 gs = &group_proc_get (v)->ugs;
1207 gs->sum += weight * val->f;
1208 gs->ssq += weight * pow2 (val->f);
1212 casereader_destroy (reader);
1214 for (i = 0; i < proc->n_vars; i++)
1216 struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
1218 gs->mean = gs->sum / gs->n;
1219 gs->s_std_dev = sqrt (((gs->ssq / gs->n) - pow2 (gs->mean)));
1220 gs->std_dev = sqrt (gs->n / (gs->n- 1)
1221 * ((gs->ssq / gs->n) - pow2 (gs->mean)));
1222 gs->se_mean = gs->std_dev / sqrt (gs->n);
1223 gs->mean_diff = gs->sum_diff / gs->n;
1227 /* Calculations for one sample T test. */
1229 one_sample_calc (const struct dictionary *dict, struct t_test_proc *proc,
1230 struct casereader *reader)
1235 for (i = 0; i < proc->n_vars; i++)
1237 struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
1241 for (; (c = casereader_read (reader)) != NULL; case_unref (c))
1243 double weight = dict_get_case_weight (dict, c, NULL);
1244 for (i = 0; i < proc->n_vars; i++)
1246 const struct variable *v = proc->vars[i];
1247 struct group_statistics *gs = &group_proc_get (v)->ugs;
1248 const union value *val = case_data (c, v);
1249 if (!var_is_value_missing (v, val, proc->exclude))
1250 gs->sum_diff += weight * (val->f - proc->testval);
1254 for (i = 0; i < proc->n_vars; i++)
1256 struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
1257 gs->mean_diff = gs->sum_diff / gs->n;
1260 casereader_destroy (reader);
1266 paired_calc (const struct dictionary *dict, struct t_test_proc *proc,
1267 struct casereader *reader)
1272 for (i = 0; i < proc->n_pairs; i++)
1274 struct pair *pair = &proc->pairs[i];
1276 pair->sum[0] = pair->sum[1] = 0;
1277 pair->ssq[0] = pair->ssq[1] = 0;
1278 pair->sum_of_prod = 0;
1279 pair->correlation = 0;
1280 pair->sum_of_diffs = 0;
1281 pair->ssq_diffs = 0;
1284 for (; (c = casereader_read (reader)) != NULL; case_unref (c))
1286 double weight = dict_get_case_weight (dict, c, NULL);
1287 for (i = 0; i < proc->n_pairs; i++)
1289 struct pair *pair = &proc->pairs[i];
1290 const struct variable *v0 = pair->v[0];
1291 const struct variable *v1 = pair->v[1];
1293 const union value *val0 = case_data (c, v0);
1294 const union value *val1 = case_data (c, v1);
1296 if (!var_is_value_missing (v0, val0, proc->exclude)
1297 && !var_is_value_missing (v1, val1, proc->exclude))
1300 pair->sum[0] += weight * val0->f;
1301 pair->sum[1] += weight * val1->f;
1302 pair->ssq[0] += weight * pow2 (val0->f);
1303 pair->ssq[1] += weight * pow2 (val1->f);
1304 pair->sum_of_prod += weight * val0->f * val1->f;
1305 pair->sum_of_diffs += weight * (val0->f - val1->f);
1306 pair->ssq_diffs += weight * pow2 (val0->f - val1->f);
1311 for (i = 0; i < proc->n_pairs; i++)
1313 struct pair *pair = &proc->pairs[i];
1314 const double n = pair->n;
1317 for (j=0; j < 2; j++)
1319 pair->mean[j] = pair->sum[j] / n;
1320 pair->s_std_dev[j] = sqrt ((pair->ssq[j] / n
1321 - pow2 (pair->mean[j])));
1322 pair->std_dev[j] = sqrt (n / (n- 1) * (pair->ssq[j] / n
1323 - pow2 (pair->mean[j])));
1326 pair->correlation = (pair->sum_of_prod / pair->n
1327 - pair->mean[0] * pair->mean[1]);
1328 /* correlation now actually contains the covariance */
1329 pair->correlation /= pair->std_dev[0] * pair->std_dev[1];
1330 pair->correlation *= pair->n / (pair->n - 1);
1332 pair->mean_diff = pair->sum_of_diffs / n;
1333 pair->std_dev_diff = sqrt (n / (n - 1) * ((pair->ssq_diffs / n)
1334 - pow2 (pair->mean_diff)));
1337 casereader_destroy (reader);
1342 group_calc (const struct dictionary *dict, struct t_test_proc *proc,
1343 struct casereader *reader)
1348 for (i = 0; i < proc->n_vars; i++)
1350 struct group_proc *ttpr = group_proc_get (proc->vars[i]);
1353 /* There's always 2 groups for a T - TEST */
1355 ttpr->group_hash = hsh_create (2,
1356 (hsh_compare_func *) compare_group_binary,
1357 (hsh_hash_func *) hash_group_binary,
1358 (hsh_free_func *) free_group,
1361 for (j = 0; j < 2; j++)
1363 struct group_statistics *gs = xmalloc (sizeof *gs);
1367 if (proc->criterion == CMP_EQ)
1368 gs->id = proc->g_value[j];
1372 gs->id.f = proc->critical_value - 1.0;
1374 gs->id.f = proc->critical_value + 1.0;
1377 hsh_insert (ttpr->group_hash, gs);
1381 for (; (c = casereader_read (reader)) != NULL; case_unref (c))
1383 const double weight = dict_get_case_weight (dict, c, NULL);
1384 const union value *gv;
1386 if (var_is_value_missing (proc->indep_var,
1387 case_data (c, proc->indep_var), proc->exclude))
1390 gv = case_data (c, proc->indep_var);
1391 for (i = 0; i < proc->n_vars; i++)
1393 const struct variable *var = proc->vars[i];
1394 const union value *val = case_data (c, var);
1395 struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
1396 struct group_statistics *gs = hsh_find (grp_hash, gv);
1398 /* If the independent variable doesn't match either of the values
1399 for this case then move on to the next case. */
1403 if (!var_is_value_missing (var, val, proc->exclude))
1406 gs->sum += weight * val->f;
1407 gs->ssq += weight * pow2 (val->f);
1412 for (i = 0; i < proc->n_vars; i++)
1414 const struct variable *var = proc->vars[i];
1415 struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
1416 struct hsh_iterator g;
1417 struct group_statistics *gs;
1420 for (gs = hsh_first (grp_hash, &g); gs != NULL;
1421 gs = hsh_next (grp_hash, &g))
1423 gs->mean = gs->sum / gs->n;
1424 gs->s_std_dev = sqrt (((gs->ssq / gs->n) - pow2 (gs->mean)));
1425 gs->std_dev = sqrt (gs->n / (gs->n- 1)
1426 * ((gs->ssq / gs->n) - pow2 (gs->mean)));
1427 gs->se_mean = gs->std_dev / sqrt (gs->n);
1430 assert (count == 2);
1433 casereader_destroy (reader);
1439 calculate (struct t_test_proc *proc,
1440 struct casereader *input, const struct dataset *ds)
1442 const struct dictionary *dict = dataset_dict (ds);
1443 struct ssbox stat_summary_box;
1444 struct trbox test_results_box;
1445 struct taint *taint;
1448 c = casereader_peek (input, 0);
1451 casereader_destroy (input);
1454 output_split_file_values (ds, c);
1457 if (proc->listwise_missing)
1458 input = casereader_create_filter_missing (input,
1461 proc->exclude, NULL, NULL);
1462 input = casereader_create_filter_weight (input, dict, NULL, NULL);
1463 taint = taint_clone (casereader_get_taint (input));
1465 common_calc (dict, proc, casereader_clone (input));
1469 one_sample_calc (dict, proc, input);
1472 paired_calc (dict, proc, input);
1475 group_calc (dict, proc, casereader_clone (input));
1476 levene (dict, input, proc->indep_var, proc->n_vars, proc->vars,
1483 if (!taint_has_tainted_successor (taint))
1485 ssbox_create (&stat_summary_box, proc);
1486 ssbox_populate (&stat_summary_box, proc);
1487 ssbox_finalize (&stat_summary_box);
1489 if (proc->mode == T_PAIRED)
1492 trbox_create (&test_results_box, proc);
1493 trbox_populate (&test_results_box, proc);
1494 trbox_finalize (&test_results_box);
1497 taint_destroy (taint);
1500 /* return 0 if G belongs to group 0,
1501 1 if it belongs to group 1,
1502 2 if it belongs to neither group */
1504 which_group (const struct group_statistics *g,
1505 const struct t_test_proc *proc)
1507 int width = var_get_width (proc->indep_var);
1509 if (0 == value_compare_3way (&g->id, &proc->g_value[0], width))
1512 if (0 == value_compare_3way (&g->id, &proc->g_value[1], width))
1518 /* Return -1 if the id of a is less than b; +1 if greater than and
1521 compare_group_binary (const struct group_statistics *a,
1522 const struct group_statistics *b,
1523 const struct t_test_proc *proc)
1528 if (proc->criterion == CMP_LE)
1530 flag_a = (a->id.f < proc->critical_value);
1531 flag_b = (b->id.f < proc->critical_value);
1535 flag_a = which_group (a, proc);
1536 flag_b = which_group (b, proc);
1539 if (flag_a < flag_b)
1542 return (flag_a > flag_b);
1545 /* This is a degenerate case of a hash, since it can only return three possible
1546 values. It's really a comparison, being used as a hash function */
1549 hash_group_binary (const struct group_statistics *g,
1550 const struct t_test_proc *proc)
1552 return (proc->criterion == CMP_LE
1553 ? g->id.f < proc->critical_value
1554 : which_group (g, proc));