1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_cdf.h>
25 #include <data/case.h>
26 #include <data/casegrouper.h>
27 #include <data/casereader.h>
28 #include <data/dictionary.h>
29 #include <data/procedure.h>
30 #include <data/value-labels.h>
31 #include <data/variable.h>
32 #include <language/command.h>
33 #include <language/dictionary/split-file.h>
34 #include <language/lexer/lexer.h>
35 #include <libpspp/assertion.h>
36 #include <libpspp/compiler.h>
37 #include <libpspp/hash.h>
38 #include <libpspp/message.h>
39 #include <libpspp/misc.h>
40 #include <libpspp/str.h>
41 #include <libpspp/taint.h>
42 #include <math/group-proc.h>
43 #include <math/levene.h>
44 #include <output/manager.h>
45 #include <output/table.h>
46 #include <data/format.h>
51 #define _(msgid) gettext (msgid)
59 +variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
61 missing=miss:!analysis/listwise,
62 incl:include/!exclude;
63 +format=fmt:!labels/nolabels;
64 criteria=:cin(d:criteria,"%s > 0. && %s < 1.").
70 /* Variable for the GROUPS subcommand, if given. */
71 static struct variable *indep_var;
79 struct group_properties
81 /* The comparison criterion */
82 enum comparison criterion;
84 /* The width of the independent variable */
88 /* The value of the independent variable at which groups are determined to
89 belong to one group or the other */
90 double critical_value;
93 /* The values of the independent variable for each group */
94 union value g_value[2];
100 static struct group_properties gp ;
104 /* PAIRS: Number of pairs to be compared ; each pair. */
105 static int n_pairs = 0 ;
108 /* The variables comprising the pair */
109 const struct variable *v[2];
111 /* The number of valid variable pairs */
114 /* The sum of the members */
117 /* sum of squares of the members */
120 /* Std deviation of the members */
124 /* Sample Std deviation of the members */
127 /* The means of the members */
130 /* The correlation coefficient between the variables */
133 /* The sum of the differences */
136 /* The sum of the products */
139 /* The mean of the differences */
142 /* The sum of the squares of the differences */
145 /* The std deviation of the differences */
149 static struct pair *pairs=0;
151 static int parse_value (struct lexer *lexer, union value * v, enum val_type);
153 /* Structures and Functions for the Statistics Summary Box */
155 typedef void populate_ssbox_func (struct ssbox *ssb,
156 const struct dictionary *,
157 struct cmd_t_test *cmd);
158 typedef void finalize_ssbox_func (struct ssbox *ssb);
164 populate_ssbox_func *populate;
165 finalize_ssbox_func *finalize;
170 void ssbox_create (struct ssbox *ssb, struct cmd_t_test *cmd, int mode);
172 /* Populate a ssbox according to cmd */
173 void ssbox_populate (struct ssbox *ssb, const struct dictionary *dict,
174 struct cmd_t_test *cmd);
176 /* Submit and destroy a ssbox */
177 void ssbox_finalize (struct ssbox *ssb);
179 /* A function to create, populate and submit the Paired Samples Correlation
181 static void pscbox (const struct dictionary *);
184 /* Structures and Functions for the Test Results Box */
187 typedef void populate_trbox_func (struct trbox *trb,
188 const struct dictionary *dict,
189 struct cmd_t_test *cmd);
190 typedef void finalize_trbox_func (struct trbox *trb);
194 populate_trbox_func *populate;
195 finalize_trbox_func *finalize;
199 void trbox_create (struct trbox *trb, struct cmd_t_test *cmd, int mode);
201 /* Populate a ssbox according to cmd */
202 static void trbox_populate (struct trbox *trb, const struct dictionary *dict,
203 struct cmd_t_test *cmd);
205 /* Submit and destroy a ssbox */
206 void trbox_finalize (struct trbox *trb);
208 /* Which mode was T-TEST invoked */
216 static int common_calc (const struct dictionary *dict,
217 const struct ccase *, void *,
219 static void common_precalc (struct cmd_t_test *);
220 static void common_postcalc (struct cmd_t_test *);
222 static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, enum mv_class);
223 static void one_sample_precalc (struct cmd_t_test *);
224 static void one_sample_postcalc (struct cmd_t_test *);
226 static int paired_calc (const struct dictionary *dict, const struct ccase *,
227 struct cmd_t_test*, enum mv_class);
228 static void paired_precalc (struct cmd_t_test *);
229 static void paired_postcalc (struct cmd_t_test *);
231 static void group_precalc (struct cmd_t_test *);
232 static int group_calc (const struct dictionary *dict, const struct ccase *,
233 struct cmd_t_test *, enum mv_class);
234 static void group_postcalc (struct cmd_t_test *);
237 static void calculate (struct cmd_t_test *,
239 const struct dataset *);
243 static struct cmd_t_test cmd;
245 static bool bad_weight_warn = false;
248 static int compare_group_binary (const struct group_statistics *a,
249 const struct group_statistics *b,
250 const struct group_properties *p);
253 static unsigned hash_group_binary (const struct group_statistics *g,
254 const struct group_properties *p);
259 cmd_t_test (struct lexer *lexer, struct dataset *ds)
261 struct casegrouper *grouper;
262 struct casereader *group;
265 if ( !parse_t_test (lexer, ds, &cmd, NULL) )
268 if (! cmd.sbc_criteria)
273 if (cmd.sbc_testval) ++m;
274 if (cmd.sbc_groups) ++m;
275 if (cmd.sbc_pairs) ++m;
280 _ ("TESTVAL, GROUPS and PAIRS subcommands are mutually exclusive.")
289 else if (cmd.sbc_groups)
294 if ( mode == T_PAIRED)
296 if (cmd.sbc_variables)
298 msg (SE, _ ("VARIABLES subcommand is not appropriate with PAIRS"));
304 /* Iterate through the pairs and put each variable that is a
305 member of a pair into cmd.v_variables */
308 struct hsh_iterator hi;
309 struct const_hsh_table *hash;
310 const struct variable *v;
312 hash = const_hsh_create (n_pairs, compare_vars_by_name, hash_var_by_name,
315 for (i=0; i < n_pairs; ++i)
317 const_hsh_insert (hash, pairs[i].v[0]);
318 const_hsh_insert (hash, pairs[i].v[1]);
321 assert (cmd.n_variables == 0);
322 cmd.n_variables = const_hsh_count (hash);
324 cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables,
325 sizeof *cmd.v_variables);
326 /* Iterate through the hash */
327 for (i=0,v = const_hsh_first (hash, &hi);
329 v = const_hsh_next (hash, &hi) )
330 cmd.v_variables[i++]=v;
331 const_hsh_destroy (hash);
334 else if ( !cmd.sbc_variables)
336 msg (SE, _ ("One or more VARIABLES must be specified."));
341 bad_weight_warn = true;
344 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
345 while (casegrouper_get_next_group (grouper, &group))
346 calculate (&cmd, group, ds);
347 ok = casegrouper_destroy (grouper);
348 ok = proc_commit (ds) && ok;
354 if ( mode == T_IND_SAMPLES)
357 /* Destroy any group statistics we created */
358 for (v = 0 ; v < cmd.n_variables ; ++v )
360 struct group_proc *grpp = group_proc_get (cmd.v_variables[v]);
361 hsh_destroy (grpp->group_hash);
366 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
370 tts_custom_groups (struct lexer *lexer, struct dataset *ds, struct cmd_t_test *cmd UNUSED,
373 int n_group_values=0;
375 lex_match (lexer, '=');
377 indep_var = parse_variable (lexer, dataset_dict (ds));
380 lex_error (lexer, "expecting variable name in GROUPS subcommand");
384 if (var_is_long_string (indep_var))
386 msg (SE, _ ("Long string variable %s is not valid here."),
387 var_get_name (indep_var));
391 if (!lex_match (lexer, '('))
393 if (var_is_numeric (indep_var))
395 gp.v.g_value[0].f = 1;
396 gp.v.g_value[1].f = 2;
398 gp.criterion = CMP_EQ;
406 msg (SE, _ ("When applying GROUPS to a string variable, two "
407 "values must be specified."));
412 if (!parse_value (lexer, &gp.v.g_value[0], var_get_type (indep_var)))
415 lex_match (lexer, ',');
416 if (lex_match (lexer, ')'))
418 if (var_is_alpha (indep_var))
420 msg (SE, _ ("When applying GROUPS to a string variable, two "
421 "values must be specified."));
424 gp.criterion = CMP_LE;
425 gp.v.critical_value = gp.v.g_value[0].f;
431 if (!parse_value (lexer, &gp.v.g_value[1], var_get_type (indep_var)))
435 if (!lex_force_match (lexer, ')'))
438 if ( n_group_values == 2 )
439 gp.criterion = CMP_EQ ;
441 gp.criterion = CMP_LE ;
444 if ( var_is_alpha (indep_var))
446 buf_copy_rpad (gp.v.g_value [0].s, var_get_width (indep_var),
447 gp.v.g_value [0].s, strlen (gp.v.g_value[0].s));
449 buf_copy_rpad (gp.v.g_value [1].s, var_get_width (indep_var),
450 gp.v.g_value [1].s, strlen (gp.v.g_value[1].s));
458 tts_custom_pairs (struct lexer *lexer, struct dataset *ds, struct cmd_t_test *cmd UNUSED, void *aux UNUSED)
460 const struct variable **vars;
462 size_t n_pairs_local;
464 size_t n_before_WITH;
465 size_t n_after_WITH = SIZE_MAX;
466 int paired ; /* Was the PAIRED keyword given ? */
468 lex_match (lexer, '=');
471 if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars,
472 PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
480 if (lex_match (lexer, T_WITH))
482 n_before_WITH = n_vars;
483 if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars,
484 PV_DUPLICATE | PV_APPEND
485 | PV_NUMERIC | PV_NO_SCRATCH))
490 n_after_WITH = n_vars - n_before_WITH;
493 paired = (lex_match (lexer, '(') && lex_match_id (lexer, "PAIRED") && lex_match (lexer, ')'));
495 /* Determine the number of pairs needed */
498 if (n_before_WITH != n_after_WITH)
501 msg (SE, _ ("PAIRED was specified but the number of variables "
502 "preceding WITH (%zu) did not match the number "
504 n_before_WITH, n_after_WITH);
507 n_pairs_local = n_before_WITH;
509 else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */
511 n_pairs_local = n_before_WITH * n_after_WITH ;
513 else /* Neither WITH nor PAIRED keyword given */
518 msg (SE, _ ("At least two variables must be specified "
523 /* how many ways can you pick 2 from n_vars ? */
524 n_pairs_local = n_vars * (n_vars - 1) / 2;
528 /* Allocate storage for the pairs */
529 pairs = xnrealloc (pairs, n_pairs + n_pairs_local, sizeof *pairs);
531 /* Populate the pairs with the appropriate variables */
536 assert (n_pairs_local == n_vars / 2);
537 for (i = 0; i < n_pairs_local; ++i)
539 pairs[i].v[n_pairs] = vars[i];
540 pairs[i].v[n_pairs + 1] = vars[i + n_pairs_local];
543 else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */
548 for (i=0 ; i < n_before_WITH ; ++i )
550 for (j=0 ; j < n_after_WITH ; ++j)
552 pairs[p].v[0] = vars[i];
553 pairs[p].v[1] = vars[j+n_before_WITH];
558 else /* Neither WITH nor PAIRED given */
563 for (i=0 ; i < n_vars ; ++i )
565 for (j=i+1 ; j < n_vars ; ++j)
567 pairs[p].v[0] = vars[i];
568 pairs[p].v[1] = vars[j];
574 n_pairs+=n_pairs_local;
580 /* Parses the current token (numeric or string, depending on type)
581 value v and returns success. */
583 parse_value (struct lexer *lexer, union value * v, enum val_type type)
585 if (type == VAL_NUMERIC)
587 if (!lex_force_num (lexer))
589 v->f = lex_tokval (lexer);
593 if (!lex_force_string (lexer))
595 memset (v->s, ' ', MAX_SHORT_STRING);
596 strncpy (v->s, ds_cstr (lex_tokstr (lexer)), ds_length (lex_tokstr (lexer)));
605 /* Implementation of the SSBOX object */
607 void ssbox_base_init (struct ssbox *this, int cols,int rows);
609 void ssbox_base_finalize (struct ssbox *ssb);
611 void ssbox_one_sample_init (struct ssbox *this,
612 struct cmd_t_test *cmd );
614 void ssbox_independent_samples_init (struct ssbox *this,
615 struct cmd_t_test *cmd);
617 void ssbox_paired_init (struct ssbox *this,
618 struct cmd_t_test *cmd);
621 /* Factory to create an ssbox */
623 ssbox_create (struct ssbox *ssb, struct cmd_t_test *cmd, int mode)
628 ssbox_one_sample_init (ssb,cmd);
631 ssbox_independent_samples_init (ssb,cmd);
634 ssbox_paired_init (ssb,cmd);
643 /* Despatcher for the populate method */
645 ssbox_populate (struct ssbox *ssb, const struct dictionary *dict,
646 struct cmd_t_test *cmd)
648 ssb->populate (ssb, dict, cmd);
652 /* Despatcher for finalize */
654 ssbox_finalize (struct ssbox *ssb)
660 /* Submit the box and clear up */
662 ssbox_base_finalize (struct ssbox *ssb)
669 /* Initialize a ssbox struct */
671 ssbox_base_init (struct ssbox *this, int cols,int rows)
673 this->finalize = ssbox_base_finalize;
674 this->t = tab_create (cols, rows, 0);
676 tab_columns (this->t, SOM_COL_DOWN, 1);
677 tab_headers (this->t,0,0,1,0);
678 tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 );
679 tab_hline (this->t, TAL_2,0,cols-1,1);
680 tab_dim (this->t, tab_natural_dimensions);
683 void ssbox_one_sample_populate (struct ssbox *ssb,
684 const struct dictionary *,
685 struct cmd_t_test *cmd);
687 /* Initialize the one_sample ssbox */
689 ssbox_one_sample_init (struct ssbox *this,
690 struct cmd_t_test *cmd )
693 const int vsize=cmd->n_variables+1;
695 this->populate = ssbox_one_sample_populate;
697 ssbox_base_init (this, hsize,vsize);
698 tab_title (this->t, _ ("One-Sample Statistics"));
699 tab_vline (this->t, TAL_2, 1,0,vsize - 1);
700 tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _ ("N"));
701 tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _ ("Mean"));
702 tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation"));
703 tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("SE. Mean"));
706 static void ssbox_independent_samples_populate (struct ssbox *ssb,
707 const struct dictionary *,
708 struct cmd_t_test *cmd);
710 /* Initialize the independent samples ssbox */
712 ssbox_independent_samples_init (struct ssbox *this,
713 struct cmd_t_test *cmd)
716 int vsize = cmd->n_variables*2 +1;
718 this->populate = ssbox_independent_samples_populate;
720 ssbox_base_init (this, hsize,vsize);
721 tab_vline (this->t, TAL_GAP, 1, 0,vsize - 1);
722 tab_title (this->t, _ ("Group Statistics"));
723 tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, var_get_name (indep_var));
724 tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _ ("N"));
725 tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _ ("Mean"));
726 tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation"));
727 tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _ ("SE. Mean"));
731 /* Populate the ssbox for independent samples */
733 ssbox_independent_samples_populate (struct ssbox *ssb,
734 const struct dictionary *dict,
735 struct cmd_t_test *cmd)
739 const struct variable *wv = dict_get_weight (dict);
740 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
742 char *val_lab[2] = {NULL, NULL};
743 double indep_value[2];
745 char prefix[2][3]={"",""};
747 if ( var_is_numeric (indep_var) )
751 s = var_lookup_value_label (indep_var, &gp.v.g_value[0]);
752 val_lab[0] = s ? strdup (s) : NULL;
754 s = var_lookup_value_label (indep_var, &gp.v.g_value[1]);
755 val_lab[1] = s ? strdup (s) : NULL;
759 val_lab[0] = calloc (sizeof (char), MAX_SHORT_STRING + 1);
760 val_lab[1] = calloc (sizeof (char), MAX_SHORT_STRING + 1);
761 memcpy (val_lab[0], gp.v.g_value[0].s, MAX_SHORT_STRING);
762 memcpy (val_lab[1], gp.v.g_value[1].s, MAX_SHORT_STRING);
765 if (gp.criterion == CMP_LE )
767 strcpy (prefix[0],">=");
768 strcpy (prefix[1],"<");
769 indep_value[0] = gp.v.critical_value;
770 indep_value[1] = gp.v.critical_value;
774 indep_value[0] = gp.v.g_value[0].f;
775 indep_value[1] = gp.v.g_value[1].f;
780 for (i=0; i < cmd->n_variables; ++i)
782 const struct variable *var = cmd->v_variables[i];
783 struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
786 tab_text (ssb->t, 0, i*2+1, TAB_LEFT,
787 var_get_name (cmd->v_variables[i]));
790 tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF,
791 "%s%s", prefix[0], val_lab[0]);
793 tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF,
794 "%s%g", prefix[0], indep_value[0]);
798 tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF,
799 "%s%s", prefix[1], val_lab[1]);
801 tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF,
802 "%s%g", prefix[1], indep_value[1]);
805 /* Fill in the group statistics */
806 for ( count = 0 ; count < 2 ; ++count )
808 union value search_val;
810 struct group_statistics *gs;
812 if ( gp.criterion == CMP_LE )
817 search_val.f = gp.v.critical_value + 1.0;
821 /* less than ( < ) case */
822 search_val.f = gp.v.critical_value - 1.0;
827 search_val = gp.v.g_value[count];
830 gs = hsh_find (grp_hash, (void *) &search_val);
833 tab_double (ssb->t, 2, i*2+count+1, TAB_RIGHT, gs->n, wfmt);
834 tab_double (ssb->t, 3, i*2+count+1, TAB_RIGHT, gs->mean, NULL);
835 tab_double (ssb->t, 4, i*2+count+1, TAB_RIGHT, gs->std_dev, NULL);
836 tab_double (ssb->t, 5, i*2+count+1, TAB_RIGHT, gs->se_mean, NULL);
844 static void ssbox_paired_populate (struct ssbox *ssb,
845 const struct dictionary *dict,
846 struct cmd_t_test *cmd);
848 /* Initialize the paired values ssbox */
850 ssbox_paired_init (struct ssbox *this, struct cmd_t_test *cmd UNUSED)
854 int vsize = n_pairs*2+1;
856 this->populate = ssbox_paired_populate;
858 ssbox_base_init (this, hsize,vsize);
859 tab_title (this->t, _ ("Paired Sample Statistics"));
860 tab_vline (this->t,TAL_GAP,1,0,vsize-1);
861 tab_vline (this->t,TAL_2,2,0,vsize-1);
862 tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _ ("Mean"));
863 tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _ ("N"));
864 tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation"));
865 tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _ ("SE. Mean"));
869 /* Populate the ssbox for paired values */
871 ssbox_paired_populate (struct ssbox *ssb, const struct dictionary *dict,
872 struct cmd_t_test *cmd UNUSED)
876 const struct variable *wv = dict_get_weight (dict);
877 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
881 for (i=0; i < n_pairs; ++i)
885 tab_text (ssb->t, 0, i*2+1, TAB_LEFT | TAT_PRINTF , _ ("Pair %d"),i);
887 for (j=0 ; j < 2 ; ++j)
889 struct group_statistics *gs;
891 gs = &group_proc_get (pairs[i].v[j])->ugs;
895 tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT,
896 var_get_name (pairs[i].v[j]));
899 tab_double (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], NULL);
900 tab_double (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, wfmt);
901 tab_double (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], NULL);
902 tab_double (ssb->t,5, i*2+j+1, TAB_RIGHT,
903 pairs[i].std_dev[j]/sqrt (pairs[i].n), NULL);
909 /* Populate the one sample ssbox */
911 ssbox_one_sample_populate (struct ssbox *ssb, const struct dictionary *dict,
912 struct cmd_t_test *cmd)
916 const struct variable *wv = dict_get_weight (dict);
917 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
921 for (i=0; i < cmd->n_variables; ++i)
923 struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs;
925 tab_text (ssb->t, 0, i+1, TAB_LEFT, var_get_name (cmd->v_variables[i]));
926 tab_double (ssb->t,1, i+1, TAB_RIGHT, gs->n, wfmt);
927 tab_double (ssb->t,2, i+1, TAB_RIGHT, gs->mean, NULL);
928 tab_double (ssb->t,3, i+1, TAB_RIGHT, gs->std_dev, NULL);
929 tab_double (ssb->t,4, i+1, TAB_RIGHT, gs->se_mean, NULL);
935 /* Implementation of the Test Results box struct */
937 void trbox_base_init (struct trbox *self,size_t n_vars, int cols);
938 void trbox_base_finalize (struct trbox *trb);
940 void trbox_independent_samples_init (struct trbox *trb,
941 struct cmd_t_test *cmd );
943 static void trbox_independent_samples_populate (struct trbox *trb,
944 const struct dictionary *dict,
945 struct cmd_t_test *cmd);
947 void trbox_one_sample_init (struct trbox *self,
948 struct cmd_t_test *cmd );
950 static void trbox_one_sample_populate (struct trbox *trb,
951 const struct dictionary *,
952 struct cmd_t_test *cmd);
954 void trbox_paired_init (struct trbox *self,
955 struct cmd_t_test *cmd );
957 static void trbox_paired_populate (struct trbox *trb,
958 const struct dictionary *,
959 struct cmd_t_test *cmd);
963 /* Create a trbox according to mode*/
965 trbox_create (struct trbox *trb,
966 struct cmd_t_test *cmd, int mode)
971 trbox_one_sample_init (trb,cmd);
974 trbox_independent_samples_init (trb,cmd);
977 trbox_paired_init (trb,cmd);
984 /* Populate a trbox according to cmd */
986 trbox_populate (struct trbox *trb, const struct dictionary *dict,
987 struct cmd_t_test *cmd)
989 trb->populate (trb, dict, cmd);
992 /* Submit and destroy a trbox */
994 trbox_finalize (struct trbox *trb)
999 /* Initialize the independent samples trbox */
1001 trbox_independent_samples_init (struct trbox *self,
1002 struct cmd_t_test *cmd UNUSED)
1005 const int vsize=cmd->n_variables*2+3;
1008 self->populate = trbox_independent_samples_populate;
1010 trbox_base_init (self,cmd->n_variables*2,hsize);
1011 tab_title (self->t,_ ("Independent Samples Test"));
1012 tab_hline (self->t,TAL_1,2,hsize-1,1);
1013 tab_vline (self->t,TAL_2,2,0,vsize-1);
1014 tab_vline (self->t,TAL_1,4,0,vsize-1);
1015 tab_box (self->t,-1,-1,-1,TAL_1, 2,1,hsize-2,vsize-1);
1016 tab_hline (self->t,TAL_1, hsize-2,hsize-1,2);
1017 tab_box (self->t,-1,-1,-1,TAL_1, hsize-2,2,hsize-1,vsize-1);
1018 tab_joint_text (self->t, 2, 0, 3, 0,
1019 TAB_CENTER,_ ("Levene's Test for Equality of Variances"));
1020 tab_joint_text (self->t, 4,0,hsize-1,0,
1021 TAB_CENTER,_ ("t-test for Equality of Means"));
1023 tab_text (self->t,2,2, TAB_CENTER | TAT_TITLE,_ ("F"));
1024 tab_text (self->t,3,2, TAB_CENTER | TAT_TITLE,_ ("Sig."));
1025 tab_text (self->t,4,2, TAB_CENTER | TAT_TITLE,_ ("t"));
1026 tab_text (self->t,5,2, TAB_CENTER | TAT_TITLE,_ ("df"));
1027 tab_text (self->t,6,2, TAB_CENTER | TAT_TITLE,_ ("Sig. (2-tailed)"));
1028 tab_text (self->t,7,2, TAB_CENTER | TAT_TITLE,_ ("Mean Difference"));
1029 tab_text (self->t,8,2, TAB_CENTER | TAT_TITLE,_ ("Std. Error Difference"));
1030 tab_text (self->t,9,2, TAB_CENTER | TAT_TITLE,_ ("Lower"));
1031 tab_text (self->t,10,2, TAB_CENTER | TAT_TITLE,_ ("Upper"));
1033 tab_joint_text (self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF,
1034 _ ("%g%% Confidence Interval of the Difference"),
1035 cmd->criteria*100.0);
1039 /* Populate the independent samples trbox */
1041 trbox_independent_samples_populate (struct trbox *self,
1042 const struct dictionary *dict UNUSED,
1043 struct cmd_t_test *cmd)
1048 for (i=0; i < cmd->n_variables; ++i)
1057 double pooled_variance;
1058 double std_err_diff;
1061 const struct variable *var = cmd->v_variables[i];
1062 struct group_proc *grp_data = group_proc_get (var);
1064 struct hsh_table *grp_hash = grp_data->group_hash;
1066 struct group_statistics *gs0 ;
1067 struct group_statistics *gs1 ;
1069 union value search_val;
1071 if ( gp.criterion == CMP_LE )
1072 search_val.f = gp.v.critical_value - 1.0;
1074 search_val = gp.v.g_value[0];
1076 gs0 = hsh_find (grp_hash, (void *) &search_val);
1079 if ( gp.criterion == CMP_LE )
1080 search_val.f = gp.v.critical_value + 1.0;
1082 search_val = gp.v.g_value[1];
1084 gs1 = hsh_find (grp_hash, (void *) &search_val);
1088 tab_text (self->t, 0, i*2+3, TAB_LEFT, var_get_name (cmd->v_variables[i]));
1090 tab_text (self->t, 1, i*2+3, TAB_LEFT, _ ("Equal variances assumed"));
1093 tab_double (self->t, 2, i*2+3, TAB_CENTER, grp_data->levene, NULL);
1095 /* Now work out the significance of the Levene test */
1096 df1 = 1; df2 = grp_data->ugs.n - 2;
1097 q = gsl_cdf_fdist_Q (grp_data->levene, df1, df2);
1099 tab_double (self->t, 3, i*2+3, TAB_CENTER, q, NULL);
1101 df = gs0->n + gs1->n - 2.0 ;
1102 tab_double (self->t, 5, i*2+3, TAB_RIGHT, df, NULL);
1104 pooled_variance = ( (gs0->n )*pow2 (gs0->s_std_dev)
1106 (gs1->n )*pow2 (gs1->s_std_dev)
1109 t = (gs0->mean - gs1->mean) / sqrt (pooled_variance) ;
1110 t /= sqrt ((gs0->n + gs1->n)/ (gs0->n*gs1->n));
1112 tab_double (self->t, 4, i*2+3, TAB_RIGHT, t, NULL);
1114 p = gsl_cdf_tdist_P (t, df);
1115 q = gsl_cdf_tdist_Q (t, df);
1117 tab_double (self->t, 6, i*2+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL);
1119 mean_diff = gs0->mean - gs1->mean;
1120 tab_double (self->t, 7, i*2+3, TAB_RIGHT, mean_diff, NULL);
1123 std_err_diff = sqrt ( pow2 (gs0->se_mean) + pow2 (gs1->se_mean));
1124 tab_double (self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, NULL);
1127 /* Now work out the confidence interval */
1128 q = (1 - cmd->criteria)/2.0; /* 2-tailed test */
1130 t = gsl_cdf_tdist_Qinv (q,df);
1131 tab_double (self->t, 9, i*2+3, TAB_RIGHT,
1132 mean_diff - t * std_err_diff, NULL);
1134 tab_double (self->t, 10, i*2+3, TAB_RIGHT,
1135 mean_diff + t * std_err_diff, NULL);
1140 /* Now for the \sigma_1 != \sigma_2 case */
1141 tab_text (self->t, 1, i*2+3+1,
1142 TAB_LEFT, _ ("Equal variances not assumed"));
1145 se2 = (pow2 (gs0->s_std_dev)/ (gs0->n -1) ) +
1146 (pow2 (gs1->s_std_dev)/ (gs1->n -1) );
1148 t = mean_diff / sqrt (se2) ;
1149 tab_double (self->t, 4, i*2+3+1, TAB_RIGHT, t, NULL);
1152 (pow2 (pow2 (gs0->s_std_dev)/ (gs0->n - 1 ))
1156 (pow2 (pow2 (gs1->s_std_dev)/ (gs1->n - 1 ))
1161 tab_double (self->t, 5, i*2+3+1, TAB_RIGHT, df, NULL);
1163 p = gsl_cdf_tdist_P (t, df);
1164 q = gsl_cdf_tdist_Q (t, df);
1166 tab_double (self->t, 6, i*2+3+1, TAB_RIGHT, 2.0* (t>0?q:p), NULL);
1168 /* Now work out the confidence interval */
1169 q = (1 - cmd->criteria)/2.0; /* 2-tailed test */
1171 t = gsl_cdf_tdist_Qinv (q, df);
1173 tab_double (self->t, 7, i*2+3+1, TAB_RIGHT, mean_diff, NULL);
1176 tab_double (self->t, 8, i*2+3+1, TAB_RIGHT, std_err_diff, NULL);
1179 tab_double (self->t, 9, i*2+3+1, TAB_RIGHT,
1180 mean_diff - t * std_err_diff, NULL);
1182 tab_double (self->t, 10, i*2+3+1, TAB_RIGHT,
1183 mean_diff + t * std_err_diff, NULL);
1188 /* Initialize the paired samples trbox */
1190 trbox_paired_init (struct trbox *self,
1191 struct cmd_t_test *cmd UNUSED)
1195 const int vsize=n_pairs+3;
1197 self->populate = trbox_paired_populate;
1199 trbox_base_init (self,n_pairs,hsize);
1200 tab_title (self->t, _ ("Paired Samples Test"));
1201 tab_hline (self->t,TAL_1,2,6,1);
1202 tab_vline (self->t,TAL_2,2,0,vsize - 1);
1203 tab_joint_text (self->t,2,0,6,0,TAB_CENTER,_ ("Paired Differences"));
1204 tab_box (self->t,-1,-1,-1,TAL_1, 2,1,6,vsize-1);
1205 tab_box (self->t,-1,-1,-1,TAL_1, 6,0,hsize-1,vsize-1);
1206 tab_hline (self->t,TAL_1,5,6, 2);
1207 tab_vline (self->t,TAL_GAP,6,0,1);
1209 tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF,
1210 _ ("%g%% Confidence Interval of the Difference"),
1211 cmd->criteria*100.0);
1213 tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _ ("Mean"));
1214 tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation"));
1215 tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _ ("Std. Error Mean"));
1216 tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _ ("Lower"));
1217 tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _ ("Upper"));
1218 tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _ ("t"));
1219 tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _ ("df"));
1220 tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _ ("Sig. (2-tailed)"));
1223 /* Populate the paired samples trbox */
1225 trbox_paired_populate (struct trbox *trb,
1226 const struct dictionary *dict,
1227 struct cmd_t_test *cmd UNUSED)
1231 const struct variable *wv = dict_get_weight (dict);
1232 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
1234 for (i=0; i < n_pairs; ++i)
1239 double n = pairs[i].n;
1243 tab_text (trb->t, 0, i+3, TAB_LEFT | TAT_PRINTF, _ ("Pair %d"),i);
1245 tab_text (trb->t, 1, i+3, TAB_LEFT | TAT_PRINTF, "%s - %s",
1246 var_get_name (pairs[i].v[0]),
1247 var_get_name (pairs[i].v[1]));
1249 tab_double (trb->t, 2, i+3, TAB_RIGHT, pairs[i].mean_diff, NULL);
1251 tab_double (trb->t, 3, i+3, TAB_RIGHT, pairs[i].std_dev_diff, NULL);
1254 se_mean = pairs[i].std_dev_diff / sqrt (n) ;
1255 tab_double (trb->t, 4, i+3, TAB_RIGHT, se_mean, NULL);
1257 /* Now work out the confidence interval */
1258 q = (1 - cmd->criteria)/2.0; /* 2-tailed test */
1260 t = gsl_cdf_tdist_Qinv (q, df);
1262 tab_double (trb->t, 5, i+3, TAB_RIGHT,
1263 pairs[i].mean_diff - t * se_mean , NULL);
1265 tab_double (trb->t, 6, i+3, TAB_RIGHT,
1266 pairs[i].mean_diff + t * se_mean , NULL);
1268 t = (pairs[i].mean[0] - pairs[i].mean[1])
1270 ( pow2 (pairs[i].s_std_dev[0]) + pow2 (pairs[i].s_std_dev[1]) -
1271 2 * pairs[i].correlation *
1272 pairs[i].s_std_dev[0] * pairs[i].s_std_dev[1] )
1276 tab_double (trb->t, 7, i+3, TAB_RIGHT, t, NULL);
1278 /* Degrees of freedom */
1279 tab_double (trb->t, 8, i+3, TAB_RIGHT, df, wfmt);
1281 p = gsl_cdf_tdist_P (t,df);
1282 q = gsl_cdf_tdist_Q (t,df);
1284 tab_double (trb->t, 9, i+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL);
1289 /* Initialize the one sample trbox */
1291 trbox_one_sample_init (struct trbox *self, struct cmd_t_test *cmd )
1294 const int vsize=cmd->n_variables+3;
1296 self->populate = trbox_one_sample_populate;
1298 trbox_base_init (self, cmd->n_variables,hsize);
1299 tab_title (self->t, _ ("One-Sample Test"));
1300 tab_hline (self->t, TAL_1, 1, hsize - 1, 1);
1301 tab_vline (self->t, TAL_2, 1, 0, vsize - 1);
1303 tab_joint_text (self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF,
1304 _ ("Test Value = %f"), cmd->n_testval[0]);
1306 tab_box (self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1);
1309 tab_joint_text (self->t,5,1,6,1,TAB_CENTER | TAT_PRINTF,
1310 _ ("%g%% Confidence Interval of the Difference"),
1311 cmd->criteria*100.0);
1313 tab_vline (self->t,TAL_GAP,6,1,1);
1314 tab_hline (self->t,TAL_1,5,6,2);
1315 tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _ ("t"));
1316 tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _ ("df"));
1317 tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _ ("Sig. (2-tailed)"));
1318 tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _ ("Mean Difference"));
1319 tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _ ("Lower"));
1320 tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _ ("Upper"));
1325 /* Populate the one sample trbox */
1327 trbox_one_sample_populate (struct trbox *trb,
1328 const struct dictionary *dict,
1329 struct cmd_t_test *cmd)
1333 const struct variable *wv = dict_get_weight (dict);
1334 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
1338 for (i=0; i < cmd->n_variables; ++i)
1343 struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs;
1346 tab_text (trb->t, 0, i+3, TAB_LEFT, var_get_name (cmd->v_variables[i]));
1348 t = (gs->mean - cmd->n_testval[0] ) * sqrt (gs->n) / gs->std_dev ;
1350 tab_double (trb->t, 1, i+3, TAB_RIGHT, t, NULL);
1352 /* degrees of freedom */
1355 tab_double (trb->t, 2, i+3, TAB_RIGHT, df, wfmt);
1357 p = gsl_cdf_tdist_P (t, df);
1358 q = gsl_cdf_tdist_Q (t, df);
1360 /* Multiply by 2 to get 2-tailed significance, makeing sure we've got
1362 tab_double (trb->t, 3, i+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL);
1364 tab_double (trb->t, 4, i+3, TAB_RIGHT, gs->mean_diff, NULL);
1367 q = (1 - cmd->criteria)/2.0; /* 2-tailed test */
1368 t = gsl_cdf_tdist_Qinv (q, df);
1370 tab_double (trb->t, 5, i+3, TAB_RIGHT,
1371 gs->mean_diff - t * gs->se_mean, NULL);
1373 tab_double (trb->t, 6, i+3, TAB_RIGHT,
1374 gs->mean_diff + t * gs->se_mean, NULL);
1378 /* Base initializer for the generalized trbox */
1380 trbox_base_init (struct trbox *self, size_t data_rows, int cols)
1382 const size_t rows = 3 + data_rows;
1384 self->finalize = trbox_base_finalize;
1385 self->t = tab_create (cols, rows, 0);
1386 tab_headers (self->t,0,0,3,0);
1387 tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols -1, rows -1);
1388 tab_hline (self->t, TAL_2,0,cols-1,3);
1389 tab_dim (self->t, tab_natural_dimensions);
1393 /* Base finalizer for the trbox */
1395 trbox_base_finalize (struct trbox *trb)
1397 tab_submit (trb->t);
1401 /* Create , populate and submit the Paired Samples Correlation box */
1403 pscbox (const struct dictionary *dict)
1405 const struct variable *wv = dict_get_weight (dict);
1406 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
1408 const int rows = 1 + n_pairs;
1412 struct tab_table *table;
1414 table = tab_create (cols,rows,0);
1416 tab_columns (table, SOM_COL_DOWN, 1);
1417 tab_headers (table,0,0,1,0);
1418 tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 );
1419 tab_hline (table, TAL_2, 0, cols - 1, 1);
1420 tab_vline (table, TAL_2, 2, 0, rows - 1);
1421 tab_dim (table, tab_natural_dimensions);
1422 tab_title (table, _ ("Paired Samples Correlations"));
1424 /* column headings */
1425 tab_text (table, 2,0, TAB_CENTER | TAT_TITLE, _ ("N"));
1426 tab_text (table, 3,0, TAB_CENTER | TAT_TITLE, _ ("Correlation"));
1427 tab_text (table, 4,0, TAB_CENTER | TAT_TITLE, _ ("Sig."));
1429 for (i=0; i < n_pairs; ++i)
1433 double df = pairs[i].n -2;
1435 /* pairs[i].correlation is a correlation, so mathematically it will
1436 always be in the range [-1.0, 1.0]. Inaccurate calculations sometimes
1437 cause it to be slightly greater than 1.0, however, which makes the
1438 sqrt() below to come out as NaN instead of 0. So force it to be 1.0
1440 double corr = MIN (1.0, pairs[i].correlation);
1442 double correlation_t =
1443 pairs[i].correlation * sqrt (df) /
1444 sqrt (1 - pow2 (corr));
1448 tab_text (table, 0,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1451 tab_text (table, 1,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1453 var_get_name (pairs[i].v[0]),
1454 var_get_name (pairs[i].v[1]));
1458 tab_double (table, 2, i+1, TAB_RIGHT, pairs[i].n, wfmt);
1459 tab_double (table, 3, i+1, TAB_RIGHT, pairs[i].correlation, NULL);
1461 p = gsl_cdf_tdist_P (correlation_t, df);
1462 q = gsl_cdf_tdist_Q (correlation_t, df);
1464 tab_double (table, 4, i+1, TAB_RIGHT, 2.0* (correlation_t>0?q:p), NULL);
1473 /* Calculation Implementation */
1475 /* Per case calculations common to all variants of the T test */
1477 common_calc (const struct dictionary *dict,
1478 const struct ccase *c,
1480 enum mv_class exclude)
1483 struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd;
1485 double weight = dict_get_case_weight (dict, c, NULL);
1488 /* Listwise has to be implicit if the independent variable is missing ?? */
1489 if ( cmd->sbc_groups )
1491 if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
1495 for (i = 0; i < cmd->n_variables ; ++i)
1497 const struct variable *v = cmd->v_variables[i];
1498 const union value *val = case_data (c, v);
1500 if (!var_is_value_missing (v, val, exclude))
1502 struct group_statistics *gs;
1503 gs = &group_proc_get (v)->ugs;
1506 gs->sum += weight * val->f;
1507 gs->ssq += weight * val->f * val->f;
1513 /* Pre calculations common to all variants of the T test */
1515 common_precalc ( struct cmd_t_test *cmd )
1519 for (i=0; i< cmd->n_variables ; ++i)
1521 struct group_statistics *gs;
1522 gs= &group_proc_get (cmd->v_variables[i])->ugs;
1531 /* Post calculations common to all variants of the T test */
1533 common_postcalc (struct cmd_t_test *cmd)
1537 for (i=0; i< cmd->n_variables ; ++i)
1539 struct group_statistics *gs;
1540 gs= &group_proc_get (cmd->v_variables[i])->ugs;
1542 gs->mean=gs->sum / gs->n;
1543 gs->s_std_dev= sqrt (
1544 ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1549 ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1552 gs->se_mean = gs->std_dev / sqrt (gs->n);
1553 gs->mean_diff= gs->sum_diff / gs->n;
1557 /* Per case calculations for one sample t test */
1559 one_sample_calc (const struct dictionary *dict,
1560 const struct ccase *c, void *cmd_,
1561 enum mv_class exclude)
1565 struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_;
1567 double weight = dict_get_case_weight (dict, c, NULL);
1570 for (i=0; i< cmd->n_variables ; ++i)
1572 struct group_statistics *gs;
1573 const struct variable *v = cmd->v_variables[i];
1574 const union value *val = case_data (c, v);
1576 gs= &group_proc_get (cmd->v_variables[i])->ugs;
1578 if (!var_is_value_missing (v, val, exclude))
1579 gs->sum_diff += weight * (val->f - cmd->n_testval[0]);
1585 /* Pre calculations for one sample t test */
1587 one_sample_precalc ( struct cmd_t_test *cmd )
1591 for (i=0; i< cmd->n_variables ; ++i)
1593 struct group_statistics *gs;
1594 gs= &group_proc_get (cmd->v_variables[i])->ugs;
1600 /* Post calculations for one sample t test */
1602 one_sample_postcalc (struct cmd_t_test *cmd)
1606 for (i=0; i< cmd->n_variables ; ++i)
1608 struct group_statistics *gs;
1609 gs= &group_proc_get (cmd->v_variables[i])->ugs;
1611 gs->mean_diff = gs->sum_diff / gs->n ;
1618 paired_precalc (struct cmd_t_test *cmd UNUSED)
1622 for (i=0; i < n_pairs ; ++i )
1625 pairs[i].sum[0] = 0; pairs[i].sum[1] = 0;
1626 pairs[i].ssq[0] = 0; pairs[i].ssq[1] = 0;
1627 pairs[i].sum_of_prod = 0;
1628 pairs[i].correlation = 0;
1629 pairs[i].sum_of_diffs = 0;
1630 pairs[i].ssq_diffs = 0;
1637 paired_calc (const struct dictionary *dict, const struct ccase *c,
1638 struct cmd_t_test *cmd UNUSED, enum mv_class exclude)
1642 double weight = dict_get_case_weight (dict, c, NULL);
1644 for (i=0; i < n_pairs ; ++i )
1646 const struct variable *v0 = pairs[i].v[0];
1647 const struct variable *v1 = pairs[i].v[1];
1649 const union value *val0 = case_data (c, v0);
1650 const union value *val1 = case_data (c, v1);
1652 if (!var_is_value_missing (v0, val0, exclude) &&
1653 !var_is_value_missing (v1, val1, exclude))
1655 pairs[i].n += weight;
1656 pairs[i].sum[0] += weight * val0->f;
1657 pairs[i].sum[1] += weight * val1->f;
1659 pairs[i].ssq[0] += weight * pow2 (val0->f);
1660 pairs[i].ssq[1] += weight * pow2 (val1->f);
1662 pairs[i].sum_of_prod += weight * val0->f * val1->f ;
1664 pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ;
1665 pairs[i].ssq_diffs += weight * pow2 (val0->f - val1->f);
1673 paired_postcalc (struct cmd_t_test *cmd UNUSED)
1677 for (i=0; i < n_pairs ; ++i )
1680 const double n = pairs[i].n;
1682 for (j=0; j < 2 ; ++j)
1684 pairs[i].mean[j] = pairs[i].sum[j] / n ;
1685 pairs[i].s_std_dev[j] = sqrt ((pairs[i].ssq[j] / n -
1686 pow2 (pairs[i].mean[j]))
1689 pairs[i].std_dev[j] = sqrt (n/ (n-1)* (pairs[i].ssq[j] / n -
1690 pow2 (pairs[i].mean[j]))
1694 pairs[i].correlation = pairs[i].sum_of_prod / pairs[i].n -
1695 pairs[i].mean[0] * pairs[i].mean[1] ;
1696 /* correlation now actually contains the covariance */
1698 pairs[i].correlation /= pairs[i].std_dev[0] * pairs[i].std_dev[1];
1699 pairs[i].correlation *= pairs[i].n / ( pairs[i].n - 1 );
1701 pairs[i].mean_diff = pairs[i].sum_of_diffs / n ;
1703 pairs[i].std_dev_diff = sqrt ( n / (n - 1) * (
1704 ( pairs[i].ssq_diffs / n )
1706 pow2 (pairs[i].mean_diff )
1712 group_precalc (struct cmd_t_test *cmd )
1717 for (i=0; i< cmd->n_variables ; ++i)
1719 struct group_proc *ttpr = group_proc_get (cmd->v_variables[i]);
1721 /* There's always 2 groups for a T - TEST */
1724 gp.indep_width = var_get_width (indep_var);
1726 ttpr->group_hash = hsh_create (2,
1727 (hsh_compare_func *) compare_group_binary,
1728 (hsh_hash_func *) hash_group_binary,
1729 (hsh_free_func *) free_group,
1732 for (j=0 ; j < 2 ; ++j)
1734 struct group_statistics *gs = xmalloc (sizeof *gs);
1740 if ( gp.criterion == CMP_EQ )
1742 gs->id = gp.v.g_value[j];
1747 gs->id.f = gp.v.critical_value - 1.0 ;
1749 gs->id.f = gp.v.critical_value + 1.0 ;
1752 hsh_insert ( ttpr->group_hash, (void *) gs );
1759 group_calc (const struct dictionary *dict,
1760 const struct ccase *c, struct cmd_t_test *cmd,
1761 enum mv_class exclude)
1765 const double weight = dict_get_case_weight (dict, c, NULL);
1767 const union value *gv;
1769 if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
1772 gv = case_data (c, indep_var);
1774 for (i=0; i< cmd->n_variables ; ++i)
1776 const struct variable *var = cmd->v_variables[i];
1777 const union value *val = case_data (c, var);
1778 struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
1779 struct group_statistics *gs;
1781 gs = hsh_find (grp_hash, (void *) gv);
1783 /* If the independent variable doesn't match either of the values
1784 for this case then move on to the next case */
1788 if (!var_is_value_missing (var, val, exclude))
1791 gs->sum += weight * val->f;
1792 gs->ssq += weight * pow2 (val->f);
1801 group_postcalc ( struct cmd_t_test *cmd )
1805 for (i = 0; i < cmd->n_variables ; ++i)
1807 const struct variable *var = cmd->v_variables[i];
1808 struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
1809 struct hsh_iterator g;
1810 struct group_statistics *gs;
1813 for (gs = hsh_first (grp_hash,&g);
1815 gs = hsh_next (grp_hash,&g))
1817 gs->mean = gs->sum / gs->n;
1819 gs->s_std_dev= sqrt (
1820 ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1825 ( (gs->ssq / gs->n ) - gs->mean * gs->mean )
1828 gs->se_mean = gs->std_dev / sqrt (gs->n);
1831 assert (count == 2);
1838 calculate (struct cmd_t_test *cmd,
1839 struct casereader *input, const struct dataset *ds)
1841 const struct dictionary *dict = dataset_dict (ds);
1842 struct ssbox stat_summary_box;
1843 struct trbox test_results_box;
1845 struct casereader *pass1, *pass2, *pass3;
1846 struct taint *taint;
1849 enum mv_class exclude = cmd->miss != TTS_INCLUDE ? MV_ANY : MV_SYSTEM;
1851 if (!casereader_peek (input, 0, &c))
1853 casereader_destroy (input);
1856 output_split_file_values (ds, &c);
1859 if ( cmd->miss == TTS_LISTWISE )
1860 input = casereader_create_filter_missing (input,
1865 input = casereader_create_filter_weight (input, dict, NULL, NULL);
1867 taint = taint_clone (casereader_get_taint (input));
1868 casereader_split (input, &pass1, &pass2);
1870 common_precalc (cmd);
1871 for (; casereader_read (pass1, &c); case_destroy (&c))
1872 common_calc (dict, &c, cmd, exclude);
1873 casereader_destroy (pass1);
1874 common_postcalc (cmd);
1879 one_sample_precalc (cmd);
1880 for (; casereader_read (pass2, &c); case_destroy (&c))
1881 one_sample_calc (dict, &c, cmd, exclude);
1882 one_sample_postcalc (cmd);
1885 paired_precalc (cmd);
1886 for (; casereader_read (pass2, &c); case_destroy (&c))
1887 paired_calc (dict, &c, cmd, exclude);
1888 paired_postcalc (cmd);
1891 pass3 = casereader_clone (pass2);
1893 group_precalc (cmd);
1894 for (; casereader_read (pass2, &c); case_destroy (&c))
1895 group_calc (dict, &c, cmd, exclude);
1896 group_postcalc (cmd);
1898 levene (dict, pass3, indep_var, cmd->n_variables, cmd->v_variables,
1902 casereader_destroy (pass2);
1904 if (!taint_has_tainted_successor (taint))
1906 ssbox_create (&stat_summary_box,cmd,mode);
1907 ssbox_populate (&stat_summary_box, dict, cmd);
1908 ssbox_finalize (&stat_summary_box);
1910 if ( mode == T_PAIRED )
1913 trbox_create (&test_results_box, cmd, mode);
1914 trbox_populate (&test_results_box, dict, cmd);
1915 trbox_finalize (&test_results_box);
1918 taint_destroy (taint);
1921 short which_group (const struct group_statistics *g,
1922 const struct group_properties *p);
1924 /* Return -1 if the id of a is less than b; +1 if greater than and
1927 compare_group_binary (const struct group_statistics *a,
1928 const struct group_statistics *b,
1929 const struct group_properties *p)
1934 if ( p->criterion == CMP_LE )
1936 /* less-than comparision is not meaningfull for
1937 alpha variables, so we shouldn't ever arrive here */
1938 assert (p->indep_width == 0 ) ;
1940 flag_a = ( a->id.f < p->v.critical_value ) ;
1941 flag_b = ( b->id.f < p->v.critical_value ) ;
1945 flag_a = which_group (a, p);
1946 flag_b = which_group (b, p);
1949 if (flag_a < flag_b )
1952 return (flag_a > flag_b);
1955 /* This is a degenerate case of a hash, since it can only return three possible
1956 values. It's really a comparison, being used as a hash function */
1959 hash_group_binary (const struct group_statistics *g,
1960 const struct group_properties *p)
1964 if ( p->criterion == CMP_LE )
1966 /* Not meaningfull to do a less than compare for alpha values ? */
1967 assert (p->indep_width == 0 ) ;
1968 flag = ( g->id.f < p->v.critical_value ) ;
1970 else if ( p->criterion == CMP_EQ)
1972 flag = which_group (g,p);
1980 /* return 0 if G belongs to group 0,
1981 1 if it belongs to group 1,
1982 2 if it belongs to neither group */
1984 which_group (const struct group_statistics *g,
1985 const struct group_properties *p)
1987 if ( 0 == compare_values (&g->id, &p->v.g_value[0], p->indep_width))
1990 if ( 0 == compare_values (&g->id, &p->v.g_value[1], p->indep_width))