1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include "algorithm.h"
41 #include "dcdflib/cdflib.h"
50 #include "value-labels.h"
54 #include "debug-print.h"
60 +missing=miss:!table/include/report;
61 +write[wr_]=none,cells,all;
62 +format=fmt:!labels/nolabels/novallabs,
65 tabl:!tables/notables,
68 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
70 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
76 /* Number of chi-square statistics. */
79 /* Number of symmetric statistics. */
82 /* Number of directional statistics. */
83 #define N_DIRECTIONAL 13
85 /* A single table entry for general mode. */
88 int table; /* Flattened table number. */
91 double freq; /* Frequency count. */
92 double *data; /* Crosstabulation table for integer mode. */
95 union value v[1]; /* Values. */
98 /* A crosstabulation. */
101 int nvar; /* Number of variables. */
102 double missing; /* Missing cases count. */
103 int ofs; /* Integer mode: Offset into sorted_tab[]. */
104 struct variable *v[2]; /* At least two variables; sorted by
105 larger indices first. */
108 /* Indexes into crosstab.v. */
115 /* General mode crosstabulation table. */
116 static struct hsh_table *gen_tab; /* Hash table. */
117 static int n_sorted_tab; /* Number of entries in sorted_tab. */
118 static struct table_entry **sorted_tab; /* Sorted table. */
120 /* Variables specifies on VARIABLES. */
121 static struct variable **variables;
122 static size_t variables_cnt;
125 static struct crosstab **xtab;
128 /* Integer or general mode? */
137 static int num_cells; /* Number of cells requested. */
138 static int cells[8]; /* Cells requested. */
139 static int expected; /* Nonzero if expected value is needed. */
142 static int write; /* One of WR_* that specifies the WRITE style. */
144 /* Command parsing info. */
145 static struct cmd_crosstabs cmd;
148 static struct pool *pl_tc; /* For table cells. */
149 static struct pool *pl_col; /* For column data. */
151 static int internal_cmd_crosstabs (void);
152 static void precalc (void);
153 static int calc_general (struct ccase *);
154 static int calc_integer (struct ccase *);
155 static void postcalc (void);
156 static void submit (struct tab_table *);
159 static void debug_print (void);
160 static void print_table_entries (struct table_entry **tab);
163 /* Parse and execute CROSSTABS, then clean up. */
167 int result = internal_cmd_crosstabs ();
170 pool_destroy (pl_tc);
171 pool_destroy (pl_col);
176 /* Parses and executes the CROSSTABS procedure. */
178 internal_cmd_crosstabs (void)
184 pl_tc = pool_create ();
185 pl_col = pool_create ();
187 lex_match_id ("CROSSTABS");
188 if (!parse_crosstabs (&cmd))
192 /* Needs variables. */
196 mode = variables ? INTEGER : GENERAL;
202 cmd.a_cells[CRS_CL_COUNT] = 1;
210 for (i = 0; i < CRS_CL_count; i++)
215 cmd.a_cells[CRS_CL_COUNT] = 1;
216 cmd.a_cells[CRS_CL_ROW] = 1;
217 cmd.a_cells[CRS_CL_COLUMN] = 1;
218 cmd.a_cells[CRS_CL_TOTAL] = 1;
220 if (cmd.a_cells[CRS_CL_ALL])
222 for (i = 0; i < CRS_CL_count; i++)
224 cmd.a_cells[CRS_CL_ALL] = 0;
226 cmd.a_cells[CRS_CL_NONE] = 0;
227 for (num_cells = i = 0; i < CRS_CL_count; i++)
230 if (i >= CRS_CL_EXPECTED)
232 cmd.a_cells[num_cells++] = i;
237 if (cmd.sbc_statistics)
242 for (i = 0; i < CRS_ST_count; i++)
243 if (cmd.a_statistics[i])
246 cmd.a_statistics[CRS_ST_CHISQ] = 1;
247 if (cmd.a_statistics[CRS_ST_ALL])
248 for (i = 0; i < CRS_ST_count; i++)
249 cmd.a_statistics[i] = 1;
253 if (cmd.miss == CRS_REPORT && mode == GENERAL)
255 msg (SE, _("Missing mode REPORT not allowed in general mode. "
256 "Assuming MISSING=TABLE."));
257 cmd.miss = CRS_TABLE;
261 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
262 cmd.a_write[CRS_WR_ALL] = 0;
263 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
265 msg (SE, _("Write mode ALL not allowed in general mode. "
266 "Assuming WRITE=CELLS."));
267 cmd.a_write[CRS_WR_CELLS] = 1;
270 && (cmd.a_write[CRS_WR_NONE]
271 + cmd.a_write[CRS_WR_ALL]
272 + cmd.a_write[CRS_WR_CELLS] == 0))
273 cmd.a_write[CRS_WR_CELLS] = 1;
274 if (cmd.a_write[CRS_WR_CELLS])
275 write = CRS_WR_CELLS;
276 else if (cmd.a_write[CRS_WR_ALL])
281 procedure (precalc, mode == GENERAL ? calc_general : calc_integer, postcalc);
286 /* Parses the TABLES subcommand. */
288 crs_custom_tables (struct cmd_crosstabs *cmd unused)
290 struct var_set *var_set;
292 struct variable ***by = NULL;
297 /* Ensure that this is a TABLES subcommand. */
298 if (!lex_match_id ("TABLES")
299 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
304 if (variables != NULL)
305 var_set = var_set_create_from_array (variables, variables_cnt);
307 var_set = var_set_create_from_dict (default_dict);
308 assert (var_set != NULL);
312 by = xrealloc (by, sizeof *by * (n_by + 1));
313 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
314 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
315 PV_NO_DUPLICATE | PV_NO_SCRATCH))
320 if (!lex_match (T_BY))
324 lex_error (_("expecting BY"));
333 int *by_iter = xcalloc (sizeof *by_iter * n_by);
336 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
337 for (i = 0; i < nx; i++)
341 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
348 for (i = 0; i < n_by; i++)
349 x->v[i] = by[i][by_iter[i]];
355 for (i = n_by - 1; i >= 0; i--)
357 if (++by_iter[i] < by_nvar[i])
370 /* All return paths lead here. */
374 for (i = 0; i < n_by; i++)
380 var_set_destroy (var_set);
385 /* Parses the VARIABLES subcommand. */
387 crs_custom_variables (struct cmd_crosstabs *cmd unused)
391 msg (SE, _("VARIABLES must be specified before TABLES."));
399 int orig_nv = variables_cnt;
404 if (!parse_variables (default_dict, &variables, &variables_cnt,
405 (PV_APPEND | PV_NUMERIC
406 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
411 lex_error ("expecting `('");
416 if (!lex_force_int ())
418 min = lex_integer ();
423 if (!lex_force_int ())
425 max = lex_integer ();
428 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
436 lex_error ("expecting `)'");
441 for (i = orig_nv; i < variables_cnt; i++)
443 variables[i]->p.crs.min = min;
444 variables[i]->p.crs.max = max + 1.;
445 variables[i]->p.crs.count = max - min + 1;
464 printf ("CROSSTABS\n");
466 if (variables != NULL)
470 printf ("\t/VARIABLES=");
471 for (i = 0; i < variables_cnt; i++)
473 struct variable *v = variables[i];
475 printf ("%s ", v->name);
476 if (i < variables_cnt - 1)
478 struct variable *nv = variables[i + 1];
480 if (v->p.crs.min == nv->p.crs.min
481 && v->p.crs.max == nv->p.crs.max)
484 printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1);
492 printf ("\t/TABLES=");
493 for (i = 0; i < nxtab; i++)
495 struct crosstab *x = xtab[i];
500 for (j = 0; j < x->nvar; j++)
504 printf ("%s", x->v[j]->name);
510 #endif /* DEBUGGING */
512 /* Data file processing. */
514 static int compare_table_entry (const void *, const void *, void *);
515 static unsigned hash_table_entry (const void *, void *);
517 /* Set up the crosstabulation tables for processing. */
523 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
533 for (i = 0; i < nxtab; i++)
535 struct crosstab *x = xtab[i];
540 x->ofs = n_sorted_tab;
542 for (j = 2; j < x->nvar; j++)
543 count *= x->v[j - 2]->p.crs.count;
545 sorted_tab = xrealloc (sorted_tab,
546 sizeof *sorted_tab * (n_sorted_tab + count));
547 v = local_alloc (sizeof *v * x->nvar);
548 for (j = 2; j < x->nvar; j++)
549 v[j] = x->v[j]->p.crs.min;
550 for (j = 0; j < count; j++)
552 struct table_entry *te;
555 te = sorted_tab[n_sorted_tab++]
556 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
560 const int mat_size = (x->v[0]->p.crs.count
561 * x->v[1]->p.crs.count);
564 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
565 for (m = 0; m < mat_size; m++)
569 for (k = 2; k < x->nvar; k++)
571 for (k = 2; k < x->nvar; k++)
572 if (++v[k] >= x->v[k]->p.crs.max)
573 v[k] = x->v[k]->p.crs.min;
580 sorted_tab = xrealloc (sorted_tab,
581 sizeof *sorted_tab * (n_sorted_tab + 1));
582 sorted_tab[n_sorted_tab] = NULL;
586 /* Form crosstabulations for general mode. */
588 calc_general (struct ccase *c)
591 double weight = dict_get_case_weight (default_dict, c);
593 /* Flattened current table index. */
596 for (t = 0; t < nxtab; t++)
598 struct crosstab *x = xtab[t];
599 const size_t entry_size = (sizeof (struct table_entry)
600 + sizeof (union value) * (x->nvar - 1));
601 struct table_entry *te = local_alloc (entry_size);
603 /* Construct table entry for the current record and table. */
609 for (j = 0; j < x->nvar; j++)
611 if ((cmd.miss == CRS_TABLE
612 && is_missing (&c->data[x->v[j]->fv], x->v[j]))
613 || (cmd.miss == CRS_INCLUDE
614 && is_system_missing (&c->data[x->v[j]->fv], x->v[j])))
616 x->missing += weight;
620 if (x->v[j]->type == NUMERIC)
621 te->v[j].f = c->data[x->v[j]->fv].f;
624 memcpy (te->v[j].s, c->data[x->v[j]->fv].s, x->v[j]->width);
626 /* Necessary in order to simplify comparisons. */
627 memset (&te->v[j].s[x->v[j]->width], 0,
628 sizeof (union value) - x->v[j]->width);
633 /* Add record to hash table. */
635 struct table_entry **tepp
636 = (struct table_entry **) hsh_probe (gen_tab, te);
639 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
642 memcpy (tep, te, entry_size);
647 (*tepp)->u.freq += weight;
658 calc_integer (struct ccase *c)
661 double weight = dict_get_case_weight (default_dict, c);
663 /* Flattened current table index. */
666 for (t = 0; t < nxtab; t++)
668 struct crosstab *x = xtab[t];
673 for (i = 0; i < x->nvar; i++)
675 struct variable *const v = x->v[i];
676 double value = c->data[v->fv].f;
678 /* Note that the first test also rules out SYSMIS. */
679 if ((value < v->p.crs.min || value >= v->p.crs.max)
680 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
682 x->missing += weight;
688 ofs += fact * ((int) value - v->p.crs.min);
689 fact *= v->p.crs.count;
694 const int row = c->data[x->v[ROW_VAR]->fv].f - x->v[ROW_VAR]->p.crs.min;
695 const int col = c->data[x->v[COL_VAR]->fv].f - x->v[COL_VAR]->p.crs.min;
696 const int col_dim = x->v[COL_VAR]->p.crs.count;
698 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
708 /* Print out all table entries in NULL-terminated TAB for use by a
709 debugger (a person, not a program). */
711 print_table_entries (struct table_entry **tab)
713 printf ("raw crosstabulation data:\n");
716 const struct crosstab *x = xtab[(*tab)->table];
719 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
720 for (i = 0; i < x->nvar; i++)
724 printf ("%s:", x->v[i]->name);
726 if (x->v[i]->type == NUMERIC)
727 printf ("%g", (*tab)->v[i].f);
729 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
737 /* Compare the table_entry's at A and B and return a strcmp()-type
740 compare_table_entry (const void *a_, const void *b_, void *foo unused)
742 const struct table_entry *a = a_;
743 const struct table_entry *b = b_;
745 if (a->table > b->table)
747 else if (a->table < b->table)
751 const struct crosstab *x = xtab[a->table];
754 for (i = x->nvar - 1; i >= 0; i--)
755 if (x->v[i]->type == NUMERIC)
757 const double diffnum = a->v[i].f - b->v[i].f;
760 else if (diffnum > 0)
765 assert (x->v[i]->type == ALPHA);
767 const int diffstr = strncmp (a->v[i].s, b->v[i].s, x->v[i]->width);
777 /* Calculate a hash value from table_entry PA. */
779 hash_table_entry (const void *pa, void *foo unused)
781 const struct table_entry *a = pa;
782 unsigned long hash = a->table;
785 /* Hash formula from _SPSS Statistical Algorithms_. */
786 for (i = 0; i < xtab[a->table]->nvar; i++)
788 hash = (hash << 3) | (hash >> (CHAR_BIT * SIZEOF_LONG - 3));
789 hash ^= a->v[i].hash[0];
790 #if SIZEOF_DOUBLE / SIZEOF_LONG > 1
791 hash ^= a->v[i].hash[1];
798 /* Post-data reading calculations. */
800 static struct table_entry **find_pivot_extent (struct table_entry **,
801 int *cnt, int pivot);
802 static void enum_var_values (struct table_entry **entries, int entry_cnt,
804 union value **values, int *value_cnt);
805 static void output_pivot_table (struct table_entry **, struct table_entry **,
806 double **, double **, double **,
807 int *, int *, int *);
808 static void make_summary_table (void);
815 n_sorted_tab = hsh_count (gen_tab);
816 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
818 print_table_entries (sorted_tab);
822 make_summary_table ();
824 /* Identify all the individual crosstabulation tables, and deal with
827 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
828 int pc = n_sorted_tab; /* Pivot count. */
830 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
831 int maxrows = 0, maxcols = 0, maxcells = 0;
835 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
839 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
840 &maxrows, &maxcols, &maxcells);
849 hsh_destroy (gen_tab);
852 static void insert_summary (struct tab_table *, int tab_index, double valid);
854 /* Output a table summarizing the cases processed. */
856 make_summary_table (void)
858 struct tab_table *summary;
860 struct table_entry **pb = sorted_tab, **pe;
861 int pc = n_sorted_tab;
864 summary = tab_create (7, 3 + nxtab, 1);
865 tab_title (summary, 0, _("Summary."));
866 tab_headers (summary, 1, 0, 3, 0);
867 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
868 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
869 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
870 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
871 tab_hline (summary, TAL_1, 1, 6, 1);
872 tab_hline (summary, TAL_1, 1, 6, 2);
873 tab_vline (summary, TAL_1, 3, 1, 1);
874 tab_vline (summary, TAL_1, 5, 1, 1);
878 for (i = 0; i < 3; i++)
880 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
881 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
884 tab_offset (summary, 0, 3);
890 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
894 while (cur_tab < (*pb)->table)
895 insert_summary (summary, cur_tab++, 0.);
898 for (valid = 0.; pb < pe; pb++)
899 valid += (*pb)->u.freq;
902 const struct crosstab *const x = xtab[(*pb)->table];
903 const int n_cols = x->v[COL_VAR]->p.crs.count;
904 const int n_rows = x->v[ROW_VAR]->p.crs.count;
905 const int count = n_cols * n_rows;
907 for (valid = 0.; pb < pe; pb++)
909 const double *data = (*pb)->u.data;
912 for (i = 0; i < count; i++)
916 insert_summary (summary, cur_tab++, valid);
921 while (cur_tab < nxtab)
922 insert_summary (summary, cur_tab++, 0.);
927 /* Inserts a line into T describing the crosstabulation at index
928 TAB_INDEX, which has VALID valid observations. */
930 insert_summary (struct tab_table *t, int tab_index, double valid)
932 struct crosstab *x = xtab[tab_index];
934 tab_hline (t, TAL_1, 0, 6, 0);
936 /* Crosstabulation name. */
938 char *buf = local_alloc (128 * x->nvar);
942 for (i = 0; i < x->nvar; i++)
945 cp = stpcpy (cp, " * ");
947 cp = stpcpy (cp, x->v[i]->label ? x->v[i]->label : x->v[i]->name);
949 tab_text (t, 0, 0, TAB_LEFT, buf);
954 /* Counts and percentages. */
964 for (i = 0; i < 3; i++)
966 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
967 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
978 static struct tab_table *table; /* Crosstabulation table. */
979 static struct tab_table *chisq; /* Chi-square table. */
980 static struct tab_table *sym; /* Symmetric measures table. */
981 static struct tab_table *risk; /* Risk estimate table. */
982 static struct tab_table *direct; /* Directional measures table. */
985 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
987 /* Column values, number of columns. */
988 static union value *cols;
991 /* Row values, number of rows. */
992 static union value *rows;
995 /* Number of statistically interesting columns/rows (columns/rows with
997 static int ns_cols, ns_rows;
999 /* Crosstabulation. */
1000 static struct crosstab *x;
1002 /* Number of variables from the crosstabulation to consider. This is
1003 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1006 /* Matrix contents. */
1007 static double *mat; /* Matrix proper. */
1008 static double *row_tot; /* Row totals. */
1009 static double *col_tot; /* Column totals. */
1010 static double W; /* Grand total. */
1012 static void display_dimensions (struct tab_table *, int first_difference,
1013 struct table_entry *);
1014 static void display_crosstabulation (void);
1015 static void display_chisq (void);
1016 static void display_symmetric (void);
1017 static void display_risk (void);
1018 static void display_directional (void);
1019 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1020 static void table_value_missing (struct tab_table *table, int c, int r,
1021 unsigned char opt, const union value *v,
1022 const struct variable *var);
1023 static void delete_missing (void);
1025 /* Output pivot table beginning at PB and continuing until PE,
1026 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1027 hold *MAXROWS entries. */
1029 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1030 double **matp, double **row_totp, double **col_totp,
1031 int *maxrows, int *maxcols, int *maxcells)
1034 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1035 int tc = pe - pb; /* Table count. */
1037 /* Table entry for header comparison. */
1038 struct table_entry *cmp;
1040 x = xtab[(*pb)->table];
1041 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1043 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1045 /* Crosstabulation table initialization. */
1048 table = tab_create (nvar + n_cols,
1049 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1050 tab_headers (table, nvar - 1, 0, 2, 0);
1052 /* First header line. */
1053 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1054 TAB_CENTER | TAT_TITLE, x->v[COL_VAR]->name);
1056 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1058 /* Second header line. */
1062 for (i = 2; i < nvar; i++)
1063 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1064 TAB_RIGHT | TAT_TITLE,
1065 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1066 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1067 x->v[ROW_VAR]->name);
1068 for (i = 0; i < n_cols; i++)
1069 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1071 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1074 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1075 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1079 char *title = local_alloc (x->nvar * 64 + 128);
1083 if (cmd.pivot == CRS_PIVOT)
1084 for (i = 0; i < nvar; i++)
1087 cp = stpcpy (cp, " by ");
1088 cp = stpcpy (cp, x->v[i]->name);
1092 cp = spprintf (cp, "%s by %s for", x->v[0]->name, x->v[1]->name);
1093 for (i = 2; i < nvar; i++)
1095 char buf[64], *bufp;
1100 cp = stpcpy (cp, x->v[i]->name);
1102 data_out (buf, &x->v[i]->print, &(*pb)->v[i]);
1103 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1105 cp = stpcpy (cp, bufp);
1109 cp = stpcpy (cp, " [");
1110 for (i = 0; i < num_cells; i++)
1118 static const struct tuple cell_names[] =
1120 {CRS_CL_COUNT, N_("count")},
1121 {CRS_CL_ROW, N_("row %")},
1122 {CRS_CL_COLUMN, N_("column %")},
1123 {CRS_CL_TOTAL, N_("total %")},
1124 {CRS_CL_EXPECTED, N_("expected")},
1125 {CRS_CL_RESIDUAL, N_("residual")},
1126 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1127 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1131 const struct tuple *t;
1133 for (t = cell_names; t->value != cells[i]; t++)
1134 assert (t->value != -1);
1136 cp = stpcpy (cp, ", ");
1137 cp = stpcpy (cp, gettext (t->name));
1141 tab_title (table, 0, title);
1145 tab_offset (table, 0, 2);
1150 /* Chi-square table initialization. */
1151 if (cmd.a_statistics[CRS_ST_CHISQ])
1153 chisq = tab_create (6 + (nvar - 2),
1154 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1155 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1157 tab_title (chisq, 0, "Chi-square tests.");
1159 tab_offset (chisq, nvar - 2, 0);
1160 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1161 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1162 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1163 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1164 _("Asymp. Sig. (2-sided)"));
1165 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1166 _("Exact. Sig. (2-sided)"));
1167 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1168 _("Exact. Sig. (1-sided)"));
1170 tab_offset (chisq, 0, 1);
1175 /* Symmetric measures. */
1176 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1177 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1178 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1179 || cmd.a_statistics[CRS_ST_KAPPA])
1181 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1182 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1183 tab_title (sym, 0, "Symmetric measures.");
1185 tab_offset (sym, nvar - 2, 0);
1186 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1187 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1188 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1189 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1190 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1191 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1192 tab_offset (sym, 0, 1);
1197 /* Risk estimate. */
1198 if (cmd.a_statistics[CRS_ST_RISK])
1200 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1201 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1202 tab_title (risk, 0, "Risk estimate.");
1204 tab_offset (risk, nvar - 2, 0);
1205 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1206 _(" 95%% Confidence Interval"));
1207 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1208 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1209 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1210 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1211 tab_hline (risk, TAL_1, 2, 3, 1);
1212 tab_vline (risk, TAL_1, 2, 0, 1);
1213 tab_offset (risk, 0, 2);
1218 /* Directional measures. */
1219 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1220 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1222 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1223 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1224 tab_title (direct, 0, "Directional measures.");
1226 tab_offset (direct, nvar - 2, 0);
1227 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1228 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1229 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1230 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1231 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1232 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1233 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1234 tab_offset (direct, 0, 1);
1241 /* Find pivot subtable if applicable. */
1242 te = find_pivot_extent (tb, &tc, 0);
1246 /* Find all the row variable values. */
1247 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1249 /* Allocate memory space for the column and row totals. */
1250 if (n_rows > *maxrows)
1252 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1253 row_tot = *row_totp;
1256 if (n_cols > *maxcols)
1258 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1259 col_tot = *col_totp;
1263 /* Allocate table space for the matrix. */
1264 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1265 tab_realloc (table, -1,
1266 max (tab_nr (table) + (n_rows + 1) * num_cells,
1267 tab_nr (table) * (pe - pb) / (te - tb)));
1269 if (mode == GENERAL)
1271 /* Allocate memory space for the matrix. */
1272 if (n_cols * n_rows > *maxcells)
1274 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1275 *maxcells = n_cols * n_rows;
1280 /* Build the matrix and calculate column totals. */
1282 union value *cur_col = cols;
1283 union value *cur_row = rows;
1285 double *cp = col_tot;
1286 struct table_entry **p;
1289 for (p = &tb[0]; p < te; p++)
1291 for (; memcmp (cur_col, &(*p)->v[COL_VAR], sizeof *cur_col);
1295 for (; cur_row < &rows[n_rows]; cur_row++)
1301 mp = &mat[cur_col - cols];
1304 for (; memcmp (cur_row, &(*p)->v[ROW_VAR], sizeof *cur_row);
1311 *cp += *mp = (*p)->u.freq;
1316 /* Zero out the rest of the matrix. */
1317 for (; cur_row < &rows[n_rows]; cur_row++)
1323 if (cur_col < &cols[n_cols])
1325 const int rem_cols = n_cols - (cur_col - cols);
1328 for (c = 0; c < rem_cols; c++)
1330 mp = &mat[cur_col - cols];
1331 for (r = 0; r < n_rows; r++)
1333 for (c = 0; c < rem_cols; c++)
1335 mp += n_cols - rem_cols;
1343 double *tp = col_tot;
1345 assert (mode == INTEGER);
1346 mat = (*tb)->u.data;
1349 /* Calculate column totals. */
1350 for (c = 0; c < n_cols; c++)
1353 double *cp = &mat[c];
1355 for (r = 0; r < n_rows; r++)
1356 cum += cp[r * n_cols];
1364 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1365 ns_cols += *cp != 0.;
1368 /* Calculate row totals. */
1371 double *rp = row_tot;
1374 for (ns_rows = 0, r = n_rows; r--; )
1377 for (c = n_cols; c--; )
1385 /* Calculate grand total. */
1391 if (n_rows < n_cols)
1392 tp = row_tot, n = n_rows;
1394 tp = col_tot, n = n_cols;
1401 /* Print the matrix. */
1405 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1406 for (i = 2; i < nvar; i++)
1407 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1410 for (c = 0; c < n_cols; c++)
1411 printf ("%4g", cols[c].f);
1413 for (r = 0; r < n_rows; r++)
1415 printf ("%4g:", rows[r].f);
1416 for (c = 0; c < n_cols; c++)
1417 printf ("%4g", mat[c + r * n_cols]);
1418 printf ("%4g", row_tot[r]);
1422 for (c = 0; c < n_cols; c++)
1423 printf ("%4g", col_tot[c]);
1429 /* Find the first variable that differs from the last subtable,
1430 then display the values of the dimensioning variables for
1431 each table that needs it. */
1433 int first_difference = nvar - 1;
1436 for (; ; first_difference--)
1438 assert (first_difference >= 2);
1439 if (memcmp (&cmp->v[first_difference],
1440 &(*tb)->v[first_difference], sizeof *cmp->v))
1446 display_dimensions (table, first_difference, *tb);
1448 display_dimensions (chisq, first_difference, *tb);
1450 display_dimensions (sym, first_difference, *tb);
1452 display_dimensions (risk, first_difference, *tb);
1454 display_dimensions (direct, first_difference, *tb);
1458 display_crosstabulation ();
1459 if (cmd.miss == CRS_REPORT)
1464 display_symmetric ();
1468 display_directional ();
1479 tab_resize (chisq, 4 + (nvar - 2), -1);
1490 /* Delete missing rows and columns for statistical analysis when
1493 delete_missing (void)
1498 for (r = 0; r < n_rows; r++)
1499 if (is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1503 for (c = 0; c < n_cols; c++)
1504 mat[c + r * n_cols] = 0.;
1512 for (c = 0; c < n_cols; c++)
1513 if (is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1517 for (r = 0; r < n_rows; r++)
1518 mat[c + r * n_cols] = 0.;
1524 /* Prepare table T for submission, and submit it. */
1526 submit (struct tab_table *t)
1533 tab_resize (t, -1, 0);
1534 if (tab_nr (t) == tab_t (t))
1539 tab_offset (t, 0, 0);
1541 for (i = 2; i < nvar; i++)
1542 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1543 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1544 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1545 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1547 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1549 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1550 tab_dim (t, crosstabs_dim);
1554 /* Sets the widths of all the columns and heights of all the rows in
1555 table T for driver D. */
1557 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1561 /* Width of a numerical column. */
1562 int c = outp_string_width (d, "0.000000");
1563 if (cmd.miss == CRS_REPORT)
1564 c += outp_string_width (d, "M");
1566 /* Set width for header columns. */
1569 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1571 if (w < d->prop_em_width * 8)
1572 w = d->prop_em_width * 8;
1574 if (w > d->prop_em_width * 15)
1575 w = d->prop_em_width * 15;
1577 for (i = 0; i < t->l; i++)
1581 for (i = t->l; i < t->nc; i++)
1584 for (i = 0; i < t->nr; i++)
1585 t->h[i] = tab_natural_height (t, d, i);
1588 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1589 int *cnt, int pivot);
1590 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1591 int *cnt, int pivot);
1593 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1595 static struct table_entry **
1596 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1598 return (mode == GENERAL
1599 ? find_pivot_extent_general (tp, cnt, pivot)
1600 : find_pivot_extent_integer (tp, cnt, pivot));
1603 /* Find the extent of a region in TP that contains one table. If
1604 PIVOT != 0 that means a set of table entries with identical table
1605 number; otherwise they also have to have the same values for every
1606 dimension after the row and column dimensions. The table that is
1607 searched starts at TP and has length CNT. Returns the first entry
1608 after the last one in the table; sets *CNT to the number of
1609 remaining values. If there are no entries in TP at all, returns
1610 NULL. A yucky interface, admittedly, but it works. */
1611 static struct table_entry **
1612 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1614 struct table_entry *fp = *tp;
1619 x = xtab[(*tp)->table];
1627 if ((*tp)->table != fp->table)
1632 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1639 /* Integer mode correspondent to find_pivot_extent_general(). This
1640 could be optimized somewhat, but I just don't give a crap about
1641 CROSSTABS performance in integer mode, which is just a
1642 CROSSTABS wart as far as I'm concerned.
1644 That said, feel free to send optimization patches to me. */
1645 static struct table_entry **
1646 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1648 struct table_entry *fp = *tp;
1653 x = xtab[(*tp)->table];
1661 if ((*tp)->table != fp->table)
1666 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1673 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1674 result. WIDTH_ points to an int which is either 0 for a
1675 numeric value or a string width for a string value. */
1677 compare_value (const void *a_, const void *b_, void *width_)
1679 const union value *a = a_;
1680 const union value *b = b_;
1681 const int *pwidth = width_;
1682 const int width = *pwidth;
1685 return (a->f < b->f) ? -1 : (a->f > b->f);
1687 return strncmp (a->s, b->s, width);
1690 /* Given an array of ENTRY_CNT table_entry structures starting at
1691 ENTRIES, creates a sorted list of the values that the variable
1692 with index VAR_INDEX takes on. The values are returned as a
1693 malloc()'darray stored in *VALUES, with the number of values
1694 stored in *VALUE_CNT.
1697 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1698 union value **values, int *value_cnt)
1700 if (mode == GENERAL)
1702 int width = xtab[(*entries)->table]->v[var_idx]->width;
1705 *values = xmalloc (sizeof **values * entry_cnt);
1706 for (i = 0; i < entry_cnt; i++)
1707 (*values)[i] = entries[i]->v[var_idx];
1708 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1709 compare_value, &width);
1713 struct crosstab_proc *crs = &xtab[(*entries)->table]->v[var_idx]->p.crs;
1716 assert (mode == INTEGER);
1717 *values = xmalloc (sizeof **values * crs->count);
1718 for (i = 0; i < crs->count; i++)
1719 (*values)[i].f = i + crs->min;
1720 *value_cnt = crs->count;
1724 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1725 from V, displayed with print format spec from variable VAR. When
1726 in REPORT missing-value mode, missing values have an M appended. */
1728 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1729 const union value *v, const struct variable *var)
1731 struct len_string s;
1733 const char *label = val_labs_find (var->val_labs, *v);
1736 tab_text (table, c, r, TAB_LEFT, label);
1740 s.length = var->print.w;
1741 s.string = tab_alloc (table, s.length + 1);
1742 data_out (s.string, &var->print, v);
1743 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1744 s.string[s.length++] = 'M';
1745 while (s.length && *s.string == ' ')
1750 tab_raw (table, c, r, opt, &s);
1753 /* Draws a line across TABLE at the current row to indicate the most
1754 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1755 that changed, and puts the values that changed into the table. TB
1756 and X must be the corresponding table_entry and crosstab,
1759 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1761 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1763 for (; first_difference >= 2; first_difference--)
1764 table_value_missing (table, nvar - first_difference - 1, 0,
1765 TAB_RIGHT, &tb->v[first_difference],
1766 x->v[first_difference]);
1769 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1771 float_M_suffix (struct tab_table *table, int c, int r, double v)
1773 static const struct fmt_spec f = {FMT_F, 8, 0};
1774 struct len_string s;
1777 s.string = tab_alloc (table, 9);
1779 data_out (s.string, &f, (union value *) &v);
1780 while (*s.string == ' ')
1785 tab_raw (table, c, r, TAB_RIGHT, &s);
1788 /* Displays the crosstabulation table. */
1790 display_crosstabulation (void)
1795 for (r = 0; r < n_rows; r++)
1796 table_value_missing (table, nvar - 2, r * num_cells,
1797 TAB_RIGHT, &rows[r], x->v[ROW_VAR]);
1799 tab_text (table, nvar - 2, n_rows * num_cells,
1800 TAB_LEFT, _("Total"));
1802 /* Put in the actual cells. */
1807 tab_offset (table, nvar - 1, -1);
1808 for (r = 0; r < n_rows; r++)
1811 tab_hline (table, TAL_1, -1, n_cols, 0);
1812 for (c = 0; c < n_cols; c++)
1814 double expected_value;
1817 expected_value = row_tot[r] * col_tot[c] / W;
1818 for (i = 0; i < num_cells; i++)
1828 v = *mp / row_tot[r] * 100.;
1831 v = *mp / col_tot[c] * 100.;
1836 case CRS_CL_EXPECTED:
1839 case CRS_CL_RESIDUAL:
1840 v = *mp - expected_value;
1842 case CRS_CL_SRESIDUAL:
1843 v = (*mp - expected_value) / sqrt (expected_value);
1845 case CRS_CL_ASRESIDUAL:
1846 v = ((*mp - expected_value)
1847 / sqrt (expected_value
1848 * (1. - row_tot[r] / W)
1849 * (1. - col_tot[c] / W)));
1855 if (cmd.miss == CRS_REPORT
1856 && (is_num_user_missing (cols[c].f, x->v[COL_VAR])
1857 || is_num_user_missing (rows[r].f, x->v[ROW_VAR])))
1858 float_M_suffix (table, c, i, v);
1860 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1866 tab_offset (table, -1, tab_row (table) + num_cells);
1874 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1875 for (r = 0; r < n_rows; r++)
1876 for (i = 0; i < num_cells; i++)
1889 v = row_tot[r] / W * 100.;
1892 v = row_tot[r] / W * 100.;
1894 case CRS_CL_EXPECTED:
1895 case CRS_CL_RESIDUAL:
1896 case CRS_CL_SRESIDUAL:
1897 case CRS_CL_ASRESIDUAL:
1904 if (cmd.miss == CRS_REPORT
1905 && is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1906 float_M_suffix (table, n_cols, 0, v);
1908 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1910 tab_next_row (table);
1914 /* Column totals, grand total. */
1919 tab_hline (table, TAL_1, -1, n_cols, 0);
1920 for (c = 0; c <= n_cols; c++)
1922 double ct = c < n_cols ? col_tot[c] : W;
1925 for (i = j = 0; i < num_cells; i++)
1943 case CRS_CL_EXPECTED:
1944 case CRS_CL_RESIDUAL:
1945 case CRS_CL_SRESIDUAL:
1946 case CRS_CL_ASRESIDUAL:
1952 if (cmd.miss == CRS_REPORT && c < n_cols
1953 && is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1954 float_M_suffix (table, c, j, v);
1956 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
1962 tab_offset (table, -1, tab_row (table) + j);
1965 tab_offset (table, 0, -1);
1968 static void calc_r (double *X, double *Y, double *, double *, double *);
1969 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1971 /* Display chi-square statistics. */
1973 display_chisq (void)
1975 static const char *chisq_stats[N_CHISQ] =
1977 N_("Pearson Chi-Square"),
1978 N_("Likelihood Ratio"),
1979 N_("Fisher's Exact Test"),
1980 N_("Continuity Correction"),
1981 N_("Linear-by-Linear Association"),
1983 double chisq_v[N_CHISQ];
1984 double fisher1, fisher2;
1990 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1992 tab_offset (chisq, nvar - 2, -1);
1994 for (i = 0; i < N_CHISQ; i++)
1996 if ((i != 2 && chisq_v[i] == SYSMIS)
1997 || (i == 2 && fisher1 == SYSMIS))
2001 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2004 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2005 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2006 tab_float (chisq, 3, 0, TAB_RIGHT,
2007 chisq_sig (chisq_v[i], df[i]), 8, 3);
2012 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2013 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2015 tab_next_row (chisq);
2018 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2019 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2020 tab_next_row (chisq);
2022 tab_offset (chisq, 0, -1);
2025 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2026 double[N_SYMMETRIC]);
2028 /* Display symmetric measures. */
2030 display_symmetric (void)
2032 static const char *categories[] =
2034 N_("Nominal by Nominal"),
2035 N_("Ordinal by Ordinal"),
2036 N_("Interval by Interval"),
2037 N_("Measure of Agreement"),
2040 static const char *stats[N_SYMMETRIC] =
2044 N_("Contingency Coefficient"),
2045 N_("Kendall's tau-b"),
2046 N_("Kendall's tau-c"),
2048 N_("Spearman Correlation"),
2053 static const int stats_categories[N_SYMMETRIC] =
2055 0, 0, 0, 1, 1, 1, 1, 2, 3,
2059 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2062 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2065 tab_offset (sym, nvar - 2, -1);
2067 for (i = 0; i < N_SYMMETRIC; i++)
2069 if (sym_v[i] == SYSMIS)
2072 if (stats_categories[i] != last_cat)
2074 last_cat = stats_categories[i];
2075 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2078 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2079 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2080 if (sym_ase[i] != SYSMIS)
2081 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2082 if (sym_t[i] != SYSMIS)
2083 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2084 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2088 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2089 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2092 tab_offset (sym, 0, -1);
2095 static int calc_risk (double[], double[], double[], union value *);
2097 /* Display risk estimate. */
2102 double risk_v[3], lower[3], upper[3];
2106 if (!calc_risk (risk_v, upper, lower, c))
2109 tab_offset (risk, nvar - 2, -1);
2111 for (i = 0; i < 3; i++)
2113 if (risk_v[i] == SYSMIS)
2119 if (x->v[COL_VAR]->type == NUMERIC)
2120 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2121 x->v[COL_VAR]->name, c[0].f, c[1].f);
2123 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2124 x->v[COL_VAR]->name,
2125 x->v[COL_VAR]->width, c[0].s,
2126 x->v[COL_VAR]->width, c[1].s);
2130 if (x->v[ROW_VAR]->type == NUMERIC)
2131 sprintf (buf, _("For cohort %s = %g"),
2132 x->v[ROW_VAR]->name, rows[i - 1].f);
2134 sprintf (buf, _("For cohort %s = %.*s"),
2135 x->v[ROW_VAR]->name,
2136 x->v[ROW_VAR]->width, rows[i - 1].s);
2140 tab_text (risk, 0, 0, TAB_LEFT, buf);
2141 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2142 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2143 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2144 tab_next_row (risk);
2147 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2148 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2149 tab_next_row (risk);
2151 tab_offset (risk, 0, -1);
2154 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2155 double[N_DIRECTIONAL]);
2157 /* Display directional measures. */
2159 display_directional (void)
2161 static const char *categories[] =
2163 N_("Nominal by Nominal"),
2164 N_("Ordinal by Ordinal"),
2165 N_("Nominal by Interval"),
2168 static const char *stats[] =
2171 N_("Goodman and Kruskal tau"),
2172 N_("Uncertainty Coefficient"),
2177 static const char *types[] =
2184 static const int stats_categories[N_DIRECTIONAL] =
2186 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2189 static const int stats_stats[N_DIRECTIONAL] =
2191 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2194 static const int stats_types[N_DIRECTIONAL] =
2196 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2199 static const int *stats_lookup[] =
2206 static const char **stats_names[] =
2218 double direct_v[N_DIRECTIONAL];
2219 double direct_ase[N_DIRECTIONAL];
2220 double direct_t[N_DIRECTIONAL];
2224 if (!calc_directional (direct_v, direct_ase, direct_t))
2227 tab_offset (direct, nvar - 2, -1);
2229 for (i = 0; i < N_DIRECTIONAL; i++)
2231 if (direct_v[i] == SYSMIS)
2237 for (j = 0; j < 3; j++)
2238 if (last[j] != stats_lookup[j][i])
2241 tab_hline (direct, TAL_1, j, 6, 0);
2246 int k = last[j] = stats_lookup[j][i];
2251 string = x->v[0]->name;
2253 string = x->v[1]->name;
2255 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2256 gettext (stats_names[j][k]), string);
2261 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2262 if (direct_ase[i] != SYSMIS)
2263 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2264 if (direct_t[i] != SYSMIS)
2265 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2266 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2267 tab_next_row (direct);
2270 tab_offset (direct, 0, -1);
2273 /* Statistical calculations. */
2275 /* Returns the value of the gamma (factorial) function for an integer
2278 gamma_int (double x)
2283 for (i = 2; i < x; i++)
2288 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2290 static inline double
2291 Pr (int a, int b, int c, int d)
2293 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2294 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2295 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2296 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2297 / gamma_int (a + b + c + d + 1.));
2300 /* Swap the contents of A and B. */
2302 swap (int *a, int *b)
2309 /* Calculate significance for Fisher's exact test as specified in
2310 _SPSS Statistical Algorithms_, Appendix 5. */
2312 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2316 if (min (c, d) < min (a, b))
2317 swap (&a, &c), swap (&b, &d);
2318 if (min (b, d) < min (a, c))
2319 swap (&a, &b), swap (&c, &d);
2323 swap (&a, &b), swap (&c, &d);
2325 swap (&a, &c), swap (&b, &d);
2329 for (x = 0; x <= a; x++)
2330 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2332 *fisher2 = *fisher1;
2333 for (x = 1; x <= b; x++)
2334 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2337 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2338 columns with values COLS and N_ROWS rows with values ROWS. Values
2339 in the matrix sum to W. */
2341 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2342 double *fisher1, double *fisher2)
2346 chisq[0] = chisq[1] = 0.;
2347 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2348 *fisher1 = *fisher2 = SYSMIS;
2350 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2352 if (ns_rows <= 1 || ns_cols <= 1)
2354 chisq[0] = chisq[1] = SYSMIS;
2358 for (r = 0; r < n_rows; r++)
2359 for (c = 0; c < n_cols; c++)
2361 const double expected = row_tot[r] * col_tot[c] / W;
2362 const double freq = mat[n_cols * r + c];
2363 const double residual = freq - expected;
2366 chisq[0] += residual * residual / expected;
2368 chisq[1] += freq * log (expected / freq);
2379 /* Calculate Yates and Fisher exact test. */
2380 if (ns_cols == 2 && ns_rows == 2)
2382 double f11, f12, f21, f22;
2388 for (i = j = 0; i < n_cols; i++)
2389 if (col_tot[i] != 0.)
2398 f11 = mat[nz_cols[0]];
2399 f12 = mat[nz_cols[1]];
2400 f21 = mat[nz_cols[0] + n_cols];
2401 f22 = mat[nz_cols[1] + n_cols];
2406 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2409 chisq[3] = (W * x * x
2410 / (f11 + f12) / (f21 + f22)
2411 / (f11 + f21) / (f12 + f22));
2419 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2420 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2423 /* Calculate Mantel-Haenszel. */
2424 if (x->v[ROW_VAR]->type == NUMERIC && x->v[COL_VAR]->type == NUMERIC)
2426 double r, ase_0, ase_1;
2427 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2429 chisq[4] = (W - 1.) * r * r;
2434 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2435 ASE_1, and ase_0 into ASE_0. The row and column values must be
2436 passed in X and Y. */
2438 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2440 double SX, SY, S, T;
2442 double sum_XYf, sum_X2Y2f;
2443 double sum_Xr, sum_X2r;
2444 double sum_Yc, sum_Y2c;
2447 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2448 for (j = 0; j < n_cols; j++)
2450 double fij = mat[j + i * n_cols];
2451 double product = X[i] * Y[j];
2452 double temp = fij * product;
2454 sum_X2Y2f += temp * product;
2457 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2459 sum_Xr += X[i] * row_tot[i];
2460 sum_X2r += X[i] * X[i] * row_tot[i];
2464 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2466 sum_Yc += Y[i] * col_tot[i];
2467 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2471 S = sum_XYf - sum_Xr * sum_Yc / W;
2472 SX = sum_X2r - sum_Xr * sum_Xr / W;
2473 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2476 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2481 for (s = c = 0., i = 0; i < n_rows; i++)
2482 for (j = 0; j < n_cols; j++)
2484 double Xresid, Yresid;
2487 Xresid = X[i] - Xbar;
2488 Yresid = Y[j] - Ybar;
2489 temp = (T * Xresid * Yresid
2491 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2492 y = mat[j + i * n_cols] * temp * temp - c;
2497 *ase_1 = sqrt (s) / (T * T);
2501 static double somers_d_v[3];
2502 static double somers_d_ase[3];
2503 static double somers_d_t[3];
2505 /* Calculate symmetric statistics and their asymptotic standard
2506 errors. Returns 0 if none could be calculated. */
2508 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2509 double t[N_SYMMETRIC])
2511 int q = min (ns_rows, ns_cols);
2520 for (i = 0; i < N_SYMMETRIC; i++)
2521 v[i] = ase[i] = t[i] = SYSMIS;
2524 /* Phi, Cramer's V, contingency coefficient. */
2525 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2527 double Xp = 0.; /* Pearson chi-square. */
2532 for (r = 0; r < n_rows; r++)
2533 for (c = 0; c < n_cols; c++)
2535 const double expected = row_tot[r] * col_tot[c] / W;
2536 const double freq = mat[n_cols * r + c];
2537 const double residual = freq - expected;
2540 Xp += residual * residual / expected;
2544 if (cmd.a_statistics[CRS_ST_PHI])
2546 v[0] = sqrt (Xp / W);
2547 v[1] = sqrt (Xp / (W * (q - 1)));
2549 if (cmd.a_statistics[CRS_ST_CC])
2550 v[2] = sqrt (Xp / (Xp + W));
2553 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2554 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2559 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2566 for (r = 0; r < n_rows; r++)
2567 Dr -= row_tot[r] * row_tot[r];
2568 for (c = 0; c < n_cols; c++)
2569 Dc -= col_tot[c] * col_tot[c];
2575 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2576 for (c = 0; c < n_cols; c++)
2580 for (r = 0; r < n_rows; r++)
2581 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2591 for (i = 0; i < n_rows; i++)
2595 for (j = 1; j < n_cols; j++)
2596 Cij += col_tot[j] - cum[j + i * n_cols];
2599 for (j = 1; j < n_cols; j++)
2600 Dij += cum[j + (i - 1) * n_cols];
2604 double fij = mat[j + i * n_cols];
2610 assert (j < n_cols);
2612 Cij -= col_tot[j] - cum[j + i * n_cols];
2613 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2617 Cij += cum[j - 1 + (i - 1) * n_cols];
2618 Dij -= cum[j + (i - 1) * n_cols];
2624 if (cmd.a_statistics[CRS_ST_BTAU])
2625 v[3] = (P - Q) / sqrt (Dr * Dc);
2626 if (cmd.a_statistics[CRS_ST_CTAU])
2627 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2628 if (cmd.a_statistics[CRS_ST_GAMMA])
2629 v[5] = (P - Q) / (P + Q);
2631 /* ASE for tau-b, tau-c, gamma. Calculations could be
2632 eliminated here, at expense of memory. */
2637 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2638 for (i = 0; i < n_rows; i++)
2642 for (j = 1; j < n_cols; j++)
2643 Cij += col_tot[j] - cum[j + i * n_cols];
2646 for (j = 1; j < n_cols; j++)
2647 Dij += cum[j + (i - 1) * n_cols];
2651 double fij = mat[j + i * n_cols];
2653 if (cmd.a_statistics[CRS_ST_BTAU])
2655 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2656 + v[3] * (row_tot[i] * Dc
2657 + col_tot[j] * Dr));
2658 btau_cum += fij * temp * temp;
2662 const double temp = Cij - Dij;
2663 ctau_cum += fij * temp * temp;
2666 if (cmd.a_statistics[CRS_ST_GAMMA])
2668 const double temp = Q * Cij - P * Dij;
2669 gamma_cum += fij * temp * temp;
2672 if (cmd.a_statistics[CRS_ST_D])
2674 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2675 - (P - Q) * (W - row_tot[i]));
2676 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2677 - (Q - P) * (W - col_tot[j]));
2682 assert (j < n_cols);
2684 Cij -= col_tot[j] - cum[j + i * n_cols];
2685 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2689 Cij += cum[j - 1 + (i - 1) * n_cols];
2690 Dij -= cum[j + (i - 1) * n_cols];
2696 btau_var = ((btau_cum
2697 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2699 if (cmd.a_statistics[CRS_ST_BTAU])
2701 ase[3] = sqrt (btau_var);
2702 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2705 if (cmd.a_statistics[CRS_ST_CTAU])
2707 ase[4] = ((2 * q / ((q - 1) * W * W))
2708 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2709 t[4] = v[4] / ase[4];
2711 if (cmd.a_statistics[CRS_ST_GAMMA])
2713 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2714 t[5] = v[5] / (2. / (P + Q)
2715 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2717 if (cmd.a_statistics[CRS_ST_D])
2719 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2720 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2721 somers_d_t[0] = (somers_d_v[0]
2723 * sqrt (ctau_cum - sqr (P - Q) / W)));
2724 somers_d_v[1] = (P - Q) / Dc;
2725 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2726 somers_d_t[1] = (somers_d_v[1]
2728 * sqrt (ctau_cum - sqr (P - Q) / W)));
2729 somers_d_v[2] = (P - Q) / Dr;
2730 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2731 somers_d_t[2] = (somers_d_v[2]
2733 * sqrt (ctau_cum - sqr (P - Q) / W)));
2739 /* Spearman correlation, Pearson's r. */
2740 if (cmd.a_statistics[CRS_ST_CORR])
2742 double *R = local_alloc (sizeof *R * n_rows);
2743 double *C = local_alloc (sizeof *C * n_cols);
2746 double y, t, c = 0., s = 0.;
2751 R[i] = s + (row_tot[i] + 1.) / 2.;
2758 assert (i < n_rows);
2763 double y, t, c = 0., s = 0.;
2768 C[j] = s + (col_tot[j] + 1.) / 2;
2775 assert (j < n_cols);
2779 calc_r (R, C, &v[6], &t[6], &ase[6]);
2785 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2789 /* Cohen's kappa. */
2790 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2792 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2795 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2796 i < ns_rows; i++, j++)
2800 while (col_tot[j] == 0.)
2803 prod = row_tot[i] * col_tot[j];
2804 sum = row_tot[i] + col_tot[j];
2806 sum_fii += mat[j + i * n_cols];
2808 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2809 sum_riciri_ci += prod * sum;
2811 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2812 for (j = 0; j < ns_cols; j++)
2814 double sum = row_tot[i] + col_tot[j];
2815 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2818 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2820 ase[8] = sqrt ((W * W * sum_rici
2821 + sum_rici * sum_rici
2822 - W * sum_riciri_ci)
2823 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2825 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2826 / sqr (W * W - sum_rici))
2827 + ((2. * (W - sum_fii)
2828 * (2. * sum_fii * sum_rici
2829 - W * sum_fiiri_ci))
2830 / cube (W * W - sum_rici))
2831 + (sqr (W - sum_fii)
2832 * (W * sum_fijri_ci2 - 4.
2833 * sum_rici * sum_rici)
2834 / pow4 (W * W - sum_rici))));
2836 t[8] = v[8] / ase[8];
2843 /* Calculate risk estimate. */
2845 calc_risk (double *value, double *upper, double *lower, union value *c)
2847 double f11, f12, f21, f22;
2853 for (i = 0; i < 3; i++)
2854 value[i] = upper[i] = lower[i] = SYSMIS;
2857 if (ns_rows != 2 || ns_cols != 2)
2864 for (i = j = 0; i < n_cols; i++)
2865 if (col_tot[i] != 0.)
2874 f11 = mat[nz_cols[0]];
2875 f12 = mat[nz_cols[1]];
2876 f21 = mat[nz_cols[0] + n_cols];
2877 f22 = mat[nz_cols[1] + n_cols];
2879 c[0] = cols[nz_cols[0]];
2880 c[1] = cols[nz_cols[1]];
2883 value[0] = (f11 * f22) / (f12 * f21);
2884 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2885 lower[0] = value[0] * exp (-1.960 * v);
2886 upper[0] = value[0] * exp (1.960 * v);
2888 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2889 v = sqrt ((f12 / (f11 * (f11 + f12)))
2890 + (f22 / (f21 * (f21 + f22))));
2891 lower[1] = value[1] * exp (-1.960 * v);
2892 upper[1] = value[1] * exp (1.960 * v);
2894 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2895 v = sqrt ((f11 / (f12 * (f11 + f12)))
2896 + (f21 / (f22 * (f21 + f22))));
2897 lower[2] = value[2] * exp (-1.960 * v);
2898 upper[2] = value[2] * exp (1.960 * v);
2903 /* Calculate directional measures. */
2905 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2906 double t[N_DIRECTIONAL])
2911 for (i = 0; i < N_DIRECTIONAL; i++)
2912 v[i] = ase[i] = t[i] = SYSMIS;
2916 if (cmd.a_statistics[CRS_ST_LAMBDA])
2918 double *fim = xmalloc (sizeof *fim * n_rows);
2919 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2920 double *fmj = xmalloc (sizeof *fmj * n_cols);
2921 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2922 double sum_fim, sum_fmj;
2924 int rm_index, cm_index;
2927 /* Find maximum for each row and their sum. */
2928 for (sum_fim = 0., i = 0; i < n_rows; i++)
2930 double max = mat[i * n_cols];
2933 for (j = 1; j < n_cols; j++)
2934 if (mat[j + i * n_cols] > max)
2936 max = mat[j + i * n_cols];
2940 sum_fim += fim[i] = max;
2941 fim_index[i] = index;
2944 /* Find maximum for each column. */
2945 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2947 double max = mat[j];
2950 for (i = 1; i < n_rows; i++)
2951 if (mat[j + i * n_cols] > max)
2953 max = mat[j + i * n_cols];
2957 sum_fmj += fmj[j] = max;
2958 fmj_index[j] = index;
2961 /* Find maximum row total. */
2964 for (i = 1; i < n_rows; i++)
2965 if (row_tot[i] > rm)
2971 /* Find maximum column total. */
2974 for (j = 1; j < n_cols; j++)
2975 if (col_tot[j] > cm)
2981 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2982 v[1] = (sum_fmj - rm) / (W - rm);
2983 v[2] = (sum_fim - cm) / (W - cm);
2985 /* ASE1 for Y given X. */
2989 for (accum = 0., i = 0; i < n_rows; i++)
2990 for (j = 0; j < n_cols; j++)
2992 const int deltaj = j == cm_index;
2993 accum += (mat[j + i * n_cols]
2994 * sqr ((j == fim_index[i])
2999 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3002 /* ASE0 for Y given X. */
3006 for (accum = 0., i = 0; i < n_rows; i++)
3007 if (cm_index != fim_index[i])
3008 accum += (mat[i * n_cols + fim_index[i]]
3009 + mat[i * n_cols + cm_index]);
3010 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3013 /* ASE1 for X given Y. */
3017 for (accum = 0., i = 0; i < n_rows; i++)
3018 for (j = 0; j < n_cols; j++)
3020 const int deltaj = i == rm_index;
3021 accum += (mat[j + i * n_cols]
3022 * sqr ((i == fmj_index[j])
3027 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3030 /* ASE0 for X given Y. */
3034 for (accum = 0., j = 0; j < n_cols; j++)
3035 if (rm_index != fmj_index[j])
3036 accum += (mat[j + n_cols * fmj_index[j]]
3037 + mat[j + n_cols * rm_index]);
3038 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3041 /* Symmetric ASE0 and ASE1. */
3046 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3047 for (j = 0; j < n_cols; j++)
3049 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3050 int temp1 = (i == rm_index) + (j == cm_index);
3051 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3052 accum1 += (mat[j + i * n_cols]
3053 * sqr (temp0 + (v[0] - 1.) * temp1));
3055 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3056 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3057 / (2. * W - rm - cm));
3066 double sum_fij2_ri, sum_fij2_ci;
3067 double sum_ri2, sum_cj2;
3069 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3070 for (j = 0; j < n_cols; j++)
3072 double temp = sqr (mat[j + i * n_cols]);
3073 sum_fij2_ri += temp / row_tot[i];
3074 sum_fij2_ci += temp / col_tot[j];
3077 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3078 sum_ri2 += row_tot[i] * row_tot[i];
3080 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3081 sum_cj2 += col_tot[j] * col_tot[j];
3083 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3084 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3088 if (cmd.a_statistics[CRS_ST_UC])
3090 double UX, UY, UXY, P;
3091 double ase1_yx, ase1_xy, ase1_sym;
3094 for (UX = 0., i = 0; i < n_rows; i++)
3095 if (row_tot[i] > 0.)
3096 UX -= row_tot[i] / W * log (row_tot[i] / W);
3098 for (UY = 0., j = 0; j < n_cols; j++)
3099 if (col_tot[j] > 0.)
3100 UY -= col_tot[j] / W * log (col_tot[j] / W);
3102 for (UXY = P = 0., i = 0; i < n_rows; i++)
3103 for (j = 0; j < n_cols; j++)
3105 double entry = mat[j + i * n_cols];
3110 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3111 UXY -= entry / W * log (entry / W);
3114 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3115 for (j = 0; j < n_cols; j++)
3117 double entry = mat[j + i * n_cols];
3122 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3123 + (UX - UXY) * log (col_tot[j] / W));
3124 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3125 + (UY - UXY) * log (row_tot[i] / W));
3126 ase1_sym += entry * sqr ((UXY
3127 * log (row_tot[i] * col_tot[j] / (W * W)))
3128 - (UX + UY) * log (entry / W));
3131 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3132 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3133 t[5] = v[5] / ((2. / (W * (UX + UY)))
3134 * sqrt (P - sqr (UX + UY - UXY) / W));
3136 v[6] = (UX + UY - UXY) / UX;
3137 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3138 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3140 v[7] = (UX + UY - UXY) / UY;
3141 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3142 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3146 if (cmd.a_statistics[CRS_ST_D])
3151 calc_symmetric (NULL, NULL, NULL);
3152 for (i = 0; i < 3; i++)
3154 v[8 + i] = somers_d_v[i];
3155 ase[8 + i] = somers_d_ase[i];
3156 t[8 + i] = somers_d_t[i];
3161 if (cmd.a_statistics[CRS_ST_ETA])
3164 double sum_Xr, sum_X2r;
3168 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3170 sum_Xr += rows[i].f * row_tot[i];
3171 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3173 SX = sum_X2r - sum_Xr * sum_Xr / W;
3175 for (SXW = 0., j = 0; j < n_cols; j++)
3179 for (cum = 0., i = 0; i < n_rows; i++)
3181 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3182 cum += rows[i].f * mat[j + i * n_cols];
3185 SXW -= cum * cum / col_tot[j];
3187 v[11] = sqrt (1. - SXW / SX);
3191 double sum_Yc, sum_Y2c;
3195 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3197 sum_Yc += cols[i].f * col_tot[i];
3198 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3200 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3202 for (SYW = 0., i = 0; i < n_rows; i++)
3206 for (cum = 0., j = 0; j < n_cols; j++)
3208 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3209 cum += cols[j].f * mat[j + i * n_cols];
3212 SYW -= cum * cum / row_tot[i];
3214 v[12] = sqrt (1. - SYW / SY);