1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include "algorithm.h"
49 #include "value-labels.h"
53 #include "debug-print.h"
59 +missing=miss:!table/include/report;
60 +write[wr_]=none,cells,all;
61 +format=fmt:!labels/nolabels/novallabs,
64 tabl:!tables/notables,
67 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
69 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
75 /* Number of chi-square statistics. */
78 /* Number of symmetric statistics. */
81 /* Number of directional statistics. */
82 #define N_DIRECTIONAL 13
84 /* A single table entry for general mode. */
87 int table; /* Flattened table number. */
90 double freq; /* Frequency count. */
91 double *data; /* Crosstabulation table for integer mode. */
94 union value v[1]; /* Values. */
97 /* A crosstabulation. */
100 int nvar; /* Number of variables. */
101 double missing; /* Missing cases count. */
102 int ofs; /* Integer mode: Offset into sorted_tab[]. */
103 struct variable *v[2]; /* At least two variables; sorted by
104 larger indices first. */
107 /* Indexes into crosstab.v. */
114 /* General mode crosstabulation table. */
115 static struct hsh_table *gen_tab; /* Hash table. */
116 static int n_sorted_tab; /* Number of entries in sorted_tab. */
117 static struct table_entry **sorted_tab; /* Sorted table. */
119 /* Variables specifies on VARIABLES. */
120 static struct variable **variables;
121 static size_t variables_cnt;
124 static struct crosstab **xtab;
127 /* Integer or general mode? */
136 static int num_cells; /* Number of cells requested. */
137 static int cells[8]; /* Cells requested. */
138 static int expected; /* Nonzero if expected value is needed. */
141 static int write; /* One of WR_* that specifies the WRITE style. */
143 /* Command parsing info. */
144 static struct cmd_crosstabs cmd;
147 static struct pool *pl_tc; /* For table cells. */
148 static struct pool *pl_col; /* For column data. */
150 static int internal_cmd_crosstabs (void);
151 static void precalc (void);
152 static int calc_general (struct ccase *);
153 static int calc_integer (struct ccase *);
154 static void postcalc (void);
155 static void submit (struct tab_table *);
158 static void debug_print (void);
159 static void print_table_entries (struct table_entry **tab);
162 /* Parse and execute CROSSTABS, then clean up. */
166 int result = internal_cmd_crosstabs ();
169 pool_destroy (pl_tc);
170 pool_destroy (pl_col);
175 /* Parses and executes the CROSSTABS procedure. */
177 internal_cmd_crosstabs (void)
183 pl_tc = pool_create ();
184 pl_col = pool_create ();
186 lex_match_id ("CROSSTABS");
187 if (!parse_crosstabs (&cmd))
191 /* Needs variables. */
195 mode = variables ? INTEGER : GENERAL;
201 cmd.a_cells[CRS_CL_COUNT] = 1;
209 for (i = 0; i < CRS_CL_count; i++)
214 cmd.a_cells[CRS_CL_COUNT] = 1;
215 cmd.a_cells[CRS_CL_ROW] = 1;
216 cmd.a_cells[CRS_CL_COLUMN] = 1;
217 cmd.a_cells[CRS_CL_TOTAL] = 1;
219 if (cmd.a_cells[CRS_CL_ALL])
221 for (i = 0; i < CRS_CL_count; i++)
223 cmd.a_cells[CRS_CL_ALL] = 0;
225 cmd.a_cells[CRS_CL_NONE] = 0;
226 for (num_cells = i = 0; i < CRS_CL_count; i++)
229 if (i >= CRS_CL_EXPECTED)
231 cmd.a_cells[num_cells++] = i;
236 if (cmd.sbc_statistics)
241 for (i = 0; i < CRS_ST_count; i++)
242 if (cmd.a_statistics[i])
245 cmd.a_statistics[CRS_ST_CHISQ] = 1;
246 if (cmd.a_statistics[CRS_ST_ALL])
247 for (i = 0; i < CRS_ST_count; i++)
248 cmd.a_statistics[i] = 1;
252 if (cmd.miss == CRS_REPORT && mode == GENERAL)
254 msg (SE, _("Missing mode REPORT not allowed in general mode. "
255 "Assuming MISSING=TABLE."));
256 cmd.miss = CRS_TABLE;
260 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
261 cmd.a_write[CRS_WR_ALL] = 0;
262 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
264 msg (SE, _("Write mode ALL not allowed in general mode. "
265 "Assuming WRITE=CELLS."));
266 cmd.a_write[CRS_WR_CELLS] = 1;
269 && (cmd.a_write[CRS_WR_NONE]
270 + cmd.a_write[CRS_WR_ALL]
271 + cmd.a_write[CRS_WR_CELLS] == 0))
272 cmd.a_write[CRS_WR_CELLS] = 1;
273 if (cmd.a_write[CRS_WR_CELLS])
274 write = CRS_WR_CELLS;
275 else if (cmd.a_write[CRS_WR_ALL])
280 procedure (precalc, mode == GENERAL ? calc_general : calc_integer, postcalc);
285 /* Parses the TABLES subcommand. */
287 crs_custom_tables (struct cmd_crosstabs *cmd unused)
289 struct var_set *var_set;
291 struct variable ***by = NULL;
296 /* Ensure that this is a TABLES subcommand. */
297 if (!lex_match_id ("TABLES")
298 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
303 if (variables != NULL)
304 var_set = var_set_create_from_array (variables, variables_cnt);
306 var_set = var_set_create_from_dict (default_dict);
307 assert (var_set != NULL);
311 by = xrealloc (by, sizeof *by * (n_by + 1));
312 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
313 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
314 PV_NO_DUPLICATE | PV_NO_SCRATCH))
319 if (!lex_match (T_BY))
323 lex_error (_("expecting BY"));
332 int *by_iter = xcalloc (sizeof *by_iter * n_by);
335 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
336 for (i = 0; i < nx; i++)
340 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
347 for (i = 0; i < n_by; i++)
348 x->v[i] = by[i][by_iter[i]];
354 for (i = n_by - 1; i >= 0; i--)
356 if (++by_iter[i] < by_nvar[i])
369 /* All return paths lead here. */
373 for (i = 0; i < n_by; i++)
379 var_set_destroy (var_set);
384 /* Parses the VARIABLES subcommand. */
386 crs_custom_variables (struct cmd_crosstabs *cmd unused)
390 msg (SE, _("VARIABLES must be specified before TABLES."));
398 int orig_nv = variables_cnt;
403 if (!parse_variables (default_dict, &variables, &variables_cnt,
404 (PV_APPEND | PV_NUMERIC
405 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
410 lex_error ("expecting `('");
415 if (!lex_force_int ())
417 min = lex_integer ();
422 if (!lex_force_int ())
424 max = lex_integer ();
427 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
435 lex_error ("expecting `)'");
440 for (i = orig_nv; i < variables_cnt; i++)
442 variables[i]->p.crs.min = min;
443 variables[i]->p.crs.max = max + 1.;
444 variables[i]->p.crs.count = max - min + 1;
463 printf ("CROSSTABS\n");
465 if (variables != NULL)
469 printf ("\t/VARIABLES=");
470 for (i = 0; i < variables_cnt; i++)
472 struct variable *v = variables[i];
474 printf ("%s ", v->name);
475 if (i < variables_cnt - 1)
477 struct variable *nv = variables[i + 1];
479 if (v->p.crs.min == nv->p.crs.min
480 && v->p.crs.max == nv->p.crs.max)
483 printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1);
491 printf ("\t/TABLES=");
492 for (i = 0; i < nxtab; i++)
494 struct crosstab *x = xtab[i];
499 for (j = 0; j < x->nvar; j++)
503 printf ("%s", x->v[j]->name);
509 #endif /* DEBUGGING */
511 /* Data file processing. */
513 static int compare_table_entry (const void *, const void *, void *);
514 static unsigned hash_table_entry (const void *, void *);
516 /* Set up the crosstabulation tables for processing. */
522 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
532 for (i = 0; i < nxtab; i++)
534 struct crosstab *x = xtab[i];
539 x->ofs = n_sorted_tab;
541 for (j = 2; j < x->nvar; j++)
542 count *= x->v[j - 2]->p.crs.count;
544 sorted_tab = xrealloc (sorted_tab,
545 sizeof *sorted_tab * (n_sorted_tab + count));
546 v = local_alloc (sizeof *v * x->nvar);
547 for (j = 2; j < x->nvar; j++)
548 v[j] = x->v[j]->p.crs.min;
549 for (j = 0; j < count; j++)
551 struct table_entry *te;
554 te = sorted_tab[n_sorted_tab++]
555 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
559 const int mat_size = (x->v[0]->p.crs.count
560 * x->v[1]->p.crs.count);
563 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
564 for (m = 0; m < mat_size; m++)
568 for (k = 2; k < x->nvar; k++)
570 for (k = 2; k < x->nvar; k++)
571 if (++v[k] >= x->v[k]->p.crs.max)
572 v[k] = x->v[k]->p.crs.min;
579 sorted_tab = xrealloc (sorted_tab,
580 sizeof *sorted_tab * (n_sorted_tab + 1));
581 sorted_tab[n_sorted_tab] = NULL;
585 /* Form crosstabulations for general mode. */
587 calc_general (struct ccase *c)
590 double weight = dict_get_case_weight (default_dict, c);
592 /* Flattened current table index. */
595 for (t = 0; t < nxtab; t++)
597 struct crosstab *x = xtab[t];
598 const size_t entry_size = (sizeof (struct table_entry)
599 + sizeof (union value) * (x->nvar - 1));
600 struct table_entry *te = local_alloc (entry_size);
602 /* Construct table entry for the current record and table. */
608 for (j = 0; j < x->nvar; j++)
610 if ((cmd.miss == CRS_TABLE
611 && is_missing (&c->data[x->v[j]->fv], x->v[j]))
612 || (cmd.miss == CRS_INCLUDE
613 && is_system_missing (&c->data[x->v[j]->fv], x->v[j])))
615 x->missing += weight;
619 if (x->v[j]->type == NUMERIC)
620 te->v[j].f = c->data[x->v[j]->fv].f;
623 memcpy (te->v[j].s, c->data[x->v[j]->fv].s, x->v[j]->width);
625 /* Necessary in order to simplify comparisons. */
626 memset (&te->v[j].s[x->v[j]->width], 0,
627 sizeof (union value) - x->v[j]->width);
632 /* Add record to hash table. */
634 struct table_entry **tepp
635 = (struct table_entry **) hsh_probe (gen_tab, te);
638 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
641 memcpy (tep, te, entry_size);
646 (*tepp)->u.freq += weight;
657 calc_integer (struct ccase *c)
660 double weight = dict_get_case_weight (default_dict, c);
662 /* Flattened current table index. */
665 for (t = 0; t < nxtab; t++)
667 struct crosstab *x = xtab[t];
672 for (i = 0; i < x->nvar; i++)
674 struct variable *const v = x->v[i];
675 double value = c->data[v->fv].f;
677 /* Note that the first test also rules out SYSMIS. */
678 if ((value < v->p.crs.min || value >= v->p.crs.max)
679 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
681 x->missing += weight;
687 ofs += fact * ((int) value - v->p.crs.min);
688 fact *= v->p.crs.count;
693 const int row = c->data[x->v[ROW_VAR]->fv].f - x->v[ROW_VAR]->p.crs.min;
694 const int col = c->data[x->v[COL_VAR]->fv].f - x->v[COL_VAR]->p.crs.min;
695 const int col_dim = x->v[COL_VAR]->p.crs.count;
697 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
707 /* Print out all table entries in NULL-terminated TAB for use by a
708 debugger (a person, not a program). */
710 print_table_entries (struct table_entry **tab)
712 printf ("raw crosstabulation data:\n");
715 const struct crosstab *x = xtab[(*tab)->table];
718 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
719 for (i = 0; i < x->nvar; i++)
723 printf ("%s:", x->v[i]->name);
725 if (x->v[i]->type == NUMERIC)
726 printf ("%g", (*tab)->v[i].f);
728 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
736 /* Compare the table_entry's at A and B and return a strcmp()-type
739 compare_table_entry (const void *a_, const void *b_, void *foo unused)
741 const struct table_entry *a = a_;
742 const struct table_entry *b = b_;
744 if (a->table > b->table)
746 else if (a->table < b->table)
750 const struct crosstab *x = xtab[a->table];
753 for (i = x->nvar - 1; i >= 0; i--)
754 if (x->v[i]->type == NUMERIC)
756 const double diffnum = a->v[i].f - b->v[i].f;
759 else if (diffnum > 0)
764 assert (x->v[i]->type == ALPHA);
766 const int diffstr = strncmp (a->v[i].s, b->v[i].s, x->v[i]->width);
776 /* Calculate a hash value from table_entry PA. */
778 hash_table_entry (const void *pa, void *foo unused)
780 const struct table_entry *a = pa;
781 unsigned long hash = a->table;
784 /* Hash formula from _SPSS Statistical Algorithms_. */
785 for (i = 0; i < xtab[a->table]->nvar; i++)
787 hash = (hash << 3) | (hash >> (CHAR_BIT * SIZEOF_LONG - 3));
788 hash ^= a->v[i].hash[0];
789 #if SIZEOF_DOUBLE / SIZEOF_LONG > 1
790 hash ^= a->v[i].hash[1];
797 /* Post-data reading calculations. */
799 static struct table_entry **find_pivot_extent (struct table_entry **,
800 int *cnt, int pivot);
801 static void enum_var_values (struct table_entry **entries, int entry_cnt,
803 union value **values, int *value_cnt);
804 static void output_pivot_table (struct table_entry **, struct table_entry **,
805 double **, double **, double **,
806 int *, int *, int *);
807 static void make_summary_table (void);
814 n_sorted_tab = hsh_count (gen_tab);
815 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
817 print_table_entries (sorted_tab);
821 make_summary_table ();
823 /* Identify all the individual crosstabulation tables, and deal with
826 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
827 int pc = n_sorted_tab; /* Pivot count. */
829 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
830 int maxrows = 0, maxcols = 0, maxcells = 0;
834 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
838 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
839 &maxrows, &maxcols, &maxcells);
848 hsh_destroy (gen_tab);
851 static void insert_summary (struct tab_table *, int tab_index, double valid);
853 /* Output a table summarizing the cases processed. */
855 make_summary_table (void)
857 struct tab_table *summary;
859 struct table_entry **pb = sorted_tab, **pe;
860 int pc = n_sorted_tab;
863 summary = tab_create (7, 3 + nxtab, 1);
864 tab_title (summary, 0, _("Summary."));
865 tab_headers (summary, 1, 0, 3, 0);
866 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
867 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
868 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
869 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
870 tab_hline (summary, TAL_1, 1, 6, 1);
871 tab_hline (summary, TAL_1, 1, 6, 2);
872 tab_vline (summary, TAL_1, 3, 1, 1);
873 tab_vline (summary, TAL_1, 5, 1, 1);
877 for (i = 0; i < 3; i++)
879 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
880 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
883 tab_offset (summary, 0, 3);
889 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
893 while (cur_tab < (*pb)->table)
894 insert_summary (summary, cur_tab++, 0.);
897 for (valid = 0.; pb < pe; pb++)
898 valid += (*pb)->u.freq;
901 const struct crosstab *const x = xtab[(*pb)->table];
902 const int n_cols = x->v[COL_VAR]->p.crs.count;
903 const int n_rows = x->v[ROW_VAR]->p.crs.count;
904 const int count = n_cols * n_rows;
906 for (valid = 0.; pb < pe; pb++)
908 const double *data = (*pb)->u.data;
911 for (i = 0; i < count; i++)
915 insert_summary (summary, cur_tab++, valid);
920 while (cur_tab < nxtab)
921 insert_summary (summary, cur_tab++, 0.);
926 /* Inserts a line into T describing the crosstabulation at index
927 TAB_INDEX, which has VALID valid observations. */
929 insert_summary (struct tab_table *t, int tab_index, double valid)
931 struct crosstab *x = xtab[tab_index];
933 tab_hline (t, TAL_1, 0, 6, 0);
935 /* Crosstabulation name. */
937 char *buf = local_alloc (128 * x->nvar);
941 for (i = 0; i < x->nvar; i++)
944 cp = stpcpy (cp, " * ");
946 cp = stpcpy (cp, x->v[i]->label ? x->v[i]->label : x->v[i]->name);
948 tab_text (t, 0, 0, TAB_LEFT, buf);
953 /* Counts and percentages. */
963 for (i = 0; i < 3; i++)
965 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
966 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
977 static struct tab_table *table; /* Crosstabulation table. */
978 static struct tab_table *chisq; /* Chi-square table. */
979 static struct tab_table *sym; /* Symmetric measures table. */
980 static struct tab_table *risk; /* Risk estimate table. */
981 static struct tab_table *direct; /* Directional measures table. */
984 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
986 /* Column values, number of columns. */
987 static union value *cols;
990 /* Row values, number of rows. */
991 static union value *rows;
994 /* Number of statistically interesting columns/rows (columns/rows with
996 static int ns_cols, ns_rows;
998 /* Crosstabulation. */
999 static struct crosstab *x;
1001 /* Number of variables from the crosstabulation to consider. This is
1002 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1005 /* Matrix contents. */
1006 static double *mat; /* Matrix proper. */
1007 static double *row_tot; /* Row totals. */
1008 static double *col_tot; /* Column totals. */
1009 static double W; /* Grand total. */
1011 static void display_dimensions (struct tab_table *, int first_difference,
1012 struct table_entry *);
1013 static void display_crosstabulation (void);
1014 static void display_chisq (void);
1015 static void display_symmetric (void);
1016 static void display_risk (void);
1017 static void display_directional (void);
1018 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1019 static void table_value_missing (struct tab_table *table, int c, int r,
1020 unsigned char opt, const union value *v,
1021 const struct variable *var);
1022 static void delete_missing (void);
1024 /* Output pivot table beginning at PB and continuing until PE,
1025 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1026 hold *MAXROWS entries. */
1028 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1029 double **matp, double **row_totp, double **col_totp,
1030 int *maxrows, int *maxcols, int *maxcells)
1033 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1034 int tc = pe - pb; /* Table count. */
1036 /* Table entry for header comparison. */
1037 struct table_entry *cmp;
1039 x = xtab[(*pb)->table];
1040 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1042 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1044 /* Crosstabulation table initialization. */
1047 table = tab_create (nvar + n_cols,
1048 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1049 tab_headers (table, nvar - 1, 0, 2, 0);
1051 /* First header line. */
1052 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1053 TAB_CENTER | TAT_TITLE, x->v[COL_VAR]->name);
1055 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1057 /* Second header line. */
1061 for (i = 2; i < nvar; i++)
1062 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1063 TAB_RIGHT | TAT_TITLE,
1064 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1065 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1066 x->v[ROW_VAR]->name);
1067 for (i = 0; i < n_cols; i++)
1068 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1070 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1073 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1074 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1078 char *title = local_alloc (x->nvar * 64 + 128);
1082 if (cmd.pivot == CRS_PIVOT)
1083 for (i = 0; i < nvar; i++)
1086 cp = stpcpy (cp, " by ");
1087 cp = stpcpy (cp, x->v[i]->name);
1091 cp = spprintf (cp, "%s by %s for", x->v[0]->name, x->v[1]->name);
1092 for (i = 2; i < nvar; i++)
1094 char buf[64], *bufp;
1099 cp = stpcpy (cp, x->v[i]->name);
1101 data_out (buf, &x->v[i]->print, &(*pb)->v[i]);
1102 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1104 cp = stpcpy (cp, bufp);
1108 cp = stpcpy (cp, " [");
1109 for (i = 0; i < num_cells; i++)
1117 static const struct tuple cell_names[] =
1119 {CRS_CL_COUNT, N_("count")},
1120 {CRS_CL_ROW, N_("row %")},
1121 {CRS_CL_COLUMN, N_("column %")},
1122 {CRS_CL_TOTAL, N_("total %")},
1123 {CRS_CL_EXPECTED, N_("expected")},
1124 {CRS_CL_RESIDUAL, N_("residual")},
1125 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1126 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1130 const struct tuple *t;
1132 for (t = cell_names; t->value != cells[i]; t++)
1133 assert (t->value != -1);
1135 cp = stpcpy (cp, ", ");
1136 cp = stpcpy (cp, gettext (t->name));
1140 tab_title (table, 0, title);
1144 tab_offset (table, 0, 2);
1149 /* Chi-square table initialization. */
1150 if (cmd.a_statistics[CRS_ST_CHISQ])
1152 chisq = tab_create (6 + (nvar - 2),
1153 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1154 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1156 tab_title (chisq, 0, "Chi-square tests.");
1158 tab_offset (chisq, nvar - 2, 0);
1159 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1160 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1161 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1162 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1163 _("Asymp. Sig. (2-sided)"));
1164 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1165 _("Exact. Sig. (2-sided)"));
1166 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1167 _("Exact. Sig. (1-sided)"));
1169 tab_offset (chisq, 0, 1);
1174 /* Symmetric measures. */
1175 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1176 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1177 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1178 || cmd.a_statistics[CRS_ST_KAPPA])
1180 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1181 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1182 tab_title (sym, 0, "Symmetric measures.");
1184 tab_offset (sym, nvar - 2, 0);
1185 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1186 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1187 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1188 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1189 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1190 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1191 tab_offset (sym, 0, 1);
1196 /* Risk estimate. */
1197 if (cmd.a_statistics[CRS_ST_RISK])
1199 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1200 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1201 tab_title (risk, 0, "Risk estimate.");
1203 tab_offset (risk, nvar - 2, 0);
1204 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1205 _(" 95%% Confidence Interval"));
1206 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1207 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1208 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1209 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1210 tab_hline (risk, TAL_1, 2, 3, 1);
1211 tab_vline (risk, TAL_1, 2, 0, 1);
1212 tab_offset (risk, 0, 2);
1217 /* Directional measures. */
1218 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1219 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1221 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1222 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1223 tab_title (direct, 0, "Directional measures.");
1225 tab_offset (direct, nvar - 2, 0);
1226 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1227 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1228 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1229 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1230 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1231 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1232 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1233 tab_offset (direct, 0, 1);
1240 /* Find pivot subtable if applicable. */
1241 te = find_pivot_extent (tb, &tc, 0);
1245 /* Find all the row variable values. */
1246 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1248 /* Allocate memory space for the column and row totals. */
1249 if (n_rows > *maxrows)
1251 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1252 row_tot = *row_totp;
1255 if (n_cols > *maxcols)
1257 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1258 col_tot = *col_totp;
1262 /* Allocate table space for the matrix. */
1263 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1264 tab_realloc (table, -1,
1265 max (tab_nr (table) + (n_rows + 1) * num_cells,
1266 tab_nr (table) * (pe - pb) / (te - tb)));
1268 if (mode == GENERAL)
1270 /* Allocate memory space for the matrix. */
1271 if (n_cols * n_rows > *maxcells)
1273 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1274 *maxcells = n_cols * n_rows;
1279 /* Build the matrix and calculate column totals. */
1281 union value *cur_col = cols;
1282 union value *cur_row = rows;
1284 double *cp = col_tot;
1285 struct table_entry **p;
1288 for (p = &tb[0]; p < te; p++)
1290 for (; memcmp (cur_col, &(*p)->v[COL_VAR], sizeof *cur_col);
1294 for (; cur_row < &rows[n_rows]; cur_row++)
1300 mp = &mat[cur_col - cols];
1303 for (; memcmp (cur_row, &(*p)->v[ROW_VAR], sizeof *cur_row);
1310 *cp += *mp = (*p)->u.freq;
1315 /* Zero out the rest of the matrix. */
1316 for (; cur_row < &rows[n_rows]; cur_row++)
1322 if (cur_col < &cols[n_cols])
1324 const int rem_cols = n_cols - (cur_col - cols);
1327 for (c = 0; c < rem_cols; c++)
1329 mp = &mat[cur_col - cols];
1330 for (r = 0; r < n_rows; r++)
1332 for (c = 0; c < rem_cols; c++)
1334 mp += n_cols - rem_cols;
1342 double *tp = col_tot;
1344 assert (mode == INTEGER);
1345 mat = (*tb)->u.data;
1348 /* Calculate column totals. */
1349 for (c = 0; c < n_cols; c++)
1352 double *cp = &mat[c];
1354 for (r = 0; r < n_rows; r++)
1355 cum += cp[r * n_cols];
1363 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1364 ns_cols += *cp != 0.;
1367 /* Calculate row totals. */
1370 double *rp = row_tot;
1373 for (ns_rows = 0, r = n_rows; r--; )
1376 for (c = n_cols; c--; )
1384 /* Calculate grand total. */
1390 if (n_rows < n_cols)
1391 tp = row_tot, n = n_rows;
1393 tp = col_tot, n = n_cols;
1400 /* Print the matrix. */
1404 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1405 for (i = 2; i < nvar; i++)
1406 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1409 for (c = 0; c < n_cols; c++)
1410 printf ("%4g", cols[c].f);
1412 for (r = 0; r < n_rows; r++)
1414 printf ("%4g:", rows[r].f);
1415 for (c = 0; c < n_cols; c++)
1416 printf ("%4g", mat[c + r * n_cols]);
1417 printf ("%4g", row_tot[r]);
1421 for (c = 0; c < n_cols; c++)
1422 printf ("%4g", col_tot[c]);
1428 /* Find the first variable that differs from the last subtable,
1429 then display the values of the dimensioning variables for
1430 each table that needs it. */
1432 int first_difference = nvar - 1;
1435 for (; ; first_difference--)
1437 assert (first_difference >= 2);
1438 if (memcmp (&cmp->v[first_difference],
1439 &(*tb)->v[first_difference], sizeof *cmp->v))
1445 display_dimensions (table, first_difference, *tb);
1447 display_dimensions (chisq, first_difference, *tb);
1449 display_dimensions (sym, first_difference, *tb);
1451 display_dimensions (risk, first_difference, *tb);
1453 display_dimensions (direct, first_difference, *tb);
1457 display_crosstabulation ();
1458 if (cmd.miss == CRS_REPORT)
1463 display_symmetric ();
1467 display_directional ();
1478 tab_resize (chisq, 4 + (nvar - 2), -1);
1489 /* Delete missing rows and columns for statistical analysis when
1492 delete_missing (void)
1497 for (r = 0; r < n_rows; r++)
1498 if (is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1502 for (c = 0; c < n_cols; c++)
1503 mat[c + r * n_cols] = 0.;
1511 for (c = 0; c < n_cols; c++)
1512 if (is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1516 for (r = 0; r < n_rows; r++)
1517 mat[c + r * n_cols] = 0.;
1523 /* Prepare table T for submission, and submit it. */
1525 submit (struct tab_table *t)
1532 tab_resize (t, -1, 0);
1533 if (tab_nr (t) == tab_t (t))
1538 tab_offset (t, 0, 0);
1540 for (i = 2; i < nvar; i++)
1541 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1542 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1543 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1544 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1546 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1548 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1549 tab_dim (t, crosstabs_dim);
1553 /* Sets the widths of all the columns and heights of all the rows in
1554 table T for driver D. */
1556 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1560 /* Width of a numerical column. */
1561 int c = outp_string_width (d, "0.000000");
1562 if (cmd.miss == CRS_REPORT)
1563 c += outp_string_width (d, "M");
1565 /* Set width for header columns. */
1568 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1570 if (w < d->prop_em_width * 8)
1571 w = d->prop_em_width * 8;
1573 if (w > d->prop_em_width * 15)
1574 w = d->prop_em_width * 15;
1576 for (i = 0; i < t->l; i++)
1580 for (i = t->l; i < t->nc; i++)
1583 for (i = 0; i < t->nr; i++)
1584 t->h[i] = tab_natural_height (t, d, i);
1587 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1588 int *cnt, int pivot);
1589 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1590 int *cnt, int pivot);
1592 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1594 static struct table_entry **
1595 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1597 return (mode == GENERAL
1598 ? find_pivot_extent_general (tp, cnt, pivot)
1599 : find_pivot_extent_integer (tp, cnt, pivot));
1602 /* Find the extent of a region in TP that contains one table. If
1603 PIVOT != 0 that means a set of table entries with identical table
1604 number; otherwise they also have to have the same values for every
1605 dimension after the row and column dimensions. The table that is
1606 searched starts at TP and has length CNT. Returns the first entry
1607 after the last one in the table; sets *CNT to the number of
1608 remaining values. If there are no entries in TP at all, returns
1609 NULL. A yucky interface, admittedly, but it works. */
1610 static struct table_entry **
1611 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1613 struct table_entry *fp = *tp;
1618 x = xtab[(*tp)->table];
1626 if ((*tp)->table != fp->table)
1631 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1638 /* Integer mode correspondent to find_pivot_extent_general(). This
1639 could be optimized somewhat, but I just don't give a crap about
1640 CROSSTABS performance in integer mode, which is just a
1641 CROSSTABS wart as far as I'm concerned.
1643 That said, feel free to send optimization patches to me. */
1644 static struct table_entry **
1645 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1647 struct table_entry *fp = *tp;
1652 x = xtab[(*tp)->table];
1660 if ((*tp)->table != fp->table)
1665 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1672 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1673 result. WIDTH_ points to an int which is either 0 for a
1674 numeric value or a string width for a string value. */
1676 compare_value (const void *a_, const void *b_, void *width_)
1678 const union value *a = a_;
1679 const union value *b = b_;
1680 const int *pwidth = width_;
1681 const int width = *pwidth;
1684 return (a->f < b->f) ? -1 : (a->f > b->f);
1686 return strncmp (a->s, b->s, width);
1689 /* Given an array of ENTRY_CNT table_entry structures starting at
1690 ENTRIES, creates a sorted list of the values that the variable
1691 with index VAR_INDEX takes on. The values are returned as a
1692 malloc()'darray stored in *VALUES, with the number of values
1693 stored in *VALUE_CNT.
1696 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1697 union value **values, int *value_cnt)
1699 if (mode == GENERAL)
1701 int width = xtab[(*entries)->table]->v[var_idx]->width;
1704 *values = xmalloc (sizeof **values * entry_cnt);
1705 for (i = 0; i < entry_cnt; i++)
1706 (*values)[i] = entries[i]->v[var_idx];
1707 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1708 compare_value, &width);
1712 struct crosstab_proc *crs = &xtab[(*entries)->table]->v[var_idx]->p.crs;
1715 assert (mode == INTEGER);
1716 *values = xmalloc (sizeof **values * crs->count);
1717 for (i = 0; i < crs->count; i++)
1718 (*values)[i].f = i + crs->min;
1719 *value_cnt = crs->count;
1723 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1724 from V, displayed with print format spec from variable VAR. When
1725 in REPORT missing-value mode, missing values have an M appended. */
1727 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1728 const union value *v, const struct variable *var)
1730 struct len_string s;
1732 const char *label = val_labs_find (var->val_labs, *v);
1735 tab_text (table, c, r, TAB_LEFT, label);
1739 s.length = var->print.w;
1740 s.string = tab_alloc (table, s.length + 1);
1741 data_out (s.string, &var->print, v);
1742 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1743 s.string[s.length++] = 'M';
1744 while (s.length && *s.string == ' ')
1749 tab_raw (table, c, r, opt, &s);
1752 /* Draws a line across TABLE at the current row to indicate the most
1753 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1754 that changed, and puts the values that changed into the table. TB
1755 and X must be the corresponding table_entry and crosstab,
1758 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1760 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1762 for (; first_difference >= 2; first_difference--)
1763 table_value_missing (table, nvar - first_difference - 1, 0,
1764 TAB_RIGHT, &tb->v[first_difference],
1765 x->v[first_difference]);
1768 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1770 float_M_suffix (struct tab_table *table, int c, int r, double v)
1772 static const struct fmt_spec f = {FMT_F, 8, 0};
1773 struct len_string s;
1776 s.string = tab_alloc (table, 9);
1778 data_out (s.string, &f, (union value *) &v);
1779 while (*s.string == ' ')
1784 tab_raw (table, c, r, TAB_RIGHT, &s);
1787 /* Displays the crosstabulation table. */
1789 display_crosstabulation (void)
1794 for (r = 0; r < n_rows; r++)
1795 table_value_missing (table, nvar - 2, r * num_cells,
1796 TAB_RIGHT, &rows[r], x->v[ROW_VAR]);
1798 tab_text (table, nvar - 2, n_rows * num_cells,
1799 TAB_LEFT, _("Total"));
1801 /* Put in the actual cells. */
1806 tab_offset (table, nvar - 1, -1);
1807 for (r = 0; r < n_rows; r++)
1810 tab_hline (table, TAL_1, -1, n_cols, 0);
1811 for (c = 0; c < n_cols; c++)
1813 double expected_value;
1816 expected_value = row_tot[r] * col_tot[c] / W;
1817 for (i = 0; i < num_cells; i++)
1827 v = *mp / row_tot[r] * 100.;
1830 v = *mp / col_tot[c] * 100.;
1835 case CRS_CL_EXPECTED:
1838 case CRS_CL_RESIDUAL:
1839 v = *mp - expected_value;
1841 case CRS_CL_SRESIDUAL:
1842 v = (*mp - expected_value) / sqrt (expected_value);
1844 case CRS_CL_ASRESIDUAL:
1845 v = ((*mp - expected_value)
1846 / sqrt (expected_value
1847 * (1. - row_tot[r] / W)
1848 * (1. - col_tot[c] / W)));
1854 if (cmd.miss == CRS_REPORT
1855 && (is_num_user_missing (cols[c].f, x->v[COL_VAR])
1856 || is_num_user_missing (rows[r].f, x->v[ROW_VAR])))
1857 float_M_suffix (table, c, i, v);
1859 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1865 tab_offset (table, -1, tab_row (table) + num_cells);
1873 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1874 for (r = 0; r < n_rows; r++)
1875 for (i = 0; i < num_cells; i++)
1888 v = row_tot[r] / W * 100.;
1891 v = row_tot[r] / W * 100.;
1893 case CRS_CL_EXPECTED:
1894 case CRS_CL_RESIDUAL:
1895 case CRS_CL_SRESIDUAL:
1896 case CRS_CL_ASRESIDUAL:
1903 if (cmd.miss == CRS_REPORT
1904 && is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1905 float_M_suffix (table, n_cols, 0, v);
1907 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1909 tab_next_row (table);
1913 /* Column totals, grand total. */
1918 tab_hline (table, TAL_1, -1, n_cols, 0);
1919 for (c = 0; c <= n_cols; c++)
1921 double ct = c < n_cols ? col_tot[c] : W;
1924 for (i = j = 0; i < num_cells; i++)
1942 case CRS_CL_EXPECTED:
1943 case CRS_CL_RESIDUAL:
1944 case CRS_CL_SRESIDUAL:
1945 case CRS_CL_ASRESIDUAL:
1951 if (cmd.miss == CRS_REPORT && c < n_cols
1952 && is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1953 float_M_suffix (table, c, j, v);
1955 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
1961 tab_offset (table, -1, tab_row (table) + j);
1964 tab_offset (table, 0, -1);
1967 static void calc_r (double *X, double *Y, double *, double *, double *);
1968 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1970 /* Display chi-square statistics. */
1972 display_chisq (void)
1974 static const char *chisq_stats[N_CHISQ] =
1976 N_("Pearson Chi-Square"),
1977 N_("Likelihood Ratio"),
1978 N_("Fisher's Exact Test"),
1979 N_("Continuity Correction"),
1980 N_("Linear-by-Linear Association"),
1982 double chisq_v[N_CHISQ];
1983 double fisher1, fisher2;
1989 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1991 tab_offset (chisq, nvar - 2, -1);
1993 for (i = 0; i < N_CHISQ; i++)
1995 if ((i != 2 && chisq_v[i] == SYSMIS)
1996 || (i == 2 && fisher1 == SYSMIS))
2000 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2003 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2004 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2005 tab_float (chisq, 3, 0, TAB_RIGHT,
2006 chisq_sig (chisq_v[i], df[i]), 8, 3);
2011 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2012 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2014 tab_next_row (chisq);
2017 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2018 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2019 tab_next_row (chisq);
2021 tab_offset (chisq, 0, -1);
2024 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2025 double[N_SYMMETRIC]);
2027 /* Display symmetric measures. */
2029 display_symmetric (void)
2031 static const char *categories[] =
2033 N_("Nominal by Nominal"),
2034 N_("Ordinal by Ordinal"),
2035 N_("Interval by Interval"),
2036 N_("Measure of Agreement"),
2039 static const char *stats[N_SYMMETRIC] =
2043 N_("Contingency Coefficient"),
2044 N_("Kendall's tau-b"),
2045 N_("Kendall's tau-c"),
2047 N_("Spearman Correlation"),
2052 static const int stats_categories[N_SYMMETRIC] =
2054 0, 0, 0, 1, 1, 1, 1, 2, 3,
2058 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2061 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2064 tab_offset (sym, nvar - 2, -1);
2066 for (i = 0; i < N_SYMMETRIC; i++)
2068 if (sym_v[i] == SYSMIS)
2071 if (stats_categories[i] != last_cat)
2073 last_cat = stats_categories[i];
2074 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2077 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2078 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2079 if (sym_ase[i] != SYSMIS)
2080 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2081 if (sym_t[i] != SYSMIS)
2082 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2083 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2087 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2088 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2091 tab_offset (sym, 0, -1);
2094 static int calc_risk (double[], double[], double[], union value *);
2096 /* Display risk estimate. */
2101 double risk_v[3], lower[3], upper[3];
2105 if (!calc_risk (risk_v, upper, lower, c))
2108 tab_offset (risk, nvar - 2, -1);
2110 for (i = 0; i < 3; i++)
2112 if (risk_v[i] == SYSMIS)
2118 if (x->v[COL_VAR]->type == NUMERIC)
2119 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2120 x->v[COL_VAR]->name, c[0].f, c[1].f);
2122 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2123 x->v[COL_VAR]->name,
2124 x->v[COL_VAR]->width, c[0].s,
2125 x->v[COL_VAR]->width, c[1].s);
2129 if (x->v[ROW_VAR]->type == NUMERIC)
2130 sprintf (buf, _("For cohort %s = %g"),
2131 x->v[ROW_VAR]->name, rows[i - 1].f);
2133 sprintf (buf, _("For cohort %s = %.*s"),
2134 x->v[ROW_VAR]->name,
2135 x->v[ROW_VAR]->width, rows[i - 1].s);
2139 tab_text (risk, 0, 0, TAB_LEFT, buf);
2140 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2141 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2142 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2143 tab_next_row (risk);
2146 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2147 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2148 tab_next_row (risk);
2150 tab_offset (risk, 0, -1);
2153 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2154 double[N_DIRECTIONAL]);
2156 /* Display directional measures. */
2158 display_directional (void)
2160 static const char *categories[] =
2162 N_("Nominal by Nominal"),
2163 N_("Ordinal by Ordinal"),
2164 N_("Nominal by Interval"),
2167 static const char *stats[] =
2170 N_("Goodman and Kruskal tau"),
2171 N_("Uncertainty Coefficient"),
2176 static const char *types[] =
2183 static const int stats_categories[N_DIRECTIONAL] =
2185 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2188 static const int stats_stats[N_DIRECTIONAL] =
2190 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2193 static const int stats_types[N_DIRECTIONAL] =
2195 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2198 static const int *stats_lookup[] =
2205 static const char **stats_names[] =
2217 double direct_v[N_DIRECTIONAL];
2218 double direct_ase[N_DIRECTIONAL];
2219 double direct_t[N_DIRECTIONAL];
2223 if (!calc_directional (direct_v, direct_ase, direct_t))
2226 tab_offset (direct, nvar - 2, -1);
2228 for (i = 0; i < N_DIRECTIONAL; i++)
2230 if (direct_v[i] == SYSMIS)
2236 for (j = 0; j < 3; j++)
2237 if (last[j] != stats_lookup[j][i])
2240 tab_hline (direct, TAL_1, j, 6, 0);
2245 int k = last[j] = stats_lookup[j][i];
2250 string = x->v[0]->name;
2252 string = x->v[1]->name;
2254 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2255 gettext (stats_names[j][k]), string);
2260 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2261 if (direct_ase[i] != SYSMIS)
2262 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2263 if (direct_t[i] != SYSMIS)
2264 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2265 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2266 tab_next_row (direct);
2269 tab_offset (direct, 0, -1);
2272 /* Statistical calculations. */
2274 /* Returns the value of the gamma (factorial) function for an integer
2277 gamma_int (double x)
2282 for (i = 2; i < x; i++)
2287 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2289 static inline double
2290 Pr (int a, int b, int c, int d)
2292 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2293 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2294 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2295 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2296 / gamma_int (a + b + c + d + 1.));
2299 /* Swap the contents of A and B. */
2301 swap (int *a, int *b)
2308 /* Calculate significance for Fisher's exact test as specified in
2309 _SPSS Statistical Algorithms_, Appendix 5. */
2311 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2315 if (min (c, d) < min (a, b))
2316 swap (&a, &c), swap (&b, &d);
2317 if (min (b, d) < min (a, c))
2318 swap (&a, &b), swap (&c, &d);
2322 swap (&a, &b), swap (&c, &d);
2324 swap (&a, &c), swap (&b, &d);
2328 for (x = 0; x <= a; x++)
2329 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2331 *fisher2 = *fisher1;
2332 for (x = 1; x <= b; x++)
2333 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2336 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2337 columns with values COLS and N_ROWS rows with values ROWS. Values
2338 in the matrix sum to W. */
2340 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2341 double *fisher1, double *fisher2)
2345 chisq[0] = chisq[1] = 0.;
2346 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2347 *fisher1 = *fisher2 = SYSMIS;
2349 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2351 if (ns_rows <= 1 || ns_cols <= 1)
2353 chisq[0] = chisq[1] = SYSMIS;
2357 for (r = 0; r < n_rows; r++)
2358 for (c = 0; c < n_cols; c++)
2360 const double expected = row_tot[r] * col_tot[c] / W;
2361 const double freq = mat[n_cols * r + c];
2362 const double residual = freq - expected;
2365 chisq[0] += residual * residual / expected;
2367 chisq[1] += freq * log (expected / freq);
2378 /* Calculate Yates and Fisher exact test. */
2379 if (ns_cols == 2 && ns_rows == 2)
2381 double f11, f12, f21, f22;
2387 for (i = j = 0; i < n_cols; i++)
2388 if (col_tot[i] != 0.)
2397 f11 = mat[nz_cols[0]];
2398 f12 = mat[nz_cols[1]];
2399 f21 = mat[nz_cols[0] + n_cols];
2400 f22 = mat[nz_cols[1] + n_cols];
2405 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2408 chisq[3] = (W * x * x
2409 / (f11 + f12) / (f21 + f22)
2410 / (f11 + f21) / (f12 + f22));
2418 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2419 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2422 /* Calculate Mantel-Haenszel. */
2423 if (x->v[ROW_VAR]->type == NUMERIC && x->v[COL_VAR]->type == NUMERIC)
2425 double r, ase_0, ase_1;
2426 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2428 chisq[4] = (W - 1.) * r * r;
2433 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2434 ASE_1, and ase_0 into ASE_0. The row and column values must be
2435 passed in X and Y. */
2437 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2439 double SX, SY, S, T;
2441 double sum_XYf, sum_X2Y2f;
2442 double sum_Xr, sum_X2r;
2443 double sum_Yc, sum_Y2c;
2446 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2447 for (j = 0; j < n_cols; j++)
2449 double fij = mat[j + i * n_cols];
2450 double product = X[i] * Y[j];
2451 double temp = fij * product;
2453 sum_X2Y2f += temp * product;
2456 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2458 sum_Xr += X[i] * row_tot[i];
2459 sum_X2r += X[i] * X[i] * row_tot[i];
2463 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2465 sum_Yc += Y[i] * col_tot[i];
2466 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2470 S = sum_XYf - sum_Xr * sum_Yc / W;
2471 SX = sum_X2r - sum_Xr * sum_Xr / W;
2472 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2475 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2480 for (s = c = 0., i = 0; i < n_rows; i++)
2481 for (j = 0; j < n_cols; j++)
2483 double Xresid, Yresid;
2486 Xresid = X[i] - Xbar;
2487 Yresid = Y[j] - Ybar;
2488 temp = (T * Xresid * Yresid
2490 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2491 y = mat[j + i * n_cols] * temp * temp - c;
2496 *ase_1 = sqrt (s) / (T * T);
2500 static double somers_d_v[3];
2501 static double somers_d_ase[3];
2502 static double somers_d_t[3];
2504 /* Calculate symmetric statistics and their asymptotic standard
2505 errors. Returns 0 if none could be calculated. */
2507 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2508 double t[N_SYMMETRIC])
2510 int q = min (ns_rows, ns_cols);
2519 for (i = 0; i < N_SYMMETRIC; i++)
2520 v[i] = ase[i] = t[i] = SYSMIS;
2523 /* Phi, Cramer's V, contingency coefficient. */
2524 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2526 double Xp = 0.; /* Pearson chi-square. */
2531 for (r = 0; r < n_rows; r++)
2532 for (c = 0; c < n_cols; c++)
2534 const double expected = row_tot[r] * col_tot[c] / W;
2535 const double freq = mat[n_cols * r + c];
2536 const double residual = freq - expected;
2539 Xp += residual * residual / expected;
2543 if (cmd.a_statistics[CRS_ST_PHI])
2545 v[0] = sqrt (Xp / W);
2546 v[1] = sqrt (Xp / (W * (q - 1)));
2548 if (cmd.a_statistics[CRS_ST_CC])
2549 v[2] = sqrt (Xp / (Xp + W));
2552 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2553 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2558 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2565 for (r = 0; r < n_rows; r++)
2566 Dr -= row_tot[r] * row_tot[r];
2567 for (c = 0; c < n_cols; c++)
2568 Dc -= col_tot[c] * col_tot[c];
2574 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2575 for (c = 0; c < n_cols; c++)
2579 for (r = 0; r < n_rows; r++)
2580 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2590 for (i = 0; i < n_rows; i++)
2594 for (j = 1; j < n_cols; j++)
2595 Cij += col_tot[j] - cum[j + i * n_cols];
2598 for (j = 1; j < n_cols; j++)
2599 Dij += cum[j + (i - 1) * n_cols];
2603 double fij = mat[j + i * n_cols];
2609 assert (j < n_cols);
2611 Cij -= col_tot[j] - cum[j + i * n_cols];
2612 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2616 Cij += cum[j - 1 + (i - 1) * n_cols];
2617 Dij -= cum[j + (i - 1) * n_cols];
2623 if (cmd.a_statistics[CRS_ST_BTAU])
2624 v[3] = (P - Q) / sqrt (Dr * Dc);
2625 if (cmd.a_statistics[CRS_ST_CTAU])
2626 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2627 if (cmd.a_statistics[CRS_ST_GAMMA])
2628 v[5] = (P - Q) / (P + Q);
2630 /* ASE for tau-b, tau-c, gamma. Calculations could be
2631 eliminated here, at expense of memory. */
2636 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2637 for (i = 0; i < n_rows; i++)
2641 for (j = 1; j < n_cols; j++)
2642 Cij += col_tot[j] - cum[j + i * n_cols];
2645 for (j = 1; j < n_cols; j++)
2646 Dij += cum[j + (i - 1) * n_cols];
2650 double fij = mat[j + i * n_cols];
2652 if (cmd.a_statistics[CRS_ST_BTAU])
2654 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2655 + v[3] * (row_tot[i] * Dc
2656 + col_tot[j] * Dr));
2657 btau_cum += fij * temp * temp;
2661 const double temp = Cij - Dij;
2662 ctau_cum += fij * temp * temp;
2665 if (cmd.a_statistics[CRS_ST_GAMMA])
2667 const double temp = Q * Cij - P * Dij;
2668 gamma_cum += fij * temp * temp;
2671 if (cmd.a_statistics[CRS_ST_D])
2673 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2674 - (P - Q) * (W - row_tot[i]));
2675 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2676 - (Q - P) * (W - col_tot[j]));
2681 assert (j < n_cols);
2683 Cij -= col_tot[j] - cum[j + i * n_cols];
2684 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2688 Cij += cum[j - 1 + (i - 1) * n_cols];
2689 Dij -= cum[j + (i - 1) * n_cols];
2695 btau_var = ((btau_cum
2696 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2698 if (cmd.a_statistics[CRS_ST_BTAU])
2700 ase[3] = sqrt (btau_var);
2701 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2704 if (cmd.a_statistics[CRS_ST_CTAU])
2706 ase[4] = ((2 * q / ((q - 1) * W * W))
2707 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2708 t[4] = v[4] / ase[4];
2710 if (cmd.a_statistics[CRS_ST_GAMMA])
2712 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2713 t[5] = v[5] / (2. / (P + Q)
2714 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2716 if (cmd.a_statistics[CRS_ST_D])
2718 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2719 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2720 somers_d_t[0] = (somers_d_v[0]
2722 * sqrt (ctau_cum - sqr (P - Q) / W)));
2723 somers_d_v[1] = (P - Q) / Dc;
2724 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2725 somers_d_t[1] = (somers_d_v[1]
2727 * sqrt (ctau_cum - sqr (P - Q) / W)));
2728 somers_d_v[2] = (P - Q) / Dr;
2729 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2730 somers_d_t[2] = (somers_d_v[2]
2732 * sqrt (ctau_cum - sqr (P - Q) / W)));
2738 /* Spearman correlation, Pearson's r. */
2739 if (cmd.a_statistics[CRS_ST_CORR])
2741 double *R = local_alloc (sizeof *R * n_rows);
2742 double *C = local_alloc (sizeof *C * n_cols);
2745 double y, t, c = 0., s = 0.;
2750 R[i] = s + (row_tot[i] + 1.) / 2.;
2757 assert (i < n_rows);
2762 double y, t, c = 0., s = 0.;
2767 C[j] = s + (col_tot[j] + 1.) / 2;
2774 assert (j < n_cols);
2778 calc_r (R, C, &v[6], &t[6], &ase[6]);
2784 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2788 /* Cohen's kappa. */
2789 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2791 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2794 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2795 i < ns_rows; i++, j++)
2799 while (col_tot[j] == 0.)
2802 prod = row_tot[i] * col_tot[j];
2803 sum = row_tot[i] + col_tot[j];
2805 sum_fii += mat[j + i * n_cols];
2807 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2808 sum_riciri_ci += prod * sum;
2810 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2811 for (j = 0; j < ns_cols; j++)
2813 double sum = row_tot[i] + col_tot[j];
2814 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2817 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2819 ase[8] = sqrt ((W * W * sum_rici
2820 + sum_rici * sum_rici
2821 - W * sum_riciri_ci)
2822 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2824 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2825 / sqr (W * W - sum_rici))
2826 + ((2. * (W - sum_fii)
2827 * (2. * sum_fii * sum_rici
2828 - W * sum_fiiri_ci))
2829 / cube (W * W - sum_rici))
2830 + (sqr (W - sum_fii)
2831 * (W * sum_fijri_ci2 - 4.
2832 * sum_rici * sum_rici)
2833 / pow4 (W * W - sum_rici))));
2835 t[8] = v[8] / ase[8];
2842 /* Calculate risk estimate. */
2844 calc_risk (double *value, double *upper, double *lower, union value *c)
2846 double f11, f12, f21, f22;
2852 for (i = 0; i < 3; i++)
2853 value[i] = upper[i] = lower[i] = SYSMIS;
2856 if (ns_rows != 2 || ns_cols != 2)
2863 for (i = j = 0; i < n_cols; i++)
2864 if (col_tot[i] != 0.)
2873 f11 = mat[nz_cols[0]];
2874 f12 = mat[nz_cols[1]];
2875 f21 = mat[nz_cols[0] + n_cols];
2876 f22 = mat[nz_cols[1] + n_cols];
2878 c[0] = cols[nz_cols[0]];
2879 c[1] = cols[nz_cols[1]];
2882 value[0] = (f11 * f22) / (f12 * f21);
2883 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2884 lower[0] = value[0] * exp (-1.960 * v);
2885 upper[0] = value[0] * exp (1.960 * v);
2887 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2888 v = sqrt ((f12 / (f11 * (f11 + f12)))
2889 + (f22 / (f21 * (f21 + f22))));
2890 lower[1] = value[1] * exp (-1.960 * v);
2891 upper[1] = value[1] * exp (1.960 * v);
2893 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2894 v = sqrt ((f11 / (f12 * (f11 + f12)))
2895 + (f21 / (f22 * (f21 + f22))));
2896 lower[2] = value[2] * exp (-1.960 * v);
2897 upper[2] = value[2] * exp (1.960 * v);
2902 /* Calculate directional measures. */
2904 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2905 double t[N_DIRECTIONAL])
2910 for (i = 0; i < N_DIRECTIONAL; i++)
2911 v[i] = ase[i] = t[i] = SYSMIS;
2915 if (cmd.a_statistics[CRS_ST_LAMBDA])
2917 double *fim = xmalloc (sizeof *fim * n_rows);
2918 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2919 double *fmj = xmalloc (sizeof *fmj * n_cols);
2920 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2921 double sum_fim, sum_fmj;
2923 int rm_index, cm_index;
2926 /* Find maximum for each row and their sum. */
2927 for (sum_fim = 0., i = 0; i < n_rows; i++)
2929 double max = mat[i * n_cols];
2932 for (j = 1; j < n_cols; j++)
2933 if (mat[j + i * n_cols] > max)
2935 max = mat[j + i * n_cols];
2939 sum_fim += fim[i] = max;
2940 fim_index[i] = index;
2943 /* Find maximum for each column. */
2944 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2946 double max = mat[j];
2949 for (i = 1; i < n_rows; i++)
2950 if (mat[j + i * n_cols] > max)
2952 max = mat[j + i * n_cols];
2956 sum_fmj += fmj[j] = max;
2957 fmj_index[j] = index;
2960 /* Find maximum row total. */
2963 for (i = 1; i < n_rows; i++)
2964 if (row_tot[i] > rm)
2970 /* Find maximum column total. */
2973 for (j = 1; j < n_cols; j++)
2974 if (col_tot[j] > cm)
2980 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2981 v[1] = (sum_fmj - rm) / (W - rm);
2982 v[2] = (sum_fim - cm) / (W - cm);
2984 /* ASE1 for Y given X. */
2988 for (accum = 0., i = 0; i < n_rows; i++)
2989 for (j = 0; j < n_cols; j++)
2991 const int deltaj = j == cm_index;
2992 accum += (mat[j + i * n_cols]
2993 * sqr ((j == fim_index[i])
2998 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3001 /* ASE0 for Y given X. */
3005 for (accum = 0., i = 0; i < n_rows; i++)
3006 if (cm_index != fim_index[i])
3007 accum += (mat[i * n_cols + fim_index[i]]
3008 + mat[i * n_cols + cm_index]);
3009 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3012 /* ASE1 for X given Y. */
3016 for (accum = 0., i = 0; i < n_rows; i++)
3017 for (j = 0; j < n_cols; j++)
3019 const int deltaj = i == rm_index;
3020 accum += (mat[j + i * n_cols]
3021 * sqr ((i == fmj_index[j])
3026 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3029 /* ASE0 for X given Y. */
3033 for (accum = 0., j = 0; j < n_cols; j++)
3034 if (rm_index != fmj_index[j])
3035 accum += (mat[j + n_cols * fmj_index[j]]
3036 + mat[j + n_cols * rm_index]);
3037 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3040 /* Symmetric ASE0 and ASE1. */
3045 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3046 for (j = 0; j < n_cols; j++)
3048 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3049 int temp1 = (i == rm_index) + (j == cm_index);
3050 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3051 accum1 += (mat[j + i * n_cols]
3052 * sqr (temp0 + (v[0] - 1.) * temp1));
3054 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3055 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3056 / (2. * W - rm - cm));
3065 double sum_fij2_ri, sum_fij2_ci;
3066 double sum_ri2, sum_cj2;
3068 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3069 for (j = 0; j < n_cols; j++)
3071 double temp = sqr (mat[j + i * n_cols]);
3072 sum_fij2_ri += temp / row_tot[i];
3073 sum_fij2_ci += temp / col_tot[j];
3076 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3077 sum_ri2 += row_tot[i] * row_tot[i];
3079 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3080 sum_cj2 += col_tot[j] * col_tot[j];
3082 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3083 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3087 if (cmd.a_statistics[CRS_ST_UC])
3089 double UX, UY, UXY, P;
3090 double ase1_yx, ase1_xy, ase1_sym;
3093 for (UX = 0., i = 0; i < n_rows; i++)
3094 if (row_tot[i] > 0.)
3095 UX -= row_tot[i] / W * log (row_tot[i] / W);
3097 for (UY = 0., j = 0; j < n_cols; j++)
3098 if (col_tot[j] > 0.)
3099 UY -= col_tot[j] / W * log (col_tot[j] / W);
3101 for (UXY = P = 0., i = 0; i < n_rows; i++)
3102 for (j = 0; j < n_cols; j++)
3104 double entry = mat[j + i * n_cols];
3109 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3110 UXY -= entry / W * log (entry / W);
3113 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3114 for (j = 0; j < n_cols; j++)
3116 double entry = mat[j + i * n_cols];
3121 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3122 + (UX - UXY) * log (col_tot[j] / W));
3123 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3124 + (UY - UXY) * log (row_tot[i] / W));
3125 ase1_sym += entry * sqr ((UXY
3126 * log (row_tot[i] * col_tot[j] / (W * W)))
3127 - (UX + UY) * log (entry / W));
3130 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3131 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3132 t[5] = v[5] / ((2. / (W * (UX + UY)))
3133 * sqrt (P - sqr (UX + UY - UXY) / W));
3135 v[6] = (UX + UY - UXY) / UX;
3136 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3137 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3139 v[7] = (UX + UY - UXY) / UY;
3140 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3141 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3145 if (cmd.a_statistics[CRS_ST_D])
3150 calc_symmetric (NULL, NULL, NULL);
3151 for (i = 0; i < 3; i++)
3153 v[8 + i] = somers_d_v[i];
3154 ase[8 + i] = somers_d_ase[i];
3155 t[8 + i] = somers_d_t[i];
3160 if (cmd.a_statistics[CRS_ST_ETA])
3163 double sum_Xr, sum_X2r;
3167 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3169 sum_Xr += rows[i].f * row_tot[i];
3170 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3172 SX = sum_X2r - sum_Xr * sum_Xr / W;
3174 for (SXW = 0., j = 0; j < n_cols; j++)
3178 for (cum = 0., i = 0; i < n_rows; i++)
3180 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3181 cum += rows[i].f * mat[j + i * n_cols];
3184 SXW -= cum * cum / col_tot[j];
3186 v[11] = sqrt (1. - SXW / SX);
3190 double sum_Yc, sum_Y2c;
3194 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3196 sum_Yc += cols[i].f * col_tot[i];
3197 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3199 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3201 for (SYW = 0., i = 0; i < n_rows; i++)
3205 for (cum = 0., j = 0; j < n_cols; j++)
3207 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3208 cum += cols[j].f * mat[j + i * n_cols];
3211 SYW -= cum * cum / row_tot[i];
3213 v[12] = sqrt (1. - SYW / SY);