1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
32 /* AIX requires this to be the first thing in the file. */
35 #define alloca __builtin_alloca
43 #ifndef alloca /* predefined by HP cc +Olibcalls */
54 #include "algorithm.h"
58 #include "dcdflib/cdflib.h"
67 #include "value-labels.h"
71 #include "debug-print.h"
77 +missing=miss:!table/include/report;
78 +write[wr_]=none,cells,all;
79 +format=fmt:!labels/nolabels/novallabs,
82 tabl:!tables/notables,
85 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
87 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
93 /* Number of chi-square statistics. */
96 /* Number of symmetric statistics. */
99 /* Number of directional statistics. */
100 #define N_DIRECTIONAL 13
102 /* A single table entry for general mode. */
105 int table; /* Flattened table number. */
108 double freq; /* Frequency count. */
109 double *data; /* Crosstabulation table for integer mode. */
112 union value v[1]; /* Values. */
115 /* A crosstabulation. */
118 int nvar; /* Number of variables. */
119 double missing; /* Missing cases count. */
120 int ofs; /* Integer mode: Offset into sorted_tab[]. */
121 struct variable *v[2]; /* At least two variables; sorted by
122 larger indices first. */
125 /* Indexes into crosstab.v. */
132 /* General mode crosstabulation table. */
133 static struct hsh_table *gen_tab; /* Hash table. */
134 static int n_sorted_tab; /* Number of entries in sorted_tab. */
135 static struct table_entry **sorted_tab; /* Sorted table. */
137 /* Variables specifies on VARIABLES. */
138 static struct variable **variables;
139 static size_t variables_cnt;
142 static struct crosstab **xtab;
145 /* Integer or general mode? */
154 static int num_cells; /* Number of cells requested. */
155 static int cells[8]; /* Cells requested. */
156 static int expected; /* Nonzero if expected value is needed. */
159 static int write; /* One of WR_* that specifies the WRITE style. */
161 /* Command parsing info. */
162 static struct cmd_crosstabs cmd;
165 static struct pool *pl_tc; /* For table cells. */
166 static struct pool *pl_col; /* For column data. */
168 static int internal_cmd_crosstabs (void);
169 static void precalc (void);
170 static int calc_general (struct ccase *);
171 static int calc_integer (struct ccase *);
172 static void postcalc (void);
173 static void submit (struct tab_table *);
176 static void debug_print (void);
177 static void print_table_entries (struct table_entry **tab);
180 /* Parse and execute CROSSTABS, then clean up. */
184 int result = internal_cmd_crosstabs ();
187 pool_destroy (pl_tc);
188 pool_destroy (pl_col);
193 /* Parses and executes the CROSSTABS procedure. */
195 internal_cmd_crosstabs (void)
201 pl_tc = pool_create ();
202 pl_col = pool_create ();
204 lex_match_id ("CROSSTABS");
205 if (!parse_crosstabs (&cmd))
209 /* Needs variables. */
213 mode = variables ? INTEGER : GENERAL;
219 cmd.a_cells[CRS_CL_COUNT] = 1;
227 for (i = 0; i < CRS_CL_count; i++)
232 cmd.a_cells[CRS_CL_COUNT] = 1;
233 cmd.a_cells[CRS_CL_ROW] = 1;
234 cmd.a_cells[CRS_CL_COLUMN] = 1;
235 cmd.a_cells[CRS_CL_TOTAL] = 1;
237 if (cmd.a_cells[CRS_CL_ALL])
239 for (i = 0; i < CRS_CL_count; i++)
241 cmd.a_cells[CRS_CL_ALL] = 0;
243 cmd.a_cells[CRS_CL_NONE] = 0;
244 for (num_cells = i = 0; i < CRS_CL_count; i++)
247 if (i >= CRS_CL_EXPECTED)
249 cmd.a_cells[num_cells++] = i;
254 if (cmd.sbc_statistics)
259 for (i = 0; i < CRS_ST_count; i++)
260 if (cmd.a_statistics[i])
263 cmd.a_statistics[CRS_ST_CHISQ] = 1;
264 if (cmd.a_statistics[CRS_ST_ALL])
265 for (i = 0; i < CRS_ST_count; i++)
266 cmd.a_statistics[i] = 1;
270 if (cmd.miss == CRS_REPORT && mode == GENERAL)
272 msg (SE, _("Missing mode REPORT not allowed in general mode. "
273 "Assuming MISSING=TABLE."));
274 cmd.miss = CRS_TABLE;
278 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
279 cmd.a_write[CRS_WR_ALL] = 0;
280 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
282 msg (SE, _("Write mode ALL not allowed in general mode. "
283 "Assuming WRITE=CELLS."));
284 cmd.a_write[CRS_WR_CELLS] = 1;
287 && (cmd.a_write[CRS_WR_NONE]
288 + cmd.a_write[CRS_WR_ALL]
289 + cmd.a_write[CRS_WR_CELLS] == 0))
290 cmd.a_write[CRS_WR_CELLS] = 1;
291 if (cmd.a_write[CRS_WR_CELLS])
292 write = CRS_WR_CELLS;
293 else if (cmd.a_write[CRS_WR_ALL])
298 procedure (precalc, mode == GENERAL ? calc_general : calc_integer, postcalc);
303 /* Parses the TABLES subcommand. */
305 crs_custom_tables (struct cmd_crosstabs *cmd unused)
307 struct var_set *var_set;
309 struct variable ***by = NULL;
314 /* Ensure that this is a TABLES subcommand. */
315 if (!lex_match_id ("TABLES")
316 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
321 if (variables != NULL)
322 var_set = var_set_create_from_array (variables, variables_cnt);
324 var_set = var_set_create_from_dict (default_dict);
325 assert (var_set != NULL);
329 by = xrealloc (by, sizeof *by * (n_by + 1));
330 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
331 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
332 PV_NO_DUPLICATE | PV_NO_SCRATCH))
337 if (!lex_match (T_BY))
341 lex_error (_("expecting BY"));
350 int *by_iter = xcalloc (sizeof *by_iter * n_by);
353 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
354 for (i = 0; i < nx; i++)
358 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
365 for (i = 0; i < n_by; i++)
366 x->v[i] = by[i][by_iter[i]];
372 for (i = n_by - 1; i >= 0; i--)
374 if (++by_iter[i] < by_nvar[i])
387 /* All return paths lead here. */
391 for (i = 0; i < n_by; i++)
397 var_set_destroy (var_set);
402 /* Parses the VARIABLES subcommand. */
404 crs_custom_variables (struct cmd_crosstabs *cmd unused)
408 msg (SE, _("VARIABLES must be specified before TABLES."));
416 int orig_nv = variables_cnt;
421 if (!parse_variables (default_dict, &variables, &variables_cnt,
422 (PV_APPEND | PV_NUMERIC
423 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
428 lex_error ("expecting `('");
433 if (!lex_force_int ())
435 min = lex_integer ();
440 if (!lex_force_int ())
442 max = lex_integer ();
445 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
453 lex_error ("expecting `)'");
458 for (i = orig_nv; i < variables_cnt; i++)
460 variables[i]->p.crs.min = min;
461 variables[i]->p.crs.max = max + 1.;
462 variables[i]->p.crs.count = max - min + 1;
481 printf ("CROSSTABS\n");
483 if (variables != NULL)
487 printf ("\t/VARIABLES=");
488 for (i = 0; i < variables_cnt; i++)
490 struct variable *v = variables[i];
492 printf ("%s ", v->name);
493 if (i < variables_cnt - 1)
495 struct variable *nv = variables[i + 1];
497 if (v->p.crs.min == nv->p.crs.min
498 && v->p.crs.max == nv->p.crs.max)
501 printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1);
509 printf ("\t/TABLES=");
510 for (i = 0; i < nxtab; i++)
512 struct crosstab *x = xtab[i];
517 for (j = 0; j < x->nvar; j++)
521 printf ("%s", x->v[j]->name);
527 #endif /* DEBUGGING */
529 /* Data file processing. */
531 static int compare_table_entry (const void *, const void *, void *);
532 static unsigned hash_table_entry (const void *, void *);
534 /* Set up the crosstabulation tables for processing. */
540 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
550 for (i = 0; i < nxtab; i++)
552 struct crosstab *x = xtab[i];
557 x->ofs = n_sorted_tab;
559 for (j = 2; j < x->nvar; j++)
560 count *= x->v[j - 2]->p.crs.count;
562 sorted_tab = xrealloc (sorted_tab,
563 sizeof *sorted_tab * (n_sorted_tab + count));
564 v = local_alloc (sizeof *v * x->nvar);
565 for (j = 2; j < x->nvar; j++)
566 v[j] = x->v[j]->p.crs.min;
567 for (j = 0; j < count; j++)
569 struct table_entry *te;
572 te = sorted_tab[n_sorted_tab++]
573 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
577 const int mat_size = (x->v[0]->p.crs.count
578 * x->v[1]->p.crs.count);
581 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
582 for (m = 0; m < mat_size; m++)
586 for (k = 2; k < x->nvar; k++)
588 for (k = 2; k < x->nvar; k++)
589 if (++v[k] >= x->v[k]->p.crs.max)
590 v[k] = x->v[k]->p.crs.min;
597 sorted_tab = xrealloc (sorted_tab,
598 sizeof *sorted_tab * (n_sorted_tab + 1));
599 sorted_tab[n_sorted_tab] = NULL;
603 /* Form crosstabulations for general mode. */
605 calc_general (struct ccase *c)
608 double weight = dict_get_case_weight (default_dict, c);
610 /* Flattened current table index. */
613 for (t = 0; t < nxtab; t++)
615 struct crosstab *x = xtab[t];
616 const size_t entry_size = (sizeof (struct table_entry)
617 + sizeof (union value) * (x->nvar - 1));
618 struct table_entry *te = local_alloc (entry_size);
620 /* Construct table entry for the current record and table. */
626 for (j = 0; j < x->nvar; j++)
628 if ((cmd.miss == CRS_TABLE
629 && is_missing (&c->data[x->v[j]->fv], x->v[j]))
630 || (cmd.miss == CRS_INCLUDE
631 && is_system_missing (&c->data[x->v[j]->fv], x->v[j])))
633 x->missing += weight;
637 if (x->v[j]->type == NUMERIC)
638 te->v[j].f = c->data[x->v[j]->fv].f;
641 memcpy (te->v[j].s, c->data[x->v[j]->fv].s, x->v[j]->width);
643 /* Necessary in order to simplify comparisons. */
644 memset (&te->v[j].s[x->v[j]->width], 0,
645 sizeof (union value) - x->v[j]->width);
650 /* Add record to hash table. */
652 struct table_entry **tepp
653 = (struct table_entry **) hsh_probe (gen_tab, te);
656 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
659 memcpy (tep, te, entry_size);
664 (*tepp)->u.freq += weight;
675 calc_integer (struct ccase *c)
678 double weight = dict_get_case_weight (default_dict, c);
680 /* Flattened current table index. */
683 for (t = 0; t < nxtab; t++)
685 struct crosstab *x = xtab[t];
690 for (i = 0; i < x->nvar; i++)
692 struct variable *const v = x->v[i];
693 double value = c->data[v->fv].f;
695 /* Note that the first test also rules out SYSMIS. */
696 if ((value < v->p.crs.min || value >= v->p.crs.max)
697 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
699 x->missing += weight;
705 ofs += fact * ((int) value - v->p.crs.min);
706 fact *= v->p.crs.count;
711 const int row = c->data[x->v[ROW_VAR]->fv].f - x->v[ROW_VAR]->p.crs.min;
712 const int col = c->data[x->v[COL_VAR]->fv].f - x->v[COL_VAR]->p.crs.min;
713 const int col_dim = x->v[COL_VAR]->p.crs.count;
715 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
725 /* Print out all table entries in NULL-terminated TAB for use by a
726 debugger (a person, not a program). */
728 print_table_entries (struct table_entry **tab)
730 printf ("raw crosstabulation data:\n");
733 const struct crosstab *x = xtab[(*tab)->table];
736 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
737 for (i = 0; i < x->nvar; i++)
741 printf ("%s:", x->v[i]->name);
743 if (x->v[i]->type == NUMERIC)
744 printf ("%g", (*tab)->v[i].f);
746 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
754 /* Compare the table_entry's at A and B and return a strcmp()-type
757 compare_table_entry (const void *a_, const void *b_, void *foo unused)
759 const struct table_entry *a = a_;
760 const struct table_entry *b = b_;
762 if (a->table > b->table)
764 else if (a->table < b->table)
768 const struct crosstab *x = xtab[a->table];
771 for (i = x->nvar - 1; i >= 0; i--)
772 if (x->v[i]->type == NUMERIC)
774 const double diffnum = a->v[i].f - b->v[i].f;
777 else if (diffnum > 0)
782 assert (x->v[i]->type == ALPHA);
784 const int diffstr = strncmp (a->v[i].s, b->v[i].s, x->v[i]->width);
794 /* Calculate a hash value from table_entry PA. */
796 hash_table_entry (const void *pa, void *foo unused)
798 const struct table_entry *a = pa;
799 unsigned long hash = a->table;
802 /* Hash formula from _SPSS Statistical Algorithms_. */
803 for (i = 0; i < xtab[a->table]->nvar; i++)
805 hash = (hash << 3) | (hash >> (CHAR_BIT * SIZEOF_LONG - 3));
806 hash ^= a->v[i].hash[0];
807 #if SIZEOF_DOUBLE / SIZEOF_LONG > 1
808 hash ^= a->v[i].hash[1];
815 /* Post-data reading calculations. */
817 static struct table_entry **find_pivot_extent (struct table_entry **,
818 int *cnt, int pivot);
819 static void enum_var_values (struct table_entry **entries, int entry_cnt,
821 union value **values, int *value_cnt);
822 static void output_pivot_table (struct table_entry **, struct table_entry **,
823 double **, double **, double **,
824 int *, int *, int *);
825 static void make_summary_table (void);
832 n_sorted_tab = hsh_count (gen_tab);
833 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
835 print_table_entries (sorted_tab);
839 make_summary_table ();
841 /* Identify all the individual crosstabulation tables, and deal with
844 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
845 int pc = n_sorted_tab; /* Pivot count. */
847 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
848 int maxrows = 0, maxcols = 0, maxcells = 0;
852 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
856 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
857 &maxrows, &maxcols, &maxcells);
866 hsh_destroy (gen_tab);
869 static void insert_summary (struct tab_table *, int tab_index, double valid);
871 /* Output a table summarizing the cases processed. */
873 make_summary_table (void)
875 struct tab_table *summary;
877 struct table_entry **pb = sorted_tab, **pe;
878 int pc = n_sorted_tab;
881 summary = tab_create (7, 3 + nxtab, 1);
882 tab_title (summary, 0, _("Summary."));
883 tab_headers (summary, 1, 0, 3, 0);
884 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
885 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
886 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
887 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
888 tab_hline (summary, TAL_1, 1, 6, 1);
889 tab_hline (summary, TAL_1, 1, 6, 2);
890 tab_vline (summary, TAL_1, 3, 1, 1);
891 tab_vline (summary, TAL_1, 5, 1, 1);
895 for (i = 0; i < 3; i++)
897 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
898 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
901 tab_offset (summary, 0, 3);
907 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
911 while (cur_tab < (*pb)->table)
912 insert_summary (summary, cur_tab++, 0.);
915 for (valid = 0.; pb < pe; pb++)
916 valid += (*pb)->u.freq;
919 const struct crosstab *const x = xtab[(*pb)->table];
920 const int n_cols = x->v[COL_VAR]->p.crs.count;
921 const int n_rows = x->v[ROW_VAR]->p.crs.count;
922 const int count = n_cols * n_rows;
924 for (valid = 0.; pb < pe; pb++)
926 const double *data = (*pb)->u.data;
929 for (i = 0; i < count; i++)
933 insert_summary (summary, cur_tab++, valid);
938 while (cur_tab < nxtab)
939 insert_summary (summary, cur_tab++, 0.);
944 /* Inserts a line into T describing the crosstabulation at index
945 TAB_INDEX, which has VALID valid observations. */
947 insert_summary (struct tab_table *t, int tab_index, double valid)
949 struct crosstab *x = xtab[tab_index];
951 tab_hline (t, TAL_1, 0, 6, 0);
953 /* Crosstabulation name. */
955 char *buf = local_alloc (128 * x->nvar);
959 for (i = 0; i < x->nvar; i++)
962 cp = stpcpy (cp, " * ");
964 cp = stpcpy (cp, x->v[i]->label ? x->v[i]->label : x->v[i]->name);
966 tab_text (t, 0, 0, TAB_LEFT, buf);
971 /* Counts and percentages. */
981 for (i = 0; i < 3; i++)
983 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
984 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
995 static struct tab_table *table; /* Crosstabulation table. */
996 static struct tab_table *chisq; /* Chi-square table. */
997 static struct tab_table *sym; /* Symmetric measures table. */
998 static struct tab_table *risk; /* Risk estimate table. */
999 static struct tab_table *direct; /* Directional measures table. */
1002 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
1004 /* Column values, number of columns. */
1005 static union value *cols;
1008 /* Row values, number of rows. */
1009 static union value *rows;
1012 /* Number of statistically interesting columns/rows (columns/rows with
1014 static int ns_cols, ns_rows;
1016 /* Crosstabulation. */
1017 static struct crosstab *x;
1019 /* Number of variables from the crosstabulation to consider. This is
1020 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1023 /* Matrix contents. */
1024 static double *mat; /* Matrix proper. */
1025 static double *row_tot; /* Row totals. */
1026 static double *col_tot; /* Column totals. */
1027 static double W; /* Grand total. */
1029 static void display_dimensions (struct tab_table *, int first_difference,
1030 struct table_entry *);
1031 static void display_crosstabulation (void);
1032 static void display_chisq (void);
1033 static void display_symmetric (void);
1034 static void display_risk (void);
1035 static void display_directional (void);
1036 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1037 static void table_value_missing (struct tab_table *table, int c, int r,
1038 unsigned char opt, const union value *v,
1039 const struct variable *var);
1040 static void delete_missing (void);
1042 /* Output pivot table beginning at PB and continuing until PE,
1043 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1044 hold *MAXROWS entries. */
1046 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1047 double **matp, double **row_totp, double **col_totp,
1048 int *maxrows, int *maxcols, int *maxcells)
1051 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1052 int tc = pe - pb; /* Table count. */
1054 /* Table entry for header comparison. */
1055 struct table_entry *cmp;
1057 x = xtab[(*pb)->table];
1058 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1060 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1062 /* Crosstabulation table initialization. */
1065 table = tab_create (nvar + n_cols,
1066 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1067 tab_headers (table, nvar - 1, 0, 2, 0);
1069 /* First header line. */
1070 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1071 TAB_CENTER | TAT_TITLE, x->v[COL_VAR]->name);
1073 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1075 /* Second header line. */
1079 for (i = 2; i < nvar; i++)
1080 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1081 TAB_RIGHT | TAT_TITLE,
1082 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1083 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1084 x->v[ROW_VAR]->name);
1085 for (i = 0; i < n_cols; i++)
1086 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1088 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1091 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1092 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1096 char *title = local_alloc (x->nvar * 64 + 128);
1100 if (cmd.pivot == CRS_PIVOT)
1101 for (i = 0; i < nvar; i++)
1104 cp = stpcpy (cp, " by ");
1105 cp = stpcpy (cp, x->v[i]->name);
1109 cp = spprintf (cp, "%s by %s for", x->v[0]->name, x->v[1]->name);
1110 for (i = 2; i < nvar; i++)
1112 char buf[64], *bufp;
1117 cp = stpcpy (cp, x->v[i]->name);
1119 data_out (buf, &x->v[i]->print, &(*pb)->v[i]);
1120 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1122 cp = stpcpy (cp, bufp);
1126 cp = stpcpy (cp, " [");
1127 for (i = 0; i < num_cells; i++)
1135 static const struct tuple cell_names[] =
1137 {CRS_CL_COUNT, N_("count")},
1138 {CRS_CL_ROW, N_("row %")},
1139 {CRS_CL_COLUMN, N_("column %")},
1140 {CRS_CL_TOTAL, N_("total %")},
1141 {CRS_CL_EXPECTED, N_("expected")},
1142 {CRS_CL_RESIDUAL, N_("residual")},
1143 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1144 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1148 const struct tuple *t;
1150 for (t = cell_names; t->value != cells[i]; t++)
1151 assert (t->value != -1);
1153 cp = stpcpy (cp, ", ");
1154 cp = stpcpy (cp, gettext (t->name));
1158 tab_title (table, 0, title);
1162 tab_offset (table, 0, 2);
1167 /* Chi-square table initialization. */
1168 if (cmd.a_statistics[CRS_ST_CHISQ])
1170 chisq = tab_create (6 + (nvar - 2),
1171 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1172 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1174 tab_title (chisq, 0, "Chi-square tests.");
1176 tab_offset (chisq, nvar - 2, 0);
1177 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1178 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1179 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1180 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1181 _("Asymp. Sig. (2-sided)"));
1182 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1183 _("Exact. Sig. (2-sided)"));
1184 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1185 _("Exact. Sig. (1-sided)"));
1187 tab_offset (chisq, 0, 1);
1192 /* Symmetric measures. */
1193 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1194 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1195 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1196 || cmd.a_statistics[CRS_ST_KAPPA])
1198 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1199 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1200 tab_title (sym, 0, "Symmetric measures.");
1202 tab_offset (sym, nvar - 2, 0);
1203 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1204 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1205 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1206 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1207 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1208 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1209 tab_offset (sym, 0, 1);
1214 /* Risk estimate. */
1215 if (cmd.a_statistics[CRS_ST_RISK])
1217 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1218 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1219 tab_title (risk, 0, "Risk estimate.");
1221 tab_offset (risk, nvar - 2, 0);
1222 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1223 _(" 95%% Confidence Interval"));
1224 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1225 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1226 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1227 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1228 tab_hline (risk, TAL_1, 2, 3, 1);
1229 tab_vline (risk, TAL_1, 2, 0, 1);
1230 tab_offset (risk, 0, 2);
1235 /* Directional measures. */
1236 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1237 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1239 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1240 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1241 tab_title (direct, 0, "Directional measures.");
1243 tab_offset (direct, nvar - 2, 0);
1244 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1245 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1246 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1247 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1248 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1249 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1250 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1251 tab_offset (direct, 0, 1);
1258 /* Find pivot subtable if applicable. */
1259 te = find_pivot_extent (tb, &tc, 0);
1263 /* Find all the row variable values. */
1264 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1266 /* Allocate memory space for the column and row totals. */
1267 if (n_rows > *maxrows)
1269 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1270 row_tot = *row_totp;
1273 if (n_cols > *maxcols)
1275 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1276 col_tot = *col_totp;
1280 /* Allocate table space for the matrix. */
1281 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1282 tab_realloc (table, -1,
1283 max (tab_nr (table) + (n_rows + 1) * num_cells,
1284 tab_nr (table) * (pe - pb) / (te - tb)));
1286 if (mode == GENERAL)
1288 /* Allocate memory space for the matrix. */
1289 if (n_cols * n_rows > *maxcells)
1291 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1292 *maxcells = n_cols * n_rows;
1297 /* Build the matrix and calculate column totals. */
1299 union value *cur_col = cols;
1300 union value *cur_row = rows;
1302 double *cp = col_tot;
1303 struct table_entry **p;
1306 for (p = &tb[0]; p < te; p++)
1308 for (; memcmp (cur_col, &(*p)->v[COL_VAR], sizeof *cur_col);
1312 for (; cur_row < &rows[n_rows]; cur_row++)
1318 mp = &mat[cur_col - cols];
1321 for (; memcmp (cur_row, &(*p)->v[ROW_VAR], sizeof *cur_row);
1328 *cp += *mp = (*p)->u.freq;
1333 /* Zero out the rest of the matrix. */
1334 for (; cur_row < &rows[n_rows]; cur_row++)
1340 if (cur_col < &cols[n_cols])
1342 const int rem_cols = n_cols - (cur_col - cols);
1345 for (c = 0; c < rem_cols; c++)
1347 mp = &mat[cur_col - cols];
1348 for (r = 0; r < n_rows; r++)
1350 for (c = 0; c < rem_cols; c++)
1352 mp += n_cols - rem_cols;
1360 double *tp = col_tot;
1362 assert (mode == INTEGER);
1363 mat = (*tb)->u.data;
1366 /* Calculate column totals. */
1367 for (c = 0; c < n_cols; c++)
1370 double *cp = &mat[c];
1372 for (r = 0; r < n_rows; r++)
1373 cum += cp[r * n_cols];
1381 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1382 ns_cols += *cp != 0.;
1385 /* Calculate row totals. */
1388 double *rp = row_tot;
1391 for (ns_rows = 0, r = n_rows; r--; )
1394 for (c = n_cols; c--; )
1402 /* Calculate grand total. */
1408 if (n_rows < n_cols)
1409 tp = row_tot, n = n_rows;
1411 tp = col_tot, n = n_cols;
1418 /* Print the matrix. */
1422 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1423 for (i = 2; i < nvar; i++)
1424 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1427 for (c = 0; c < n_cols; c++)
1428 printf ("%4g", cols[c].f);
1430 for (r = 0; r < n_rows; r++)
1432 printf ("%4g:", rows[r].f);
1433 for (c = 0; c < n_cols; c++)
1434 printf ("%4g", mat[c + r * n_cols]);
1435 printf ("%4g", row_tot[r]);
1439 for (c = 0; c < n_cols; c++)
1440 printf ("%4g", col_tot[c]);
1446 /* Find the first variable that differs from the last subtable,
1447 then display the values of the dimensioning variables for
1448 each table that needs it. */
1450 int first_difference = nvar - 1;
1453 for (; ; first_difference--)
1455 assert (first_difference >= 2);
1456 if (memcmp (&cmp->v[first_difference],
1457 &(*tb)->v[first_difference], sizeof *cmp->v))
1463 display_dimensions (table, first_difference, *tb);
1465 display_dimensions (chisq, first_difference, *tb);
1467 display_dimensions (sym, first_difference, *tb);
1469 display_dimensions (risk, first_difference, *tb);
1471 display_dimensions (direct, first_difference, *tb);
1475 display_crosstabulation ();
1476 if (cmd.miss == CRS_REPORT)
1481 display_symmetric ();
1485 display_directional ();
1496 tab_resize (chisq, 4 + (nvar - 2), -1);
1507 /* Delete missing rows and columns for statistical analysis when
1510 delete_missing (void)
1515 for (r = 0; r < n_rows; r++)
1516 if (is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1520 for (c = 0; c < n_cols; c++)
1521 mat[c + r * n_cols] = 0.;
1529 for (c = 0; c < n_cols; c++)
1530 if (is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1534 for (r = 0; r < n_rows; r++)
1535 mat[c + r * n_cols] = 0.;
1541 /* Prepare table T for submission, and submit it. */
1543 submit (struct tab_table *t)
1550 tab_resize (t, -1, 0);
1551 if (tab_nr (t) == tab_t (t))
1556 tab_offset (t, 0, 0);
1558 for (i = 2; i < nvar; i++)
1559 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1560 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1561 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1562 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1564 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1566 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1567 tab_dim (t, crosstabs_dim);
1571 /* Sets the widths of all the columns and heights of all the rows in
1572 table T for driver D. */
1574 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1578 /* Width of a numerical column. */
1579 int c = outp_string_width (d, "0.000000");
1580 if (cmd.miss == CRS_REPORT)
1581 c += outp_string_width (d, "M");
1583 /* Set width for header columns. */
1586 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1588 if (w < d->prop_em_width * 8)
1589 w = d->prop_em_width * 8;
1591 if (w > d->prop_em_width * 15)
1592 w = d->prop_em_width * 15;
1594 for (i = 0; i < t->l; i++)
1598 for (i = t->l; i < t->nc; i++)
1601 for (i = 0; i < t->nr; i++)
1602 t->h[i] = tab_natural_height (t, d, i);
1605 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1606 int *cnt, int pivot);
1607 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1608 int *cnt, int pivot);
1610 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1612 static struct table_entry **
1613 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1615 return (mode == GENERAL
1616 ? find_pivot_extent_general (tp, cnt, pivot)
1617 : find_pivot_extent_integer (tp, cnt, pivot));
1620 /* Find the extent of a region in TP that contains one table. If
1621 PIVOT != 0 that means a set of table entries with identical table
1622 number; otherwise they also have to have the same values for every
1623 dimension after the row and column dimensions. The table that is
1624 searched starts at TP and has length CNT. Returns the first entry
1625 after the last one in the table; sets *CNT to the number of
1626 remaining values. If there are no entries in TP at all, returns
1627 NULL. A yucky interface, admittedly, but it works. */
1628 static struct table_entry **
1629 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1631 struct table_entry *fp = *tp;
1636 x = xtab[(*tp)->table];
1644 if ((*tp)->table != fp->table)
1649 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1656 /* Integer mode correspondent to find_pivot_extent_general(). This
1657 could be optimized somewhat, but I just don't give a crap about
1658 CROSSTABS performance in integer mode, which is just a
1659 CROSSTABS wart as far as I'm concerned.
1661 That said, feel free to send optimization patches to me. */
1662 static struct table_entry **
1663 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1665 struct table_entry *fp = *tp;
1670 x = xtab[(*tp)->table];
1678 if ((*tp)->table != fp->table)
1683 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1690 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1691 result. WIDTH_ points to an int which is either 0 for a
1692 numeric value or a string width for a string value. */
1694 compare_value (const void *a_, const void *b_, void *width_)
1696 const union value *a = a_;
1697 const union value *b = b_;
1698 const int *pwidth = width_;
1699 const int width = *pwidth;
1702 return (a->f < b->f) ? -1 : (a->f > b->f);
1704 return strncmp (a->s, b->s, width);
1707 /* Given an array of ENTRY_CNT table_entry structures starting at
1708 ENTRIES, creates a sorted list of the values that the variable
1709 with index VAR_INDEX takes on. The values are returned as a
1710 malloc()'darray stored in *VALUES, with the number of values
1711 stored in *VALUE_CNT.
1714 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1715 union value **values, int *value_cnt)
1717 if (mode == GENERAL)
1719 int width = xtab[(*entries)->table]->v[var_idx]->width;
1722 *values = xmalloc (sizeof **values * entry_cnt);
1723 for (i = 0; i < entry_cnt; i++)
1724 (*values)[i] = entries[i]->v[var_idx];
1725 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1726 compare_value, &width);
1730 struct crosstab_proc *crs = &xtab[(*entries)->table]->v[var_idx]->p.crs;
1733 assert (mode == INTEGER);
1734 *values = xmalloc (sizeof **values * crs->count);
1735 for (i = 0; i < crs->count; i++)
1736 (*values)[i].f = i + crs->min;
1737 *value_cnt = crs->count;
1741 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1742 from V, displayed with print format spec from variable VAR. When
1743 in REPORT missing-value mode, missing values have an M appended. */
1745 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1746 const union value *v, const struct variable *var)
1748 struct len_string s;
1750 const char *label = val_labs_find (var->val_labs, *v);
1753 tab_text (table, c, r, TAB_LEFT, label);
1757 s.length = var->print.w;
1758 s.string = tab_alloc (table, s.length + 1);
1759 data_out (s.string, &var->print, v);
1760 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1761 s.string[s.length++] = 'M';
1762 while (s.length && *s.string == ' ')
1767 tab_raw (table, c, r, opt, &s);
1770 /* Draws a line across TABLE at the current row to indicate the most
1771 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1772 that changed, and puts the values that changed into the table. TB
1773 and X must be the corresponding table_entry and crosstab,
1776 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1778 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1780 for (; first_difference >= 2; first_difference--)
1781 table_value_missing (table, nvar - first_difference - 1, 0,
1782 TAB_RIGHT, &tb->v[first_difference],
1783 x->v[first_difference]);
1786 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1788 float_M_suffix (struct tab_table *table, int c, int r, double v)
1790 static const struct fmt_spec f = {FMT_F, 8, 0};
1791 struct len_string s;
1794 s.string = tab_alloc (table, 9);
1796 data_out (s.string, &f, (union value *) &v);
1797 while (*s.string == ' ')
1802 tab_raw (table, c, r, TAB_RIGHT, &s);
1805 /* Displays the crosstabulation table. */
1807 display_crosstabulation (void)
1812 for (r = 0; r < n_rows; r++)
1813 table_value_missing (table, nvar - 2, r * num_cells,
1814 TAB_RIGHT, &rows[r], x->v[ROW_VAR]);
1816 tab_text (table, nvar - 2, n_rows * num_cells,
1817 TAB_LEFT, _("Total"));
1819 /* Put in the actual cells. */
1824 tab_offset (table, nvar - 1, -1);
1825 for (r = 0; r < n_rows; r++)
1828 tab_hline (table, TAL_1, -1, n_cols, 0);
1829 for (c = 0; c < n_cols; c++)
1831 double expected_value;
1834 expected_value = row_tot[r] * col_tot[c] / W;
1835 for (i = 0; i < num_cells; i++)
1845 v = *mp / row_tot[r] * 100.;
1848 v = *mp / col_tot[c] * 100.;
1853 case CRS_CL_EXPECTED:
1856 case CRS_CL_RESIDUAL:
1857 v = *mp - expected_value;
1859 case CRS_CL_SRESIDUAL:
1860 v = (*mp - expected_value) / sqrt (expected_value);
1862 case CRS_CL_ASRESIDUAL:
1863 v = ((*mp - expected_value)
1864 / sqrt (expected_value
1865 * (1. - row_tot[r] / W)
1866 * (1. - col_tot[c] / W)));
1872 if (cmd.miss == CRS_REPORT
1873 && (is_num_user_missing (cols[c].f, x->v[COL_VAR])
1874 || is_num_user_missing (rows[r].f, x->v[ROW_VAR])))
1875 float_M_suffix (table, c, i, v);
1877 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1883 tab_offset (table, -1, tab_row (table) + num_cells);
1891 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1892 for (r = 0; r < n_rows; r++)
1893 for (i = 0; i < num_cells; i++)
1906 v = row_tot[r] / W * 100.;
1909 v = row_tot[r] / W * 100.;
1911 case CRS_CL_EXPECTED:
1912 case CRS_CL_RESIDUAL:
1913 case CRS_CL_SRESIDUAL:
1914 case CRS_CL_ASRESIDUAL:
1921 if (cmd.miss == CRS_REPORT
1922 && is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1923 float_M_suffix (table, n_cols, 0, v);
1925 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1927 tab_next_row (table);
1931 /* Column totals, grand total. */
1936 tab_hline (table, TAL_1, -1, n_cols, 0);
1937 for (c = 0; c <= n_cols; c++)
1939 double ct = c < n_cols ? col_tot[c] : W;
1942 for (i = j = 0; i < num_cells; i++)
1960 case CRS_CL_EXPECTED:
1961 case CRS_CL_RESIDUAL:
1962 case CRS_CL_SRESIDUAL:
1963 case CRS_CL_ASRESIDUAL:
1969 if (cmd.miss == CRS_REPORT && c < n_cols
1970 && is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1971 float_M_suffix (table, c, j, v);
1973 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
1979 tab_offset (table, -1, tab_row (table) + j);
1982 tab_offset (table, 0, -1);
1985 static void calc_r (double *X, double *Y, double *, double *, double *);
1986 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1988 /* Display chi-square statistics. */
1990 display_chisq (void)
1992 static const char *chisq_stats[N_CHISQ] =
1994 N_("Pearson Chi-Square"),
1995 N_("Likelihood Ratio"),
1996 N_("Fisher's Exact Test"),
1997 N_("Continuity Correction"),
1998 N_("Linear-by-Linear Association"),
2000 double chisq_v[N_CHISQ];
2001 double fisher1, fisher2;
2007 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2009 tab_offset (chisq, nvar - 2, -1);
2011 for (i = 0; i < N_CHISQ; i++)
2013 if ((i != 2 && chisq_v[i] == SYSMIS)
2014 || (i == 2 && fisher1 == SYSMIS))
2018 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2021 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2022 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2023 tab_float (chisq, 3, 0, TAB_RIGHT,
2024 chisq_sig (chisq_v[i], df[i]), 8, 3);
2029 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2030 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2032 tab_next_row (chisq);
2035 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2036 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2037 tab_next_row (chisq);
2039 tab_offset (chisq, 0, -1);
2042 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2043 double[N_SYMMETRIC]);
2045 /* Display symmetric measures. */
2047 display_symmetric (void)
2049 static const char *categories[] =
2051 N_("Nominal by Nominal"),
2052 N_("Ordinal by Ordinal"),
2053 N_("Interval by Interval"),
2054 N_("Measure of Agreement"),
2057 static const char *stats[N_SYMMETRIC] =
2061 N_("Contingency Coefficient"),
2062 N_("Kendall's tau-b"),
2063 N_("Kendall's tau-c"),
2065 N_("Spearman Correlation"),
2070 static const int stats_categories[N_SYMMETRIC] =
2072 0, 0, 0, 1, 1, 1, 1, 2, 3,
2076 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2079 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2082 tab_offset (sym, nvar - 2, -1);
2084 for (i = 0; i < N_SYMMETRIC; i++)
2086 if (sym_v[i] == SYSMIS)
2089 if (stats_categories[i] != last_cat)
2091 last_cat = stats_categories[i];
2092 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2095 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2096 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2097 if (sym_ase[i] != SYSMIS)
2098 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2099 if (sym_t[i] != SYSMIS)
2100 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2101 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2105 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2106 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2109 tab_offset (sym, 0, -1);
2112 static int calc_risk (double[], double[], double[], union value *);
2114 /* Display risk estimate. */
2119 double risk_v[3], lower[3], upper[3];
2123 if (!calc_risk (risk_v, upper, lower, c))
2126 tab_offset (risk, nvar - 2, -1);
2128 for (i = 0; i < 3; i++)
2130 if (risk_v[i] == SYSMIS)
2136 if (x->v[COL_VAR]->type == NUMERIC)
2137 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2138 x->v[COL_VAR]->name, c[0].f, c[1].f);
2140 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2141 x->v[COL_VAR]->name,
2142 x->v[COL_VAR]->width, c[0].s,
2143 x->v[COL_VAR]->width, c[1].s);
2147 if (x->v[ROW_VAR]->type == NUMERIC)
2148 sprintf (buf, _("For cohort %s = %g"),
2149 x->v[ROW_VAR]->name, rows[i - 1].f);
2151 sprintf (buf, _("For cohort %s = %.*s"),
2152 x->v[ROW_VAR]->name,
2153 x->v[ROW_VAR]->width, rows[i - 1].s);
2157 tab_text (risk, 0, 0, TAB_LEFT, buf);
2158 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2159 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2160 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2161 tab_next_row (risk);
2164 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2165 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2166 tab_next_row (risk);
2168 tab_offset (risk, 0, -1);
2171 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2172 double[N_DIRECTIONAL]);
2174 /* Display directional measures. */
2176 display_directional (void)
2178 static const char *categories[] =
2180 N_("Nominal by Nominal"),
2181 N_("Ordinal by Ordinal"),
2182 N_("Nominal by Interval"),
2185 static const char *stats[] =
2188 N_("Goodman and Kruskal tau"),
2189 N_("Uncertainty Coefficient"),
2194 static const char *types[] =
2201 static const int stats_categories[N_DIRECTIONAL] =
2203 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2206 static const int stats_stats[N_DIRECTIONAL] =
2208 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2211 static const int stats_types[N_DIRECTIONAL] =
2213 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2216 static const int *stats_lookup[] =
2223 static const char **stats_names[] =
2235 double direct_v[N_DIRECTIONAL];
2236 double direct_ase[N_DIRECTIONAL];
2237 double direct_t[N_DIRECTIONAL];
2241 if (!calc_directional (direct_v, direct_ase, direct_t))
2244 tab_offset (direct, nvar - 2, -1);
2246 for (i = 0; i < N_DIRECTIONAL; i++)
2248 if (direct_v[i] == SYSMIS)
2254 for (j = 0; j < 3; j++)
2255 if (last[j] != stats_lookup[j][i])
2258 tab_hline (direct, TAL_1, j, 6, 0);
2263 int k = last[j] = stats_lookup[j][i];
2268 string = x->v[0]->name;
2270 string = x->v[1]->name;
2272 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2273 gettext (stats_names[j][k]), string);
2278 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2279 if (direct_ase[i] != SYSMIS)
2280 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2281 if (direct_t[i] != SYSMIS)
2282 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2283 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2284 tab_next_row (direct);
2287 tab_offset (direct, 0, -1);
2290 /* Statistical calculations. */
2292 /* Returns the value of the gamma (factorial) function for an integer
2295 gamma_int (double x)
2300 for (i = 2; i < x; i++)
2305 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2307 static inline double
2308 Pr (int a, int b, int c, int d)
2310 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2311 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2312 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2313 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2314 / gamma_int (a + b + c + d + 1.));
2317 /* Swap the contents of A and B. */
2319 swap (int *a, int *b)
2326 /* Calculate significance for Fisher's exact test as specified in
2327 _SPSS Statistical Algorithms_, Appendix 5. */
2329 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2333 if (min (c, d) < min (a, b))
2334 swap (&a, &c), swap (&b, &d);
2335 if (min (b, d) < min (a, c))
2336 swap (&a, &b), swap (&c, &d);
2340 swap (&a, &b), swap (&c, &d);
2342 swap (&a, &c), swap (&b, &d);
2346 for (x = 0; x <= a; x++)
2347 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2349 *fisher2 = *fisher1;
2350 for (x = 1; x <= b; x++)
2351 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2354 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2355 columns with values COLS and N_ROWS rows with values ROWS. Values
2356 in the matrix sum to W. */
2358 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2359 double *fisher1, double *fisher2)
2363 chisq[0] = chisq[1] = 0.;
2364 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2365 *fisher1 = *fisher2 = SYSMIS;
2367 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2369 if (ns_rows <= 1 || ns_cols <= 1)
2371 chisq[0] = chisq[1] = SYSMIS;
2375 for (r = 0; r < n_rows; r++)
2376 for (c = 0; c < n_cols; c++)
2378 const double expected = row_tot[r] * col_tot[c] / W;
2379 const double freq = mat[n_cols * r + c];
2380 const double residual = freq - expected;
2383 chisq[0] += residual * residual / expected;
2385 chisq[1] += freq * log (expected / freq);
2396 /* Calculate Yates and Fisher exact test. */
2397 if (ns_cols == 2 && ns_rows == 2)
2399 double f11, f12, f21, f22;
2405 for (i = j = 0; i < n_cols; i++)
2406 if (col_tot[i] != 0.)
2415 f11 = mat[nz_cols[0]];
2416 f12 = mat[nz_cols[1]];
2417 f21 = mat[nz_cols[0] + n_cols];
2418 f22 = mat[nz_cols[1] + n_cols];
2423 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2426 chisq[3] = (W * x * x
2427 / (f11 + f12) / (f21 + f22)
2428 / (f11 + f21) / (f12 + f22));
2436 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2437 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2440 /* Calculate Mantel-Haenszel. */
2441 if (x->v[ROW_VAR]->type == NUMERIC && x->v[COL_VAR]->type == NUMERIC)
2443 double r, ase_0, ase_1;
2444 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2446 chisq[4] = (W - 1.) * r * r;
2451 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2452 ASE_1, and ase_0 into ASE_0. The row and column values must be
2453 passed in X and Y. */
2455 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2457 double SX, SY, S, T;
2459 double sum_XYf, sum_X2Y2f;
2460 double sum_Xr, sum_X2r;
2461 double sum_Yc, sum_Y2c;
2464 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2465 for (j = 0; j < n_cols; j++)
2467 double fij = mat[j + i * n_cols];
2468 double product = X[i] * Y[j];
2469 double temp = fij * product;
2471 sum_X2Y2f += temp * product;
2474 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2476 sum_Xr += X[i] * row_tot[i];
2477 sum_X2r += X[i] * X[i] * row_tot[i];
2481 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2483 sum_Yc += Y[i] * col_tot[i];
2484 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2488 S = sum_XYf - sum_Xr * sum_Yc / W;
2489 SX = sum_X2r - sum_Xr * sum_Xr / W;
2490 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2493 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2498 for (s = c = 0., i = 0; i < n_rows; i++)
2499 for (j = 0; j < n_cols; j++)
2501 double Xresid, Yresid;
2504 Xresid = X[i] - Xbar;
2505 Yresid = Y[j] - Ybar;
2506 temp = (T * Xresid * Yresid
2508 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2509 y = mat[j + i * n_cols] * temp * temp - c;
2514 *ase_1 = sqrt (s) / (T * T);
2518 static double somers_d_v[3];
2519 static double somers_d_ase[3];
2520 static double somers_d_t[3];
2522 /* Calculate symmetric statistics and their asymptotic standard
2523 errors. Returns 0 if none could be calculated. */
2525 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2526 double t[N_SYMMETRIC])
2528 int q = min (ns_rows, ns_cols);
2537 for (i = 0; i < N_SYMMETRIC; i++)
2538 v[i] = ase[i] = t[i] = SYSMIS;
2541 /* Phi, Cramer's V, contingency coefficient. */
2542 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2544 double Xp = 0.; /* Pearson chi-square. */
2549 for (r = 0; r < n_rows; r++)
2550 for (c = 0; c < n_cols; c++)
2552 const double expected = row_tot[r] * col_tot[c] / W;
2553 const double freq = mat[n_cols * r + c];
2554 const double residual = freq - expected;
2557 Xp += residual * residual / expected;
2561 if (cmd.a_statistics[CRS_ST_PHI])
2563 v[0] = sqrt (Xp / W);
2564 v[1] = sqrt (Xp / (W * (q - 1)));
2566 if (cmd.a_statistics[CRS_ST_CC])
2567 v[2] = sqrt (Xp / (Xp + W));
2570 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2571 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2576 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2583 for (r = 0; r < n_rows; r++)
2584 Dr -= row_tot[r] * row_tot[r];
2585 for (c = 0; c < n_cols; c++)
2586 Dc -= col_tot[c] * col_tot[c];
2592 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2593 for (c = 0; c < n_cols; c++)
2597 for (r = 0; r < n_rows; r++)
2598 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2608 for (i = 0; i < n_rows; i++)
2612 for (j = 1; j < n_cols; j++)
2613 Cij += col_tot[j] - cum[j + i * n_cols];
2616 for (j = 1; j < n_cols; j++)
2617 Dij += cum[j + (i - 1) * n_cols];
2621 double fij = mat[j + i * n_cols];
2627 assert (j < n_cols);
2629 Cij -= col_tot[j] - cum[j + i * n_cols];
2630 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2634 Cij += cum[j - 1 + (i - 1) * n_cols];
2635 Dij -= cum[j + (i - 1) * n_cols];
2641 if (cmd.a_statistics[CRS_ST_BTAU])
2642 v[3] = (P - Q) / sqrt (Dr * Dc);
2643 if (cmd.a_statistics[CRS_ST_CTAU])
2644 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2645 if (cmd.a_statistics[CRS_ST_GAMMA])
2646 v[5] = (P - Q) / (P + Q);
2648 /* ASE for tau-b, tau-c, gamma. Calculations could be
2649 eliminated here, at expense of memory. */
2654 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2655 for (i = 0; i < n_rows; i++)
2659 for (j = 1; j < n_cols; j++)
2660 Cij += col_tot[j] - cum[j + i * n_cols];
2663 for (j = 1; j < n_cols; j++)
2664 Dij += cum[j + (i - 1) * n_cols];
2668 double fij = mat[j + i * n_cols];
2670 if (cmd.a_statistics[CRS_ST_BTAU])
2672 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2673 + v[3] * (row_tot[i] * Dc
2674 + col_tot[j] * Dr));
2675 btau_cum += fij * temp * temp;
2679 const double temp = Cij - Dij;
2680 ctau_cum += fij * temp * temp;
2683 if (cmd.a_statistics[CRS_ST_GAMMA])
2685 const double temp = Q * Cij - P * Dij;
2686 gamma_cum += fij * temp * temp;
2689 if (cmd.a_statistics[CRS_ST_D])
2691 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2692 - (P - Q) * (W - row_tot[i]));
2693 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2694 - (Q - P) * (W - col_tot[j]));
2699 assert (j < n_cols);
2701 Cij -= col_tot[j] - cum[j + i * n_cols];
2702 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2706 Cij += cum[j - 1 + (i - 1) * n_cols];
2707 Dij -= cum[j + (i - 1) * n_cols];
2713 btau_var = ((btau_cum
2714 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2716 if (cmd.a_statistics[CRS_ST_BTAU])
2718 ase[3] = sqrt (btau_var);
2719 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2722 if (cmd.a_statistics[CRS_ST_CTAU])
2724 ase[4] = ((2 * q / ((q - 1) * W * W))
2725 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2726 t[4] = v[4] / ase[4];
2728 if (cmd.a_statistics[CRS_ST_GAMMA])
2730 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2731 t[5] = v[5] / (2. / (P + Q)
2732 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2734 if (cmd.a_statistics[CRS_ST_D])
2736 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2737 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2738 somers_d_t[0] = (somers_d_v[0]
2740 * sqrt (ctau_cum - sqr (P - Q) / W)));
2741 somers_d_v[1] = (P - Q) / Dc;
2742 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2743 somers_d_t[1] = (somers_d_v[1]
2745 * sqrt (ctau_cum - sqr (P - Q) / W)));
2746 somers_d_v[2] = (P - Q) / Dr;
2747 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2748 somers_d_t[2] = (somers_d_v[2]
2750 * sqrt (ctau_cum - sqr (P - Q) / W)));
2756 /* Spearman correlation, Pearson's r. */
2757 if (cmd.a_statistics[CRS_ST_CORR])
2759 double *R = local_alloc (sizeof *R * n_rows);
2760 double *C = local_alloc (sizeof *C * n_cols);
2763 double y, t, c = 0., s = 0.;
2768 R[i] = s + (row_tot[i] + 1.) / 2.;
2775 assert (i < n_rows);
2780 double y, t, c = 0., s = 0.;
2785 C[j] = s + (col_tot[j] + 1.) / 2;
2792 assert (j < n_cols);
2796 calc_r (R, C, &v[6], &t[6], &ase[6]);
2802 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2806 /* Cohen's kappa. */
2807 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2809 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2812 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2813 i < ns_rows; i++, j++)
2817 while (col_tot[j] == 0.)
2820 prod = row_tot[i] * col_tot[j];
2821 sum = row_tot[i] + col_tot[j];
2823 sum_fii += mat[j + i * n_cols];
2825 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2826 sum_riciri_ci += prod * sum;
2828 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2829 for (j = 0; j < ns_cols; j++)
2831 double sum = row_tot[i] + col_tot[j];
2832 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2835 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2837 ase[8] = sqrt ((W * W * sum_rici
2838 + sum_rici * sum_rici
2839 - W * sum_riciri_ci)
2840 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2842 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2843 / sqr (W * W - sum_rici))
2844 + ((2. * (W - sum_fii)
2845 * (2. * sum_fii * sum_rici
2846 - W * sum_fiiri_ci))
2847 / cube (W * W - sum_rici))
2848 + (sqr (W - sum_fii)
2849 * (W * sum_fijri_ci2 - 4.
2850 * sum_rici * sum_rici)
2851 / pow4 (W * W - sum_rici))));
2853 t[8] = v[8] / ase[8];
2860 /* Calculate risk estimate. */
2862 calc_risk (double *value, double *upper, double *lower, union value *c)
2864 double f11, f12, f21, f22;
2870 for (i = 0; i < 3; i++)
2871 value[i] = upper[i] = lower[i] = SYSMIS;
2874 if (ns_rows != 2 || ns_cols != 2)
2881 for (i = j = 0; i < n_cols; i++)
2882 if (col_tot[i] != 0.)
2891 f11 = mat[nz_cols[0]];
2892 f12 = mat[nz_cols[1]];
2893 f21 = mat[nz_cols[0] + n_cols];
2894 f22 = mat[nz_cols[1] + n_cols];
2896 c[0] = cols[nz_cols[0]];
2897 c[1] = cols[nz_cols[1]];
2900 value[0] = (f11 * f22) / (f12 * f21);
2901 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2902 lower[0] = value[0] * exp (-1.960 * v);
2903 upper[0] = value[0] * exp (1.960 * v);
2905 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2906 v = sqrt ((f12 / (f11 * (f11 + f12)))
2907 + (f22 / (f21 * (f21 + f22))));
2908 lower[1] = value[1] * exp (-1.960 * v);
2909 upper[1] = value[1] * exp (1.960 * v);
2911 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2912 v = sqrt ((f11 / (f12 * (f11 + f12)))
2913 + (f21 / (f22 * (f21 + f22))));
2914 lower[2] = value[2] * exp (-1.960 * v);
2915 upper[2] = value[2] * exp (1.960 * v);
2920 /* Calculate directional measures. */
2922 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2923 double t[N_DIRECTIONAL])
2928 for (i = 0; i < N_DIRECTIONAL; i++)
2929 v[i] = ase[i] = t[i] = SYSMIS;
2933 if (cmd.a_statistics[CRS_ST_LAMBDA])
2935 double *fim = xmalloc (sizeof *fim * n_rows);
2936 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2937 double *fmj = xmalloc (sizeof *fmj * n_cols);
2938 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2939 double sum_fim, sum_fmj;
2941 int rm_index, cm_index;
2944 /* Find maximum for each row and their sum. */
2945 for (sum_fim = 0., i = 0; i < n_rows; i++)
2947 double max = mat[i * n_cols];
2950 for (j = 1; j < n_cols; j++)
2951 if (mat[j + i * n_cols] > max)
2953 max = mat[j + i * n_cols];
2957 sum_fim += fim[i] = max;
2958 fim_index[i] = index;
2961 /* Find maximum for each column. */
2962 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2964 double max = mat[j];
2967 for (i = 1; i < n_rows; i++)
2968 if (mat[j + i * n_cols] > max)
2970 max = mat[j + i * n_cols];
2974 sum_fmj += fmj[j] = max;
2975 fmj_index[j] = index;
2978 /* Find maximum row total. */
2981 for (i = 1; i < n_rows; i++)
2982 if (row_tot[i] > rm)
2988 /* Find maximum column total. */
2991 for (j = 1; j < n_cols; j++)
2992 if (col_tot[j] > cm)
2998 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2999 v[1] = (sum_fmj - rm) / (W - rm);
3000 v[2] = (sum_fim - cm) / (W - cm);
3002 /* ASE1 for Y given X. */
3006 for (accum = 0., i = 0; i < n_rows; i++)
3007 for (j = 0; j < n_cols; j++)
3009 const int deltaj = j == cm_index;
3010 accum += (mat[j + i * n_cols]
3011 * sqr ((j == fim_index[i])
3016 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3019 /* ASE0 for Y given X. */
3023 for (accum = 0., i = 0; i < n_rows; i++)
3024 if (cm_index != fim_index[i])
3025 accum += (mat[i * n_cols + fim_index[i]]
3026 + mat[i * n_cols + cm_index]);
3027 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3030 /* ASE1 for X given Y. */
3034 for (accum = 0., i = 0; i < n_rows; i++)
3035 for (j = 0; j < n_cols; j++)
3037 const int deltaj = i == rm_index;
3038 accum += (mat[j + i * n_cols]
3039 * sqr ((i == fmj_index[j])
3044 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3047 /* ASE0 for X given Y. */
3051 for (accum = 0., j = 0; j < n_cols; j++)
3052 if (rm_index != fmj_index[j])
3053 accum += (mat[j + n_cols * fmj_index[j]]
3054 + mat[j + n_cols * rm_index]);
3055 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3058 /* Symmetric ASE0 and ASE1. */
3063 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3064 for (j = 0; j < n_cols; j++)
3066 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3067 int temp1 = (i == rm_index) + (j == cm_index);
3068 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3069 accum1 += (mat[j + i * n_cols]
3070 * sqr (temp0 + (v[0] - 1.) * temp1));
3072 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3073 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3074 / (2. * W - rm - cm));
3083 double sum_fij2_ri, sum_fij2_ci;
3084 double sum_ri2, sum_cj2;
3086 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3087 for (j = 0; j < n_cols; j++)
3089 double temp = sqr (mat[j + i * n_cols]);
3090 sum_fij2_ri += temp / row_tot[i];
3091 sum_fij2_ci += temp / col_tot[j];
3094 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3095 sum_ri2 += row_tot[i] * row_tot[i];
3097 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3098 sum_cj2 += col_tot[j] * col_tot[j];
3100 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3101 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3105 if (cmd.a_statistics[CRS_ST_UC])
3107 double UX, UY, UXY, P;
3108 double ase1_yx, ase1_xy, ase1_sym;
3111 for (UX = 0., i = 0; i < n_rows; i++)
3112 if (row_tot[i] > 0.)
3113 UX -= row_tot[i] / W * log (row_tot[i] / W);
3115 for (UY = 0., j = 0; j < n_cols; j++)
3116 if (col_tot[j] > 0.)
3117 UY -= col_tot[j] / W * log (col_tot[j] / W);
3119 for (UXY = P = 0., i = 0; i < n_rows; i++)
3120 for (j = 0; j < n_cols; j++)
3122 double entry = mat[j + i * n_cols];
3127 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3128 UXY -= entry / W * log (entry / W);
3131 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3132 for (j = 0; j < n_cols; j++)
3134 double entry = mat[j + i * n_cols];
3139 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3140 + (UX - UXY) * log (col_tot[j] / W));
3141 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3142 + (UY - UXY) * log (row_tot[i] / W));
3143 ase1_sym += entry * sqr ((UXY
3144 * log (row_tot[i] * col_tot[j] / (W * W)))
3145 - (UX + UY) * log (entry / W));
3148 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3149 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3150 t[5] = v[5] / ((2. / (W * (UX + UY)))
3151 * sqrt (P - sqr (UX + UY - UXY) / W));
3153 v[6] = (UX + UY - UXY) / UX;
3154 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3155 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3157 v[7] = (UX + UY - UXY) / UY;
3158 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3159 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3163 if (cmd.a_statistics[CRS_ST_D])
3168 calc_symmetric (NULL, NULL, NULL);
3169 for (i = 0; i < 3; i++)
3171 v[8 + i] = somers_d_v[i];
3172 ase[8 + i] = somers_d_ase[i];
3173 t[8 + i] = somers_d_t[i];
3178 if (cmd.a_statistics[CRS_ST_ETA])
3181 double sum_Xr, sum_X2r;
3185 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3187 sum_Xr += rows[i].f * row_tot[i];
3188 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3190 SX = sum_X2r - sum_Xr * sum_Xr / W;
3192 for (SXW = 0., j = 0; j < n_cols; j++)
3196 for (cum = 0., i = 0; i < n_rows; i++)
3198 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3199 cum += rows[i].f * mat[j + i * n_cols];
3202 SXW -= cum * cum / col_tot[j];
3204 v[11] = sqrt (1. - SXW / SX);
3208 double sum_Yc, sum_Y2c;
3212 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3214 sum_Yc += cols[i].f * col_tot[i];
3215 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3217 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3219 for (SYW = 0., i = 0; i < n_rows; i++)
3223 for (cum = 0., j = 0; j < n_cols; j++)
3225 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3226 cum += cols[j].f * mat[j + i * n_cols];
3229 SYW -= cum * cum / row_tot[i];
3231 v[12] = sqrt (1. - SYW / SY);