1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include <gsl/gsl_cdf.h>
38 #include "algorithm.h"
49 #include "value-labels.h"
53 #include "debug-print.h"
59 +missing=miss:!table/include/report;
60 +write[wr_]=none,cells,all;
61 +format=fmt:!labels/nolabels/novallabs,
64 tabl:!tables/notables,
67 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
69 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
75 /* Number of chi-square statistics. */
78 /* Number of symmetric statistics. */
81 /* Number of directional statistics. */
82 #define N_DIRECTIONAL 13
84 /* A single table entry for general mode. */
87 int table; /* Flattened table number. */
90 double freq; /* Frequency count. */
91 double *data; /* Crosstabulation table for integer mode. */
94 union value values[1]; /* Values. */
97 /* A crosstabulation. */
100 int nvar; /* Number of variables. */
101 double missing; /* Missing cases count. */
102 int ofs; /* Integer mode: Offset into sorted_tab[]. */
103 struct variable *vars[2]; /* At least two variables; sorted by
104 larger indices first. */
107 /* Indexes into crosstab.v. */
114 /* General mode crosstabulation table. */
115 static struct hsh_table *gen_tab; /* Hash table. */
116 static int n_sorted_tab; /* Number of entries in sorted_tab. */
117 static struct table_entry **sorted_tab; /* Sorted table. */
119 /* Variables specifies on VARIABLES. */
120 static struct variable **variables;
121 static size_t variables_cnt;
124 static struct crosstab **xtab;
127 /* Integer or general mode? */
136 static int num_cells; /* Number of cells requested. */
137 static int cells[8]; /* Cells requested. */
140 static int write; /* One of WR_* that specifies the WRITE style. */
142 /* Command parsing info. */
143 static struct cmd_crosstabs cmd;
146 static struct pool *pl_tc; /* For table cells. */
147 static struct pool *pl_col; /* For column data. */
149 static int internal_cmd_crosstabs (void);
150 static void precalc (void *);
151 static int calc_general (struct ccase *, void *);
152 static int calc_integer (struct ccase *, void *);
153 static void postcalc (void *);
154 static void submit (struct tab_table *);
156 static void format_short (char *s, const struct fmt_spec *fp,
157 const union value *v);
160 static void debug_print (void);
161 static void print_table_entries (struct table_entry **tab);
164 /* Parse and execute CROSSTABS, then clean up. */
168 int result = internal_cmd_crosstabs ();
171 pool_destroy (pl_tc);
172 pool_destroy (pl_col);
177 /* Parses and executes the CROSSTABS procedure. */
179 internal_cmd_crosstabs (void)
187 pl_tc = pool_create ();
188 pl_col = pool_create ();
190 if (!parse_crosstabs (&cmd))
194 /* Needs variables. */
198 mode = variables ? INTEGER : GENERAL;
203 cmd.a_cells[CRS_CL_COUNT] = 1;
209 for (i = 0; i < CRS_CL_count; i++)
214 cmd.a_cells[CRS_CL_COUNT] = 1;
215 cmd.a_cells[CRS_CL_ROW] = 1;
216 cmd.a_cells[CRS_CL_COLUMN] = 1;
217 cmd.a_cells[CRS_CL_TOTAL] = 1;
219 if (cmd.a_cells[CRS_CL_ALL])
221 for (i = 0; i < CRS_CL_count; i++)
223 cmd.a_cells[CRS_CL_ALL] = 0;
225 cmd.a_cells[CRS_CL_NONE] = 0;
227 for (num_cells = i = 0; i < CRS_CL_count; i++)
229 cells[num_cells++] = i;
232 if (cmd.sbc_statistics)
237 for (i = 0; i < CRS_ST_count; i++)
238 if (cmd.a_statistics[i])
241 cmd.a_statistics[CRS_ST_CHISQ] = 1;
242 if (cmd.a_statistics[CRS_ST_ALL])
243 for (i = 0; i < CRS_ST_count; i++)
244 cmd.a_statistics[i] = 1;
248 if (cmd.miss == CRS_REPORT && mode == GENERAL)
250 msg (SE, _("Missing mode REPORT not allowed in general mode. "
251 "Assuming MISSING=TABLE."));
252 cmd.miss = CRS_TABLE;
256 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
257 cmd.a_write[CRS_WR_ALL] = 0;
258 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
260 msg (SE, _("Write mode ALL not allowed in general mode. "
261 "Assuming WRITE=CELLS."));
262 cmd.a_write[CRS_WR_CELLS] = 1;
265 && (cmd.a_write[CRS_WR_NONE]
266 + cmd.a_write[CRS_WR_ALL]
267 + cmd.a_write[CRS_WR_CELLS] == 0))
268 cmd.a_write[CRS_WR_CELLS] = 1;
269 if (cmd.a_write[CRS_WR_CELLS])
270 write = CRS_WR_CELLS;
271 else if (cmd.a_write[CRS_WR_ALL])
276 procedure_with_splits (precalc,
277 mode == GENERAL ? calc_general : calc_integer,
283 /* Parses the TABLES subcommand. */
285 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
287 struct var_set *var_set;
289 struct variable ***by = NULL;
294 /* Ensure that this is a TABLES subcommand. */
295 if (!lex_match_id ("TABLES")
296 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
301 if (variables != NULL)
302 var_set = var_set_create_from_array (variables, variables_cnt);
304 var_set = var_set_create_from_dict (default_dict);
305 assert (var_set != NULL);
309 by = xrealloc (by, sizeof *by * (n_by + 1));
310 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
311 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
312 PV_NO_DUPLICATE | PV_NO_SCRATCH))
317 if (!lex_match (T_BY))
321 lex_error (_("expecting BY"));
330 int *by_iter = xcalloc (sizeof *by_iter * n_by);
333 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
334 for (i = 0; i < nx; i++)
338 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
345 for (i = 0; i < n_by; i++)
346 x->vars[i] = by[i][by_iter[i]];
352 for (i = n_by - 1; i >= 0; i--)
354 if (++by_iter[i] < by_nvar[i])
367 /* All return paths lead here. */
371 for (i = 0; i < n_by; i++)
377 var_set_destroy (var_set);
382 /* Parses the VARIABLES subcommand. */
384 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
388 msg (SE, _("VARIABLES must be specified before TABLES."));
396 int orig_nv = variables_cnt;
401 if (!parse_variables (default_dict, &variables, &variables_cnt,
402 (PV_APPEND | PV_NUMERIC
403 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
408 lex_error ("expecting `('");
413 if (!lex_force_int ())
415 min = lex_integer ();
420 if (!lex_force_int ())
422 max = lex_integer ();
425 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
433 lex_error ("expecting `)'");
438 for (i = orig_nv; i < variables_cnt; i++)
440 variables[i]->p.crs.min = min;
441 variables[i]->p.crs.max = max + 1.;
442 variables[i]->p.crs.count = max - min + 1;
461 printf ("CROSSTABS\n");
463 if (variables != NULL)
467 printf ("\t/VARIABLES=");
468 for (i = 0; i < variables_cnt; i++)
470 struct variable *v = variables[i];
472 printf ("%s ", v->name);
473 if (i < variables_cnt - 1)
475 struct variable *nv = variables[i + 1];
477 if (v->p.crs.min == nv->p.crs.min
478 && v->p.crs.max == nv->p.crs.max)
481 printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1);
489 printf ("\t/TABLES=");
490 for (i = 0; i < nxtab; i++)
492 struct crosstab *x = xtab[i];
497 for (j = 0; j < x->nvar; j++)
501 printf ("%s", x->v[j]->name);
507 #endif /* DEBUGGING */
509 /* Data file processing. */
511 static int compare_table_entry (const void *, const void *, void *);
512 static unsigned hash_table_entry (const void *, void *);
514 /* Set up the crosstabulation tables for processing. */
516 precalc (void *aux UNUSED)
520 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
530 for (i = 0; i < nxtab; i++)
532 struct crosstab *x = xtab[i];
537 x->ofs = n_sorted_tab;
539 for (j = 2; j < x->nvar; j++)
540 count *= x->vars[j - 2]->p.crs.count;
542 sorted_tab = xrealloc (sorted_tab,
543 sizeof *sorted_tab * (n_sorted_tab + count));
544 v = local_alloc (sizeof *v * x->nvar);
545 for (j = 2; j < x->nvar; j++)
546 v[j] = x->vars[j]->p.crs.min;
547 for (j = 0; j < count; j++)
549 struct table_entry *te;
552 te = sorted_tab[n_sorted_tab++]
553 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
557 const int mat_size = (x->vars[0]->p.crs.count
558 * x->vars[1]->p.crs.count);
561 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
562 for (m = 0; m < mat_size; m++)
566 for (k = 2; k < x->nvar; k++)
567 te->values[k].f = v[k];
568 for (k = 2; k < x->nvar; k++)
569 if (++v[k] >= x->vars[k]->p.crs.max)
570 v[k] = x->vars[k]->p.crs.min;
577 sorted_tab = xrealloc (sorted_tab,
578 sizeof *sorted_tab * (n_sorted_tab + 1));
579 sorted_tab[n_sorted_tab] = NULL;
583 /* Form crosstabulations for general mode. */
585 calc_general (struct ccase *c, void *aux UNUSED)
590 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
592 /* Flattened current table index. */
595 for (t = 0; t < nxtab; t++)
597 struct crosstab *x = xtab[t];
598 const size_t entry_size = (sizeof (struct table_entry)
599 + sizeof (union value) * (x->nvar - 1));
600 struct table_entry *te = local_alloc (entry_size);
602 /* Construct table entry for the current record and table. */
608 for (j = 0; j < x->nvar; j++)
610 if ((cmd.miss == CRS_TABLE
611 && is_missing (&c->data[x->vars[j]->fv], x->vars[j]))
612 || (cmd.miss == CRS_INCLUDE
613 && is_system_missing (&c->data[x->vars[j]->fv],
616 x->missing += weight;
620 if (x->vars[j]->type == NUMERIC)
621 te->values[j].f = c->data[x->vars[j]->fv].f;
624 memcpy (te->values[j].s, c->data[x->vars[j]->fv].s,
627 /* Necessary in order to simplify comparisons. */
628 memset (&te->values[j].s[x->vars[j]->width], 0,
629 sizeof (union value) - x->vars[j]->width);
634 /* Add record to hash table. */
636 struct table_entry **tepp
637 = (struct table_entry **) hsh_probe (gen_tab, te);
640 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
643 memcpy (tep, te, entry_size);
648 (*tepp)->u.freq += weight;
659 calc_integer (struct ccase *c, void *aux UNUSED)
664 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
666 /* Flattened current table index. */
669 for (t = 0; t < nxtab; t++)
671 struct crosstab *x = xtab[t];
676 for (i = 0; i < x->nvar; i++)
678 struct variable *const v = x->vars[i];
679 double value = c->data[v->fv].f;
681 /* Note that the first test also rules out SYSMIS. */
682 if ((value < v->p.crs.min || value >= v->p.crs.max)
683 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
685 x->missing += weight;
691 ofs += fact * ((int) value - v->p.crs.min);
692 fact *= v->p.crs.count;
697 const int row = c->data[x->vars[ROW_VAR]->fv].f - x->vars[ROW_VAR]->p.crs.min;
698 const int col = c->data[x->vars[COL_VAR]->fv].f - x->vars[COL_VAR]->p.crs.min;
699 const int col_dim = x->vars[COL_VAR]->p.crs.count;
701 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
711 /* Print out all table entries in NULL-terminated TAB for use by a
712 debugger (a person, not a program). */
714 print_table_entries (struct table_entry **tab)
716 printf ("raw crosstabulation data:\n");
719 const struct crosstab *x = xtab[(*tab)->table];
722 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
723 for (i = 0; i < x->nvar; i++)
727 printf ("%s:", x->v[i]->name);
729 if (x->v[i]->type == NUMERIC)
730 printf ("%g", (*tab)->v[i].f);
732 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
740 /* Compare the table_entry's at A and B and return a strcmp()-type
743 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
745 const struct table_entry *a = a_;
746 const struct table_entry *b = b_;
748 if (a->table > b->table)
750 else if (a->table < b->table)
754 const struct crosstab *x = xtab[a->table];
757 for (i = x->nvar - 1; i >= 0; i--)
758 if (x->vars[i]->type == NUMERIC)
760 const double diffnum = a->values[i].f - b->values[i].f;
763 else if (diffnum > 0)
768 assert (x->vars[i]->type == ALPHA);
770 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
781 /* Calculate a hash value from table_entry A. */
783 hash_table_entry (const void *a_, void *foo UNUSED)
785 const struct table_entry *a = a_;
790 for (i = 0; i < xtab[a->table]->nvar; i++)
791 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
796 /* Post-data reading calculations. */
798 static struct table_entry **find_pivot_extent (struct table_entry **,
799 int *cnt, int pivot);
800 static void enum_var_values (struct table_entry **entries, int entry_cnt,
802 union value **values, int *value_cnt);
803 static void output_pivot_table (struct table_entry **, struct table_entry **,
804 double **, double **, double **,
805 int *, int *, int *);
806 static void make_summary_table (void);
809 postcalc (void *aux UNUSED)
813 n_sorted_tab = hsh_count (gen_tab);
814 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
816 print_table_entries (sorted_tab);
820 make_summary_table ();
822 /* Identify all the individual crosstabulation tables, and deal with
825 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
826 int pc = n_sorted_tab; /* Pivot count. */
828 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
829 int maxrows = 0, maxcols = 0, maxcells = 0;
833 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
837 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
838 &maxrows, &maxcols, &maxcells);
847 hsh_destroy (gen_tab);
850 static void insert_summary (struct tab_table *, int tab_index, double valid);
852 /* Output a table summarizing the cases processed. */
854 make_summary_table (void)
856 struct tab_table *summary;
858 struct table_entry **pb = sorted_tab, **pe;
859 int pc = n_sorted_tab;
862 summary = tab_create (7, 3 + nxtab, 1);
863 tab_title (summary, 0, _("Summary."));
864 tab_headers (summary, 1, 0, 3, 0);
865 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
866 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
867 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
868 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
869 tab_hline (summary, TAL_1, 1, 6, 1);
870 tab_hline (summary, TAL_1, 1, 6, 2);
871 tab_vline (summary, TAL_1, 3, 1, 1);
872 tab_vline (summary, TAL_1, 5, 1, 1);
876 for (i = 0; i < 3; i++)
878 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
879 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
882 tab_offset (summary, 0, 3);
888 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
892 while (cur_tab < (*pb)->table)
893 insert_summary (summary, cur_tab++, 0.);
896 for (valid = 0.; pb < pe; pb++)
897 valid += (*pb)->u.freq;
900 const struct crosstab *const x = xtab[(*pb)->table];
901 const int n_cols = x->vars[COL_VAR]->p.crs.count;
902 const int n_rows = x->vars[ROW_VAR]->p.crs.count;
903 const int count = n_cols * n_rows;
905 for (valid = 0.; pb < pe; pb++)
907 const double *data = (*pb)->u.data;
910 for (i = 0; i < count; i++)
914 insert_summary (summary, cur_tab++, valid);
919 while (cur_tab < nxtab)
920 insert_summary (summary, cur_tab++, 0.);
925 /* Inserts a line into T describing the crosstabulation at index
926 TAB_INDEX, which has VALID valid observations. */
928 insert_summary (struct tab_table *t, int tab_index, double valid)
930 struct crosstab *x = xtab[tab_index];
932 tab_hline (t, TAL_1, 0, 6, 0);
934 /* Crosstabulation name. */
936 char *buf = local_alloc (128 * x->nvar);
940 for (i = 0; i < x->nvar; i++)
943 cp = stpcpy (cp, " * ");
946 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
948 tab_text (t, 0, 0, TAB_LEFT, buf);
953 /* Counts and percentages. */
963 for (i = 0; i < 3; i++)
965 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
966 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
977 static struct tab_table *table; /* Crosstabulation table. */
978 static struct tab_table *chisq; /* Chi-square table. */
979 static struct tab_table *sym; /* Symmetric measures table. */
980 static struct tab_table *risk; /* Risk estimate table. */
981 static struct tab_table *direct; /* Directional measures table. */
984 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
986 /* Column values, number of columns. */
987 static union value *cols;
990 /* Row values, number of rows. */
991 static union value *rows;
994 /* Number of statistically interesting columns/rows (columns/rows with
996 static int ns_cols, ns_rows;
998 /* Crosstabulation. */
999 static struct crosstab *x;
1001 /* Number of variables from the crosstabulation to consider. This is
1002 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1005 /* Matrix contents. */
1006 static double *mat; /* Matrix proper. */
1007 static double *row_tot; /* Row totals. */
1008 static double *col_tot; /* Column totals. */
1009 static double W; /* Grand total. */
1011 static void display_dimensions (struct tab_table *, int first_difference,
1012 struct table_entry *);
1013 static void display_crosstabulation (void);
1014 static void display_chisq (void);
1015 static void display_symmetric (void);
1016 static void display_risk (void);
1017 static void display_directional (void);
1018 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1019 static void table_value_missing (struct tab_table *table, int c, int r,
1020 unsigned char opt, const union value *v,
1021 const struct variable *var);
1022 static void delete_missing (void);
1024 /* Output pivot table beginning at PB and continuing until PE,
1025 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1026 hold *MAXROWS entries. */
1028 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1029 double **matp, double **row_totp, double **col_totp,
1030 int *maxrows, int *maxcols, int *maxcells)
1033 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1034 int tc = pe - pb; /* Table count. */
1036 /* Table entry for header comparison. */
1037 struct table_entry *cmp = NULL;
1039 x = xtab[(*pb)->table];
1040 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1042 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1044 /* Crosstabulation table initialization. */
1047 table = tab_create (nvar + n_cols,
1048 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1049 tab_headers (table, nvar - 1, 0, 2, 0);
1051 /* First header line. */
1052 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1053 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1055 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1057 /* Second header line. */
1061 for (i = 2; i < nvar; i++)
1062 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1063 TAB_RIGHT | TAT_TITLE,
1065 ? x->vars[i]->label : x->vars[i]->name));
1066 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1067 x->vars[ROW_VAR]->name);
1068 for (i = 0; i < n_cols; i++)
1069 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1071 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1074 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1075 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1079 char *title = local_alloc (x->nvar * 64 + 128);
1083 if (cmd.pivot == CRS_PIVOT)
1084 for (i = 0; i < nvar; i++)
1087 cp = stpcpy (cp, " by ");
1088 cp = stpcpy (cp, x->vars[i]->name);
1092 cp = spprintf (cp, "%s by %s for",
1093 x->vars[0]->name, x->vars[1]->name);
1094 for (i = 2; i < nvar; i++)
1096 char buf[64], *bufp;
1101 cp = stpcpy (cp, x->vars[i]->name);
1103 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1104 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1106 cp = stpcpy (cp, bufp);
1110 cp = stpcpy (cp, " [");
1111 for (i = 0; i < num_cells; i++)
1119 static const struct tuple cell_names[] =
1121 {CRS_CL_COUNT, N_("count")},
1122 {CRS_CL_ROW, N_("row %")},
1123 {CRS_CL_COLUMN, N_("column %")},
1124 {CRS_CL_TOTAL, N_("total %")},
1125 {CRS_CL_EXPECTED, N_("expected")},
1126 {CRS_CL_RESIDUAL, N_("residual")},
1127 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1128 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1132 const struct tuple *t;
1134 for (t = cell_names; t->value != cells[i]; t++)
1135 assert (t->value != -1);
1137 cp = stpcpy (cp, ", ");
1138 cp = stpcpy (cp, gettext (t->name));
1142 tab_title (table, 0, title);
1146 tab_offset (table, 0, 2);
1151 /* Chi-square table initialization. */
1152 if (cmd.a_statistics[CRS_ST_CHISQ])
1154 chisq = tab_create (6 + (nvar - 2),
1155 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1156 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1158 tab_title (chisq, 0, "Chi-square tests.");
1160 tab_offset (chisq, nvar - 2, 0);
1161 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1162 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1163 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1164 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1165 _("Asymp. Sig. (2-sided)"));
1166 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1167 _("Exact. Sig. (2-sided)"));
1168 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1169 _("Exact. Sig. (1-sided)"));
1171 tab_offset (chisq, 0, 1);
1176 /* Symmetric measures. */
1177 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1178 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1179 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1180 || cmd.a_statistics[CRS_ST_KAPPA])
1182 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1183 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1184 tab_title (sym, 0, "Symmetric measures.");
1186 tab_offset (sym, nvar - 2, 0);
1187 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1188 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1189 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1190 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1191 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1192 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1193 tab_offset (sym, 0, 1);
1198 /* Risk estimate. */
1199 if (cmd.a_statistics[CRS_ST_RISK])
1201 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1202 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1203 tab_title (risk, 0, "Risk estimate.");
1205 tab_offset (risk, nvar - 2, 0);
1206 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1207 _(" 95%% Confidence Interval"));
1208 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1209 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1210 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1211 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1212 tab_hline (risk, TAL_1, 2, 3, 1);
1213 tab_vline (risk, TAL_1, 2, 0, 1);
1214 tab_offset (risk, 0, 2);
1219 /* Directional measures. */
1220 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1221 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1223 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1224 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1225 tab_title (direct, 0, "Directional measures.");
1227 tab_offset (direct, nvar - 2, 0);
1228 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1229 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1230 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1231 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1232 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1233 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1234 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1235 tab_offset (direct, 0, 1);
1242 /* Find pivot subtable if applicable. */
1243 te = find_pivot_extent (tb, &tc, 0);
1247 /* Find all the row variable values. */
1248 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1250 /* Allocate memory space for the column and row totals. */
1251 if (n_rows > *maxrows)
1253 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1254 row_tot = *row_totp;
1257 if (n_cols > *maxcols)
1259 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1260 col_tot = *col_totp;
1264 /* Allocate table space for the matrix. */
1265 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1266 tab_realloc (table, -1,
1267 max (tab_nr (table) + (n_rows + 1) * num_cells,
1268 tab_nr (table) * (pe - pb) / (te - tb)));
1270 if (mode == GENERAL)
1272 /* Allocate memory space for the matrix. */
1273 if (n_cols * n_rows > *maxcells)
1275 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1276 *maxcells = n_cols * n_rows;
1281 /* Build the matrix and calculate column totals. */
1283 union value *cur_col = cols;
1284 union value *cur_row = rows;
1286 double *cp = col_tot;
1287 struct table_entry **p;
1290 for (p = &tb[0]; p < te; p++)
1292 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1296 for (; cur_row < &rows[n_rows]; cur_row++)
1302 mp = &mat[cur_col - cols];
1305 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1312 *cp += *mp = (*p)->u.freq;
1317 /* Zero out the rest of the matrix. */
1318 for (; cur_row < &rows[n_rows]; cur_row++)
1324 if (cur_col < &cols[n_cols])
1326 const int rem_cols = n_cols - (cur_col - cols);
1329 for (c = 0; c < rem_cols; c++)
1331 mp = &mat[cur_col - cols];
1332 for (r = 0; r < n_rows; r++)
1334 for (c = 0; c < rem_cols; c++)
1336 mp += n_cols - rem_cols;
1344 double *tp = col_tot;
1346 assert (mode == INTEGER);
1347 mat = (*tb)->u.data;
1350 /* Calculate column totals. */
1351 for (c = 0; c < n_cols; c++)
1354 double *cp = &mat[c];
1356 for (r = 0; r < n_rows; r++)
1357 cum += cp[r * n_cols];
1365 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1366 ns_cols += *cp != 0.;
1369 /* Calculate row totals. */
1372 double *rp = row_tot;
1375 for (ns_rows = 0, r = n_rows; r--; )
1378 for (c = n_cols; c--; )
1386 /* Calculate grand total. */
1392 if (n_rows < n_cols)
1393 tp = row_tot, n = n_rows;
1395 tp = col_tot, n = n_cols;
1402 /* Print the matrix. */
1406 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1407 for (i = 2; i < nvar; i++)
1408 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1411 for (c = 0; c < n_cols; c++)
1412 printf ("%4g", cols[c].f);
1414 for (r = 0; r < n_rows; r++)
1416 printf ("%4g:", rows[r].f);
1417 for (c = 0; c < n_cols; c++)
1418 printf ("%4g", mat[c + r * n_cols]);
1419 printf ("%4g", row_tot[r]);
1423 for (c = 0; c < n_cols; c++)
1424 printf ("%4g", col_tot[c]);
1430 /* Find the first variable that differs from the last subtable,
1431 then display the values of the dimensioning variables for
1432 each table that needs it. */
1434 int first_difference = nvar - 1;
1437 for (; ; first_difference--)
1439 assert (first_difference >= 2);
1440 if (memcmp (&cmp->values[first_difference],
1441 &(*tb)->values[first_difference],
1442 sizeof *cmp->values))
1448 display_dimensions (table, first_difference, *tb);
1450 display_dimensions (chisq, first_difference, *tb);
1452 display_dimensions (sym, first_difference, *tb);
1454 display_dimensions (risk, first_difference, *tb);
1456 display_dimensions (direct, first_difference, *tb);
1460 display_crosstabulation ();
1461 if (cmd.miss == CRS_REPORT)
1466 display_symmetric ();
1470 display_directional ();
1481 tab_resize (chisq, 4 + (nvar - 2), -1);
1492 /* Delete missing rows and columns for statistical analysis when
1495 delete_missing (void)
1500 for (r = 0; r < n_rows; r++)
1501 if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1505 for (c = 0; c < n_cols; c++)
1506 mat[c + r * n_cols] = 0.;
1514 for (c = 0; c < n_cols; c++)
1515 if (is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1519 for (r = 0; r < n_rows; r++)
1520 mat[c + r * n_cols] = 0.;
1526 /* Prepare table T for submission, and submit it. */
1528 submit (struct tab_table *t)
1535 tab_resize (t, -1, 0);
1536 if (tab_nr (t) == tab_t (t))
1541 tab_offset (t, 0, 0);
1543 for (i = 2; i < nvar; i++)
1544 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1545 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1546 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1547 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1549 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1551 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1552 tab_dim (t, crosstabs_dim);
1556 /* Sets the widths of all the columns and heights of all the rows in
1557 table T for driver D. */
1559 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1563 /* Width of a numerical column. */
1564 int c = outp_string_width (d, "0.000000");
1565 if (cmd.miss == CRS_REPORT)
1566 c += outp_string_width (d, "M");
1568 /* Set width for header columns. */
1571 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1573 if (w < d->prop_em_width * 8)
1574 w = d->prop_em_width * 8;
1576 if (w > d->prop_em_width * 15)
1577 w = d->prop_em_width * 15;
1579 for (i = 0; i < t->l; i++)
1583 for (i = t->l; i < t->nc; i++)
1586 for (i = 0; i < t->nr; i++)
1587 t->h[i] = tab_natural_height (t, d, i);
1590 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1591 int *cnt, int pivot);
1592 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1593 int *cnt, int pivot);
1595 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1597 static struct table_entry **
1598 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1600 return (mode == GENERAL
1601 ? find_pivot_extent_general (tp, cnt, pivot)
1602 : find_pivot_extent_integer (tp, cnt, pivot));
1605 /* Find the extent of a region in TP that contains one table. If
1606 PIVOT != 0 that means a set of table entries with identical table
1607 number; otherwise they also have to have the same values for every
1608 dimension after the row and column dimensions. The table that is
1609 searched starts at TP and has length CNT. Returns the first entry
1610 after the last one in the table; sets *CNT to the number of
1611 remaining values. If there are no entries in TP at all, returns
1612 NULL. A yucky interface, admittedly, but it works. */
1613 static struct table_entry **
1614 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1616 struct table_entry *fp = *tp;
1621 x = xtab[(*tp)->table];
1629 if ((*tp)->table != fp->table)
1634 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1641 /* Integer mode correspondent to find_pivot_extent_general(). This
1642 could be optimized somewhat, but I just don't give a crap about
1643 CROSSTABS performance in integer mode, which is just a
1644 CROSSTABS wart as far as I'm concerned.
1646 That said, feel free to send optimization patches to me. */
1647 static struct table_entry **
1648 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1650 struct table_entry *fp = *tp;
1655 x = xtab[(*tp)->table];
1663 if ((*tp)->table != fp->table)
1668 if (memcmp (&(*tp)->values[2], &fp->values[2],
1669 sizeof (union value) * (x->nvar - 2)))
1676 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1677 result. WIDTH_ points to an int which is either 0 for a
1678 numeric value or a string width for a string value. */
1680 compare_value (const void *a_, const void *b_, void *width_)
1682 const union value *a = a_;
1683 const union value *b = b_;
1684 const int *pwidth = width_;
1685 const int width = *pwidth;
1688 return (a->f < b->f) ? -1 : (a->f > b->f);
1690 return strncmp (a->s, b->s, width);
1693 /* Given an array of ENTRY_CNT table_entry structures starting at
1694 ENTRIES, creates a sorted list of the values that the variable
1695 with index VAR_INDEX takes on. The values are returned as a
1696 malloc()'darray stored in *VALUES, with the number of values
1697 stored in *VALUE_CNT.
1700 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1701 union value **values, int *value_cnt)
1703 if (mode == GENERAL)
1705 int width = xtab[(*entries)->table]->vars[var_idx]->width;
1708 *values = xmalloc (sizeof **values * entry_cnt);
1709 for (i = 0; i < entry_cnt; i++)
1710 (*values)[i] = entries[i]->values[var_idx];
1711 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1712 compare_value, &width);
1716 struct crosstab_proc *crs
1717 = &xtab[(*entries)->table]->vars[var_idx]->p.crs;
1720 assert (mode == INTEGER);
1721 *values = xmalloc (sizeof **values * crs->count);
1722 for (i = 0; i < crs->count; i++)
1723 (*values)[i].f = i + crs->min;
1724 *value_cnt = crs->count;
1728 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1729 from V, displayed with print format spec from variable VAR. When
1730 in REPORT missing-value mode, missing values have an M appended. */
1732 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1733 const union value *v, const struct variable *var)
1735 struct len_string s;
1737 const char *label = val_labs_find (var->val_labs, *v);
1740 tab_text (table, c, r, TAB_LEFT, label);
1744 s.string = tab_alloc (table, var->print.w);
1745 format_short (s.string, &var->print, v);
1746 s.length = strlen (s.string);
1747 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1748 s.string[s.length++] = 'M';
1749 while (s.length && *s.string == ' ')
1754 tab_raw (table, c, r, opt, &s);
1757 /* Draws a line across TABLE at the current row to indicate the most
1758 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1759 that changed, and puts the values that changed into the table. TB
1760 and X must be the corresponding table_entry and crosstab,
1763 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1765 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1767 for (; first_difference >= 2; first_difference--)
1768 table_value_missing (table, nvar - first_difference - 1, 0,
1769 TAB_RIGHT, &tb->values[first_difference],
1770 x->vars[first_difference]);
1773 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1774 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1775 additionally suffixed with a letter `M'. */
1777 format_cell_entry (struct tab_table *table, int c, int r, double value,
1778 char suffix, int mark_missing)
1780 const struct fmt_spec f = {FMT_F, 10, 1};
1782 struct len_string s;
1785 s.string = tab_alloc (table, 16);
1787 data_out (s.string, &f, &v);
1788 while (*s.string == ' ')
1794 s.string[s.length++] = suffix;
1796 s.string[s.length++] = 'M';
1798 tab_raw (table, c, r, TAB_RIGHT, &s);
1801 /* Displays the crosstabulation table. */
1803 display_crosstabulation (void)
1808 for (r = 0; r < n_rows; r++)
1809 table_value_missing (table, nvar - 2, r * num_cells,
1810 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1812 tab_text (table, nvar - 2, n_rows * num_cells,
1813 TAB_LEFT, _("Total"));
1815 /* Put in the actual cells. */
1820 tab_offset (table, nvar - 1, -1);
1821 for (r = 0; r < n_rows; r++)
1824 tab_hline (table, TAL_1, -1, n_cols, 0);
1825 for (c = 0; c < n_cols; c++)
1827 int mark_missing = 0;
1828 double expected_value = row_tot[r] * col_tot[c] / W;
1829 if (cmd.miss == CRS_REPORT
1830 && (is_num_user_missing (cols[c].f, x->vars[COL_VAR])
1831 || is_num_user_missing (rows[r].f, x->vars[ROW_VAR])))
1833 for (i = 0; i < num_cells; i++)
1844 v = *mp / row_tot[r] * 100.;
1848 v = *mp / col_tot[c] * 100.;
1855 case CRS_CL_EXPECTED:
1858 case CRS_CL_RESIDUAL:
1859 v = *mp - expected_value;
1861 case CRS_CL_SRESIDUAL:
1862 v = (*mp - expected_value) / sqrt (expected_value);
1864 case CRS_CL_ASRESIDUAL:
1865 v = ((*mp - expected_value)
1866 / sqrt (expected_value
1867 * (1. - row_tot[r] / W)
1868 * (1. - col_tot[c] / W)));
1875 format_cell_entry (table, c, i, v, suffix, mark_missing);
1881 tab_offset (table, -1, tab_row (table) + num_cells);
1889 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1890 for (r = 0; r < n_rows; r++)
1893 int mark_missing = 0;
1895 if (cmd.miss == CRS_REPORT
1896 && is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1899 for (i = 0; i < num_cells; i++)
1913 v = row_tot[r] / W * 100.;
1917 v = row_tot[r] / W * 100.;
1920 case CRS_CL_EXPECTED:
1921 case CRS_CL_RESIDUAL:
1922 case CRS_CL_SRESIDUAL:
1923 case CRS_CL_ASRESIDUAL:
1931 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1932 tab_next_row (table);
1937 /* Column totals, grand total. */
1943 tab_hline (table, TAL_1, -1, n_cols, 0);
1944 for (c = 0; c <= n_cols; c++)
1946 double ct = c < n_cols ? col_tot[c] : W;
1947 int mark_missing = 0;
1951 if (cmd.miss == CRS_REPORT && c < n_cols
1952 && is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1955 for (i = 0; i < num_cells; i++)
1977 case CRS_CL_EXPECTED:
1978 case CRS_CL_RESIDUAL:
1979 case CRS_CL_SRESIDUAL:
1980 case CRS_CL_ASRESIDUAL:
1987 format_cell_entry (table, c, i, v, suffix, mark_missing);
1992 tab_offset (table, -1, tab_row (table) + last_row);
1995 tab_offset (table, 0, -1);
1998 static void calc_r (double *X, double *Y, double *, double *, double *);
1999 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
2001 /* Display chi-square statistics. */
2003 display_chisq (void)
2005 static const char *chisq_stats[N_CHISQ] =
2007 N_("Pearson Chi-Square"),
2008 N_("Likelihood Ratio"),
2009 N_("Fisher's Exact Test"),
2010 N_("Continuity Correction"),
2011 N_("Linear-by-Linear Association"),
2013 double chisq_v[N_CHISQ];
2014 double fisher1, fisher2;
2020 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2022 tab_offset (chisq, nvar - 2, -1);
2024 for (i = 0; i < N_CHISQ; i++)
2026 if ((i != 2 && chisq_v[i] == SYSMIS)
2027 || (i == 2 && fisher1 == SYSMIS))
2031 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2034 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2035 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2036 tab_float (chisq, 3, 0, TAB_RIGHT,
2037 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
2042 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2043 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2045 tab_next_row (chisq);
2048 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2049 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2050 tab_next_row (chisq);
2052 tab_offset (chisq, 0, -1);
2055 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2056 double[N_SYMMETRIC]);
2058 /* Display symmetric measures. */
2060 display_symmetric (void)
2062 static const char *categories[] =
2064 N_("Nominal by Nominal"),
2065 N_("Ordinal by Ordinal"),
2066 N_("Interval by Interval"),
2067 N_("Measure of Agreement"),
2070 static const char *stats[N_SYMMETRIC] =
2074 N_("Contingency Coefficient"),
2075 N_("Kendall's tau-b"),
2076 N_("Kendall's tau-c"),
2078 N_("Spearman Correlation"),
2083 static const int stats_categories[N_SYMMETRIC] =
2085 0, 0, 0, 1, 1, 1, 1, 2, 3,
2089 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2092 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2095 tab_offset (sym, nvar - 2, -1);
2097 for (i = 0; i < N_SYMMETRIC; i++)
2099 if (sym_v[i] == SYSMIS)
2102 if (stats_categories[i] != last_cat)
2104 last_cat = stats_categories[i];
2105 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2108 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2109 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2110 if (sym_ase[i] != SYSMIS)
2111 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2112 if (sym_t[i] != SYSMIS)
2113 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2114 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2118 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2119 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2122 tab_offset (sym, 0, -1);
2125 static int calc_risk (double[], double[], double[], union value *);
2127 /* Display risk estimate. */
2132 double risk_v[3], lower[3], upper[3];
2136 if (!calc_risk (risk_v, upper, lower, c))
2139 tab_offset (risk, nvar - 2, -1);
2141 for (i = 0; i < 3; i++)
2143 if (risk_v[i] == SYSMIS)
2149 if (x->vars[COL_VAR]->type == NUMERIC)
2150 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2151 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2153 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2154 x->vars[COL_VAR]->name,
2155 x->vars[COL_VAR]->width, c[0].s,
2156 x->vars[COL_VAR]->width, c[1].s);
2160 if (x->vars[ROW_VAR]->type == NUMERIC)
2161 sprintf (buf, _("For cohort %s = %g"),
2162 x->vars[ROW_VAR]->name, rows[i - 1].f);
2164 sprintf (buf, _("For cohort %s = %.*s"),
2165 x->vars[ROW_VAR]->name,
2166 x->vars[ROW_VAR]->width, rows[i - 1].s);
2170 tab_text (risk, 0, 0, TAB_LEFT, buf);
2171 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2172 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2173 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2174 tab_next_row (risk);
2177 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2178 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2179 tab_next_row (risk);
2181 tab_offset (risk, 0, -1);
2184 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2185 double[N_DIRECTIONAL]);
2187 /* Display directional measures. */
2189 display_directional (void)
2191 static const char *categories[] =
2193 N_("Nominal by Nominal"),
2194 N_("Ordinal by Ordinal"),
2195 N_("Nominal by Interval"),
2198 static const char *stats[] =
2201 N_("Goodman and Kruskal tau"),
2202 N_("Uncertainty Coefficient"),
2207 static const char *types[] =
2214 static const int stats_categories[N_DIRECTIONAL] =
2216 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2219 static const int stats_stats[N_DIRECTIONAL] =
2221 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2224 static const int stats_types[N_DIRECTIONAL] =
2226 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2229 static const int *stats_lookup[] =
2236 static const char **stats_names[] =
2248 double direct_v[N_DIRECTIONAL];
2249 double direct_ase[N_DIRECTIONAL];
2250 double direct_t[N_DIRECTIONAL];
2254 if (!calc_directional (direct_v, direct_ase, direct_t))
2257 tab_offset (direct, nvar - 2, -1);
2259 for (i = 0; i < N_DIRECTIONAL; i++)
2261 if (direct_v[i] == SYSMIS)
2267 for (j = 0; j < 3; j++)
2268 if (last[j] != stats_lookup[j][i])
2271 tab_hline (direct, TAL_1, j, 6, 0);
2276 int k = last[j] = stats_lookup[j][i];
2281 string = x->vars[0]->name;
2283 string = x->vars[1]->name;
2285 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2286 gettext (stats_names[j][k]), string);
2291 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2292 if (direct_ase[i] != SYSMIS)
2293 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2294 if (direct_t[i] != SYSMIS)
2295 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2296 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2297 tab_next_row (direct);
2300 tab_offset (direct, 0, -1);
2303 /* Statistical calculations. */
2305 /* Returns the value of the gamma (factorial) function for an integer
2308 gamma_int (double x)
2313 for (i = 2; i < x; i++)
2318 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2320 static inline double
2321 Pr (int a, int b, int c, int d)
2323 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2324 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2325 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2326 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2327 / gamma_int (a + b + c + d + 1.));
2330 /* Swap the contents of A and B. */
2332 swap (int *a, int *b)
2339 /* Calculate significance for Fisher's exact test as specified in
2340 _SPSS Statistical Algorithms_, Appendix 5. */
2342 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2346 if (min (c, d) < min (a, b))
2347 swap (&a, &c), swap (&b, &d);
2348 if (min (b, d) < min (a, c))
2349 swap (&a, &b), swap (&c, &d);
2353 swap (&a, &b), swap (&c, &d);
2355 swap (&a, &c), swap (&b, &d);
2359 for (x = 0; x <= a; x++)
2360 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2362 *fisher2 = *fisher1;
2363 for (x = 1; x <= b; x++)
2364 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2367 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2368 columns with values COLS and N_ROWS rows with values ROWS. Values
2369 in the matrix sum to W. */
2371 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2372 double *fisher1, double *fisher2)
2376 chisq[0] = chisq[1] = 0.;
2377 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2378 *fisher1 = *fisher2 = SYSMIS;
2380 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2382 if (ns_rows <= 1 || ns_cols <= 1)
2384 chisq[0] = chisq[1] = SYSMIS;
2388 for (r = 0; r < n_rows; r++)
2389 for (c = 0; c < n_cols; c++)
2391 const double expected = row_tot[r] * col_tot[c] / W;
2392 const double freq = mat[n_cols * r + c];
2393 const double residual = freq - expected;
2395 chisq[0] += residual * residual / expected;
2397 chisq[1] += freq * log (expected / freq);
2408 /* Calculate Yates and Fisher exact test. */
2409 if (ns_cols == 2 && ns_rows == 2)
2411 double f11, f12, f21, f22;
2417 for (i = j = 0; i < n_cols; i++)
2418 if (col_tot[i] != 0.)
2427 f11 = mat[nz_cols[0]];
2428 f12 = mat[nz_cols[1]];
2429 f21 = mat[nz_cols[0] + n_cols];
2430 f22 = mat[nz_cols[1] + n_cols];
2435 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2438 chisq[3] = (W * x * x
2439 / (f11 + f12) / (f21 + f22)
2440 / (f11 + f21) / (f12 + f22));
2448 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2449 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2452 /* Calculate Mantel-Haenszel. */
2453 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2455 double r, ase_0, ase_1;
2456 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2458 chisq[4] = (W - 1.) * r * r;
2463 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2464 ASE_1, and ase_0 into ASE_0. The row and column values must be
2465 passed in X and Y. */
2467 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2469 double SX, SY, S, T;
2471 double sum_XYf, sum_X2Y2f;
2472 double sum_Xr, sum_X2r;
2473 double sum_Yc, sum_Y2c;
2476 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2477 for (j = 0; j < n_cols; j++)
2479 double fij = mat[j + i * n_cols];
2480 double product = X[i] * Y[j];
2481 double temp = fij * product;
2483 sum_X2Y2f += temp * product;
2486 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2488 sum_Xr += X[i] * row_tot[i];
2489 sum_X2r += X[i] * X[i] * row_tot[i];
2493 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2495 sum_Yc += Y[i] * col_tot[i];
2496 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2500 S = sum_XYf - sum_Xr * sum_Yc / W;
2501 SX = sum_X2r - sum_Xr * sum_Xr / W;
2502 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2505 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2510 for (s = c = 0., i = 0; i < n_rows; i++)
2511 for (j = 0; j < n_cols; j++)
2513 double Xresid, Yresid;
2516 Xresid = X[i] - Xbar;
2517 Yresid = Y[j] - Ybar;
2518 temp = (T * Xresid * Yresid
2520 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2521 y = mat[j + i * n_cols] * temp * temp - c;
2526 *ase_1 = sqrt (s) / (T * T);
2530 static double somers_d_v[3];
2531 static double somers_d_ase[3];
2532 static double somers_d_t[3];
2534 /* Calculate symmetric statistics and their asymptotic standard
2535 errors. Returns 0 if none could be calculated. */
2537 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2538 double t[N_SYMMETRIC])
2540 int q = min (ns_rows, ns_cols);
2549 for (i = 0; i < N_SYMMETRIC; i++)
2550 v[i] = ase[i] = t[i] = SYSMIS;
2553 /* Phi, Cramer's V, contingency coefficient. */
2554 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2556 double Xp = 0.; /* Pearson chi-square. */
2561 for (r = 0; r < n_rows; r++)
2562 for (c = 0; c < n_cols; c++)
2564 const double expected = row_tot[r] * col_tot[c] / W;
2565 const double freq = mat[n_cols * r + c];
2566 const double residual = freq - expected;
2568 Xp += residual * residual / expected;
2572 if (cmd.a_statistics[CRS_ST_PHI])
2574 v[0] = sqrt (Xp / W);
2575 v[1] = sqrt (Xp / (W * (q - 1)));
2577 if (cmd.a_statistics[CRS_ST_CC])
2578 v[2] = sqrt (Xp / (Xp + W));
2581 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2582 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2587 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2594 for (r = 0; r < n_rows; r++)
2595 Dr -= row_tot[r] * row_tot[r];
2596 for (c = 0; c < n_cols; c++)
2597 Dc -= col_tot[c] * col_tot[c];
2603 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2604 for (c = 0; c < n_cols; c++)
2608 for (r = 0; r < n_rows; r++)
2609 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2619 for (i = 0; i < n_rows; i++)
2623 for (j = 1; j < n_cols; j++)
2624 Cij += col_tot[j] - cum[j + i * n_cols];
2627 for (j = 1; j < n_cols; j++)
2628 Dij += cum[j + (i - 1) * n_cols];
2632 double fij = mat[j + i * n_cols];
2638 assert (j < n_cols);
2640 Cij -= col_tot[j] - cum[j + i * n_cols];
2641 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2645 Cij += cum[j - 1 + (i - 1) * n_cols];
2646 Dij -= cum[j + (i - 1) * n_cols];
2652 if (cmd.a_statistics[CRS_ST_BTAU])
2653 v[3] = (P - Q) / sqrt (Dr * Dc);
2654 if (cmd.a_statistics[CRS_ST_CTAU])
2655 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2656 if (cmd.a_statistics[CRS_ST_GAMMA])
2657 v[5] = (P - Q) / (P + Q);
2659 /* ASE for tau-b, tau-c, gamma. Calculations could be
2660 eliminated here, at expense of memory. */
2665 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2666 for (i = 0; i < n_rows; i++)
2670 for (j = 1; j < n_cols; j++)
2671 Cij += col_tot[j] - cum[j + i * n_cols];
2674 for (j = 1; j < n_cols; j++)
2675 Dij += cum[j + (i - 1) * n_cols];
2679 double fij = mat[j + i * n_cols];
2681 if (cmd.a_statistics[CRS_ST_BTAU])
2683 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2684 + v[3] * (row_tot[i] * Dc
2685 + col_tot[j] * Dr));
2686 btau_cum += fij * temp * temp;
2690 const double temp = Cij - Dij;
2691 ctau_cum += fij * temp * temp;
2694 if (cmd.a_statistics[CRS_ST_GAMMA])
2696 const double temp = Q * Cij - P * Dij;
2697 gamma_cum += fij * temp * temp;
2700 if (cmd.a_statistics[CRS_ST_D])
2702 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2703 - (P - Q) * (W - row_tot[i]));
2704 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2705 - (Q - P) * (W - col_tot[j]));
2710 assert (j < n_cols);
2712 Cij -= col_tot[j] - cum[j + i * n_cols];
2713 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2717 Cij += cum[j - 1 + (i - 1) * n_cols];
2718 Dij -= cum[j + (i - 1) * n_cols];
2724 btau_var = ((btau_cum
2725 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2727 if (cmd.a_statistics[CRS_ST_BTAU])
2729 ase[3] = sqrt (btau_var);
2730 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2733 if (cmd.a_statistics[CRS_ST_CTAU])
2735 ase[4] = ((2 * q / ((q - 1) * W * W))
2736 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2737 t[4] = v[4] / ase[4];
2739 if (cmd.a_statistics[CRS_ST_GAMMA])
2741 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2742 t[5] = v[5] / (2. / (P + Q)
2743 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2745 if (cmd.a_statistics[CRS_ST_D])
2747 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2748 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2749 somers_d_t[0] = (somers_d_v[0]
2751 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2752 somers_d_v[1] = (P - Q) / Dc;
2753 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2754 somers_d_t[1] = (somers_d_v[1]
2756 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2757 somers_d_v[2] = (P - Q) / Dr;
2758 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2759 somers_d_t[2] = (somers_d_v[2]
2761 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2767 /* Spearman correlation, Pearson's r. */
2768 if (cmd.a_statistics[CRS_ST_CORR])
2770 double *R = local_alloc (sizeof *R * n_rows);
2771 double *C = local_alloc (sizeof *C * n_cols);
2774 double y, t, c = 0., s = 0.;
2779 R[i] = s + (row_tot[i] + 1.) / 2.;
2786 assert (i < n_rows);
2791 double y, t, c = 0., s = 0.;
2796 C[j] = s + (col_tot[j] + 1.) / 2;
2803 assert (j < n_cols);
2807 calc_r (R, C, &v[6], &t[6], &ase[6]);
2813 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2817 /* Cohen's kappa. */
2818 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2820 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2823 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2824 i < ns_rows; i++, j++)
2828 while (col_tot[j] == 0.)
2831 prod = row_tot[i] * col_tot[j];
2832 sum = row_tot[i] + col_tot[j];
2834 sum_fii += mat[j + i * n_cols];
2836 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2837 sum_riciri_ci += prod * sum;
2839 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2840 for (j = 0; j < ns_cols; j++)
2842 double sum = row_tot[i] + col_tot[j];
2843 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2846 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2848 ase[8] = sqrt ((W * W * sum_rici
2849 + sum_rici * sum_rici
2850 - W * sum_riciri_ci)
2851 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2853 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2854 / pow2 (W * W - sum_rici))
2855 + ((2. * (W - sum_fii)
2856 * (2. * sum_fii * sum_rici
2857 - W * sum_fiiri_ci))
2858 / cube (W * W - sum_rici))
2859 + (pow2 (W - sum_fii)
2860 * (W * sum_fijri_ci2 - 4.
2861 * sum_rici * sum_rici)
2862 / pow4 (W * W - sum_rici))));
2864 t[8] = v[8] / ase[8];
2871 /* Calculate risk estimate. */
2873 calc_risk (double *value, double *upper, double *lower, union value *c)
2875 double f11, f12, f21, f22;
2881 for (i = 0; i < 3; i++)
2882 value[i] = upper[i] = lower[i] = SYSMIS;
2885 if (ns_rows != 2 || ns_cols != 2)
2892 for (i = j = 0; i < n_cols; i++)
2893 if (col_tot[i] != 0.)
2902 f11 = mat[nz_cols[0]];
2903 f12 = mat[nz_cols[1]];
2904 f21 = mat[nz_cols[0] + n_cols];
2905 f22 = mat[nz_cols[1] + n_cols];
2907 c[0] = cols[nz_cols[0]];
2908 c[1] = cols[nz_cols[1]];
2911 value[0] = (f11 * f22) / (f12 * f21);
2912 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2913 lower[0] = value[0] * exp (-1.960 * v);
2914 upper[0] = value[0] * exp (1.960 * v);
2916 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2917 v = sqrt ((f12 / (f11 * (f11 + f12)))
2918 + (f22 / (f21 * (f21 + f22))));
2919 lower[1] = value[1] * exp (-1.960 * v);
2920 upper[1] = value[1] * exp (1.960 * v);
2922 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2923 v = sqrt ((f11 / (f12 * (f11 + f12)))
2924 + (f21 / (f22 * (f21 + f22))));
2925 lower[2] = value[2] * exp (-1.960 * v);
2926 upper[2] = value[2] * exp (1.960 * v);
2931 /* Calculate directional measures. */
2933 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2934 double t[N_DIRECTIONAL])
2939 for (i = 0; i < N_DIRECTIONAL; i++)
2940 v[i] = ase[i] = t[i] = SYSMIS;
2944 if (cmd.a_statistics[CRS_ST_LAMBDA])
2946 double *fim = xmalloc (sizeof *fim * n_rows);
2947 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2948 double *fmj = xmalloc (sizeof *fmj * n_cols);
2949 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2950 double sum_fim, sum_fmj;
2952 int rm_index, cm_index;
2955 /* Find maximum for each row and their sum. */
2956 for (sum_fim = 0., i = 0; i < n_rows; i++)
2958 double max = mat[i * n_cols];
2961 for (j = 1; j < n_cols; j++)
2962 if (mat[j + i * n_cols] > max)
2964 max = mat[j + i * n_cols];
2968 sum_fim += fim[i] = max;
2969 fim_index[i] = index;
2972 /* Find maximum for each column. */
2973 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2975 double max = mat[j];
2978 for (i = 1; i < n_rows; i++)
2979 if (mat[j + i * n_cols] > max)
2981 max = mat[j + i * n_cols];
2985 sum_fmj += fmj[j] = max;
2986 fmj_index[j] = index;
2989 /* Find maximum row total. */
2992 for (i = 1; i < n_rows; i++)
2993 if (row_tot[i] > rm)
2999 /* Find maximum column total. */
3002 for (j = 1; j < n_cols; j++)
3003 if (col_tot[j] > cm)
3009 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
3010 v[1] = (sum_fmj - rm) / (W - rm);
3011 v[2] = (sum_fim - cm) / (W - cm);
3013 /* ASE1 for Y given X. */
3017 for (accum = 0., i = 0; i < n_rows; i++)
3018 for (j = 0; j < n_cols; j++)
3020 const int deltaj = j == cm_index;
3021 accum += (mat[j + i * n_cols]
3022 * pow2 ((j == fim_index[i])
3027 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3030 /* ASE0 for Y given X. */
3034 for (accum = 0., i = 0; i < n_rows; i++)
3035 if (cm_index != fim_index[i])
3036 accum += (mat[i * n_cols + fim_index[i]]
3037 + mat[i * n_cols + cm_index]);
3038 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3041 /* ASE1 for X given Y. */
3045 for (accum = 0., i = 0; i < n_rows; i++)
3046 for (j = 0; j < n_cols; j++)
3048 const int deltaj = i == rm_index;
3049 accum += (mat[j + i * n_cols]
3050 * pow2 ((i == fmj_index[j])
3055 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3058 /* ASE0 for X given Y. */
3062 for (accum = 0., j = 0; j < n_cols; j++)
3063 if (rm_index != fmj_index[j])
3064 accum += (mat[j + n_cols * fmj_index[j]]
3065 + mat[j + n_cols * rm_index]);
3066 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3069 /* Symmetric ASE0 and ASE1. */
3074 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3075 for (j = 0; j < n_cols; j++)
3077 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3078 int temp1 = (i == rm_index) + (j == cm_index);
3079 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3080 accum1 += (mat[j + i * n_cols]
3081 * pow2 (temp0 + (v[0] - 1.) * temp1));
3083 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3084 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3085 / (2. * W - rm - cm));
3094 double sum_fij2_ri, sum_fij2_ci;
3095 double sum_ri2, sum_cj2;
3097 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3098 for (j = 0; j < n_cols; j++)
3100 double temp = pow2 (mat[j + i * n_cols]);
3101 sum_fij2_ri += temp / row_tot[i];
3102 sum_fij2_ci += temp / col_tot[j];
3105 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3106 sum_ri2 += row_tot[i] * row_tot[i];
3108 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3109 sum_cj2 += col_tot[j] * col_tot[j];
3111 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3112 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3116 if (cmd.a_statistics[CRS_ST_UC])
3118 double UX, UY, UXY, P;
3119 double ase1_yx, ase1_xy, ase1_sym;
3122 for (UX = 0., i = 0; i < n_rows; i++)
3123 if (row_tot[i] > 0.)
3124 UX -= row_tot[i] / W * log (row_tot[i] / W);
3126 for (UY = 0., j = 0; j < n_cols; j++)
3127 if (col_tot[j] > 0.)
3128 UY -= col_tot[j] / W * log (col_tot[j] / W);
3130 for (UXY = P = 0., i = 0; i < n_rows; i++)
3131 for (j = 0; j < n_cols; j++)
3133 double entry = mat[j + i * n_cols];
3138 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3139 UXY -= entry / W * log (entry / W);
3142 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3143 for (j = 0; j < n_cols; j++)
3145 double entry = mat[j + i * n_cols];
3150 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3151 + (UX - UXY) * log (col_tot[j] / W));
3152 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3153 + (UY - UXY) * log (row_tot[i] / W));
3154 ase1_sym += entry * pow2 ((UXY
3155 * log (row_tot[i] * col_tot[j] / (W * W)))
3156 - (UX + UY) * log (entry / W));
3159 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3160 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3161 t[5] = v[5] / ((2. / (W * (UX + UY)))
3162 * sqrt (P - pow2 (UX + UY - UXY) / W));
3164 v[6] = (UX + UY - UXY) / UX;
3165 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3166 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3168 v[7] = (UX + UY - UXY) / UY;
3169 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3170 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3174 if (cmd.a_statistics[CRS_ST_D])
3179 calc_symmetric (NULL, NULL, NULL);
3180 for (i = 0; i < 3; i++)
3182 v[8 + i] = somers_d_v[i];
3183 ase[8 + i] = somers_d_ase[i];
3184 t[8 + i] = somers_d_t[i];
3189 if (cmd.a_statistics[CRS_ST_ETA])
3192 double sum_Xr, sum_X2r;
3196 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3198 sum_Xr += rows[i].f * row_tot[i];
3199 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3201 SX = sum_X2r - sum_Xr * sum_Xr / W;
3203 for (SXW = 0., j = 0; j < n_cols; j++)
3207 for (cum = 0., i = 0; i < n_rows; i++)
3209 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3210 cum += rows[i].f * mat[j + i * n_cols];
3213 SXW -= cum * cum / col_tot[j];
3215 v[11] = sqrt (1. - SXW / SX);
3219 double sum_Yc, sum_Y2c;
3223 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3225 sum_Yc += cols[i].f * col_tot[i];
3226 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3228 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3230 for (SYW = 0., i = 0; i < n_rows; i++)
3234 for (cum = 0., j = 0; j < n_cols; j++)
3236 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3237 cum += cols[j].f * mat[j + i * n_cols];
3240 SYW -= cum * cum / row_tot[i];
3242 v[12] = sqrt (1. - SYW / SY);
3249 /* A wrapper around data_out() that limits string output to short
3250 string width and null terminates the result. */
3252 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3254 struct fmt_spec fmt_subst;
3256 /* Limit to short string width. */
3257 if (formats[fp->type].cat & FCAT_STRING)
3261 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3262 if (fmt_subst.type == FMT_A)
3263 fmt_subst.w = min (8, fmt_subst.w);
3265 fmt_subst.w = min (16, fmt_subst.w);
3271 data_out (s, fp, v);
3273 /* Null terminate. */