1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include <gsl/gsl_cdf.h>
38 #include "algorithm.h"
49 #include "value-labels.h"
53 #include "debug-print.h"
59 +missing=miss:!table/include/report;
60 +write[wr_]=none,cells,all;
61 +format=fmt:!labels/nolabels/novallabs,
64 tabl:!tables/notables,
67 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
69 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
75 /* Number of chi-square statistics. */
78 /* Number of symmetric statistics. */
81 /* Number of directional statistics. */
82 #define N_DIRECTIONAL 13
84 /* A single table entry for general mode. */
87 int table; /* Flattened table number. */
90 double freq; /* Frequency count. */
91 double *data; /* Crosstabulation table for integer mode. */
94 union value values[1]; /* Values. */
97 /* A crosstabulation. */
100 int nvar; /* Number of variables. */
101 double missing; /* Missing cases count. */
102 int ofs; /* Integer mode: Offset into sorted_tab[]. */
103 struct variable *vars[2]; /* At least two variables; sorted by
104 larger indices first. */
107 /* Indexes into crosstab.v. */
114 /* General mode crosstabulation table. */
115 static struct hsh_table *gen_tab; /* Hash table. */
116 static int n_sorted_tab; /* Number of entries in sorted_tab. */
117 static struct table_entry **sorted_tab; /* Sorted table. */
119 /* Variables specifies on VARIABLES. */
120 static struct variable **variables;
121 static size_t variables_cnt;
124 static struct crosstab **xtab;
127 /* Integer or general mode? */
136 static int num_cells; /* Number of cells requested. */
137 static int cells[8]; /* Cells requested. */
140 static int write; /* One of WR_* that specifies the WRITE style. */
142 /* Command parsing info. */
143 static struct cmd_crosstabs cmd;
146 static struct pool *pl_tc; /* For table cells. */
147 static struct pool *pl_col; /* For column data. */
149 static int internal_cmd_crosstabs (void);
150 static void precalc (void *);
151 static int calc_general (struct ccase *, void *);
152 static int calc_integer (struct ccase *, void *);
153 static void postcalc (void *);
154 static void submit (struct tab_table *);
156 static void format_short (char *s, const struct fmt_spec *fp,
157 const union value *v);
160 static void debug_print (void);
161 static void print_table_entries (struct table_entry **tab);
164 /* Parse and execute CROSSTABS, then clean up. */
168 int result = internal_cmd_crosstabs ();
171 pool_destroy (pl_tc);
172 pool_destroy (pl_col);
177 /* Parses and executes the CROSSTABS procedure. */
179 internal_cmd_crosstabs (void)
187 pl_tc = pool_create ();
188 pl_col = pool_create ();
190 if (!parse_crosstabs (&cmd))
194 /* Needs variables. */
198 mode = variables ? INTEGER : GENERAL;
203 cmd.a_cells[CRS_CL_COUNT] = 1;
209 for (i = 0; i < CRS_CL_count; i++)
214 cmd.a_cells[CRS_CL_COUNT] = 1;
215 cmd.a_cells[CRS_CL_ROW] = 1;
216 cmd.a_cells[CRS_CL_COLUMN] = 1;
217 cmd.a_cells[CRS_CL_TOTAL] = 1;
219 if (cmd.a_cells[CRS_CL_ALL])
221 for (i = 0; i < CRS_CL_count; i++)
223 cmd.a_cells[CRS_CL_ALL] = 0;
225 cmd.a_cells[CRS_CL_NONE] = 0;
227 for (num_cells = i = 0; i < CRS_CL_count; i++)
229 cells[num_cells++] = i;
232 if (cmd.sbc_statistics)
237 for (i = 0; i < CRS_ST_count; i++)
238 if (cmd.a_statistics[i])
241 cmd.a_statistics[CRS_ST_CHISQ] = 1;
242 if (cmd.a_statistics[CRS_ST_ALL])
243 for (i = 0; i < CRS_ST_count; i++)
244 cmd.a_statistics[i] = 1;
248 if (cmd.miss == CRS_REPORT && mode == GENERAL)
250 msg (SE, _("Missing mode REPORT not allowed in general mode. "
251 "Assuming MISSING=TABLE."));
252 cmd.miss = CRS_TABLE;
256 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
257 cmd.a_write[CRS_WR_ALL] = 0;
258 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
260 msg (SE, _("Write mode ALL not allowed in general mode. "
261 "Assuming WRITE=CELLS."));
262 cmd.a_write[CRS_WR_CELLS] = 1;
265 && (cmd.a_write[CRS_WR_NONE]
266 + cmd.a_write[CRS_WR_ALL]
267 + cmd.a_write[CRS_WR_CELLS] == 0))
268 cmd.a_write[CRS_WR_CELLS] = 1;
269 if (cmd.a_write[CRS_WR_CELLS])
270 write = CRS_WR_CELLS;
271 else if (cmd.a_write[CRS_WR_ALL])
276 procedure_with_splits (precalc,
277 mode == GENERAL ? calc_general : calc_integer,
283 /* Parses the TABLES subcommand. */
285 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
287 struct var_set *var_set;
289 struct variable ***by = NULL;
294 /* Ensure that this is a TABLES subcommand. */
295 if (!lex_match_id ("TABLES")
296 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
301 if (variables != NULL)
302 var_set = var_set_create_from_array (variables, variables_cnt);
304 var_set = var_set_create_from_dict (default_dict);
305 assert (var_set != NULL);
309 by = xrealloc (by, sizeof *by * (n_by + 1));
310 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
311 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
312 PV_NO_DUPLICATE | PV_NO_SCRATCH))
317 if (!lex_match (T_BY))
321 lex_error (_("expecting BY"));
330 int *by_iter = xcalloc (sizeof *by_iter * n_by);
333 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
334 for (i = 0; i < nx; i++)
338 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
345 for (i = 0; i < n_by; i++)
346 x->vars[i] = by[i][by_iter[i]];
352 for (i = n_by - 1; i >= 0; i--)
354 if (++by_iter[i] < by_nvar[i])
367 /* All return paths lead here. */
371 for (i = 0; i < n_by; i++)
377 var_set_destroy (var_set);
382 /* Parses the VARIABLES subcommand. */
384 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
388 msg (SE, _("VARIABLES must be specified before TABLES."));
396 int orig_nv = variables_cnt;
401 if (!parse_variables (default_dict, &variables, &variables_cnt,
402 (PV_APPEND | PV_NUMERIC
403 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
408 lex_error ("expecting `('");
413 if (!lex_force_int ())
415 min = lex_integer ();
420 if (!lex_force_int ())
422 max = lex_integer ();
425 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
433 lex_error ("expecting `)'");
438 for (i = orig_nv; i < variables_cnt; i++)
440 variables[i]->p.crs.min = min;
441 variables[i]->p.crs.max = max + 1.;
442 variables[i]->p.crs.count = max - min + 1;
461 printf ("CROSSTABS\n");
463 if (variables != NULL)
467 printf ("\t/VARIABLES=");
468 for (i = 0; i < variables_cnt; i++)
470 struct variable *v = variables[i];
472 printf ("%s ", v->name);
473 if (i < variables_cnt - 1)
475 struct variable *nv = variables[i + 1];
477 if (v->p.crs.min == nv->p.crs.min
478 && v->p.crs.max == nv->p.crs.max)
481 printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1);
489 printf ("\t/TABLES=");
490 for (i = 0; i < nxtab; i++)
492 struct crosstab *x = xtab[i];
497 for (j = 0; j < x->nvar; j++)
501 printf ("%s", x->v[j]->name);
507 #endif /* DEBUGGING */
509 /* Data file processing. */
511 static int compare_table_entry (const void *, const void *, void *);
512 static unsigned hash_table_entry (const void *, void *);
514 /* Set up the crosstabulation tables for processing. */
516 precalc (void *aux UNUSED)
520 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
530 for (i = 0; i < nxtab; i++)
532 struct crosstab *x = xtab[i];
537 x->ofs = n_sorted_tab;
539 for (j = 2; j < x->nvar; j++)
540 count *= x->vars[j - 2]->p.crs.count;
542 sorted_tab = xrealloc (sorted_tab,
543 sizeof *sorted_tab * (n_sorted_tab + count));
544 v = local_alloc (sizeof *v * x->nvar);
545 for (j = 2; j < x->nvar; j++)
546 v[j] = x->vars[j]->p.crs.min;
547 for (j = 0; j < count; j++)
549 struct table_entry *te;
552 te = sorted_tab[n_sorted_tab++]
553 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
557 const int mat_size = (x->vars[0]->p.crs.count
558 * x->vars[1]->p.crs.count);
561 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
562 for (m = 0; m < mat_size; m++)
566 for (k = 2; k < x->nvar; k++)
567 te->values[k].f = v[k];
568 for (k = 2; k < x->nvar; k++)
569 if (++v[k] >= x->vars[k]->p.crs.max)
570 v[k] = x->vars[k]->p.crs.min;
577 sorted_tab = xrealloc (sorted_tab,
578 sizeof *sorted_tab * (n_sorted_tab + 1));
579 sorted_tab[n_sorted_tab] = NULL;
583 /* Form crosstabulations for general mode. */
585 calc_general (struct ccase *c, void *aux UNUSED)
588 double weight = dict_get_case_weight (default_dict, c);
590 /* Flattened current table index. */
593 for (t = 0; t < nxtab; t++)
595 struct crosstab *x = xtab[t];
596 const size_t entry_size = (sizeof (struct table_entry)
597 + sizeof (union value) * (x->nvar - 1));
598 struct table_entry *te = local_alloc (entry_size);
600 /* Construct table entry for the current record and table. */
606 for (j = 0; j < x->nvar; j++)
608 if ((cmd.miss == CRS_TABLE
609 && is_missing (&c->data[x->vars[j]->fv], x->vars[j]))
610 || (cmd.miss == CRS_INCLUDE
611 && is_system_missing (&c->data[x->vars[j]->fv],
614 x->missing += weight;
618 if (x->vars[j]->type == NUMERIC)
619 te->values[j].f = c->data[x->vars[j]->fv].f;
622 memcpy (te->values[j].s, c->data[x->vars[j]->fv].s,
625 /* Necessary in order to simplify comparisons. */
626 memset (&te->values[j].s[x->vars[j]->width], 0,
627 sizeof (union value) - x->vars[j]->width);
632 /* Add record to hash table. */
634 struct table_entry **tepp
635 = (struct table_entry **) hsh_probe (gen_tab, te);
638 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
641 memcpy (tep, te, entry_size);
646 (*tepp)->u.freq += weight;
657 calc_integer (struct ccase *c, void *aux UNUSED)
660 double weight = dict_get_case_weight (default_dict, c);
662 /* Flattened current table index. */
665 for (t = 0; t < nxtab; t++)
667 struct crosstab *x = xtab[t];
672 for (i = 0; i < x->nvar; i++)
674 struct variable *const v = x->vars[i];
675 double value = c->data[v->fv].f;
677 /* Note that the first test also rules out SYSMIS. */
678 if ((value < v->p.crs.min || value >= v->p.crs.max)
679 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
681 x->missing += weight;
687 ofs += fact * ((int) value - v->p.crs.min);
688 fact *= v->p.crs.count;
693 const int row = c->data[x->vars[ROW_VAR]->fv].f - x->vars[ROW_VAR]->p.crs.min;
694 const int col = c->data[x->vars[COL_VAR]->fv].f - x->vars[COL_VAR]->p.crs.min;
695 const int col_dim = x->vars[COL_VAR]->p.crs.count;
697 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
707 /* Print out all table entries in NULL-terminated TAB for use by a
708 debugger (a person, not a program). */
710 print_table_entries (struct table_entry **tab)
712 printf ("raw crosstabulation data:\n");
715 const struct crosstab *x = xtab[(*tab)->table];
718 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
719 for (i = 0; i < x->nvar; i++)
723 printf ("%s:", x->v[i]->name);
725 if (x->v[i]->type == NUMERIC)
726 printf ("%g", (*tab)->v[i].f);
728 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
736 /* Compare the table_entry's at A and B and return a strcmp()-type
739 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
741 const struct table_entry *a = a_;
742 const struct table_entry *b = b_;
744 if (a->table > b->table)
746 else if (a->table < b->table)
750 const struct crosstab *x = xtab[a->table];
753 for (i = x->nvar - 1; i >= 0; i--)
754 if (x->vars[i]->type == NUMERIC)
756 const double diffnum = a->values[i].f - b->values[i].f;
759 else if (diffnum > 0)
764 assert (x->vars[i]->type == ALPHA);
766 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
777 /* Calculate a hash value from table_entry A. */
779 hash_table_entry (const void *a_, void *foo UNUSED)
781 const struct table_entry *a = a_;
786 for (i = 0; i < xtab[a->table]->nvar; i++)
787 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
792 /* Post-data reading calculations. */
794 static struct table_entry **find_pivot_extent (struct table_entry **,
795 int *cnt, int pivot);
796 static void enum_var_values (struct table_entry **entries, int entry_cnt,
798 union value **values, int *value_cnt);
799 static void output_pivot_table (struct table_entry **, struct table_entry **,
800 double **, double **, double **,
801 int *, int *, int *);
802 static void make_summary_table (void);
805 postcalc (void *aux UNUSED)
809 n_sorted_tab = hsh_count (gen_tab);
810 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
812 print_table_entries (sorted_tab);
816 make_summary_table ();
818 /* Identify all the individual crosstabulation tables, and deal with
821 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
822 int pc = n_sorted_tab; /* Pivot count. */
824 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
825 int maxrows = 0, maxcols = 0, maxcells = 0;
829 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
833 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
834 &maxrows, &maxcols, &maxcells);
843 hsh_destroy (gen_tab);
846 static void insert_summary (struct tab_table *, int tab_index, double valid);
848 /* Output a table summarizing the cases processed. */
850 make_summary_table (void)
852 struct tab_table *summary;
854 struct table_entry **pb = sorted_tab, **pe;
855 int pc = n_sorted_tab;
858 summary = tab_create (7, 3 + nxtab, 1);
859 tab_title (summary, 0, _("Summary."));
860 tab_headers (summary, 1, 0, 3, 0);
861 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
862 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
863 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
864 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
865 tab_hline (summary, TAL_1, 1, 6, 1);
866 tab_hline (summary, TAL_1, 1, 6, 2);
867 tab_vline (summary, TAL_1, 3, 1, 1);
868 tab_vline (summary, TAL_1, 5, 1, 1);
872 for (i = 0; i < 3; i++)
874 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
875 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
878 tab_offset (summary, 0, 3);
884 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
888 while (cur_tab < (*pb)->table)
889 insert_summary (summary, cur_tab++, 0.);
892 for (valid = 0.; pb < pe; pb++)
893 valid += (*pb)->u.freq;
896 const struct crosstab *const x = xtab[(*pb)->table];
897 const int n_cols = x->vars[COL_VAR]->p.crs.count;
898 const int n_rows = x->vars[ROW_VAR]->p.crs.count;
899 const int count = n_cols * n_rows;
901 for (valid = 0.; pb < pe; pb++)
903 const double *data = (*pb)->u.data;
906 for (i = 0; i < count; i++)
910 insert_summary (summary, cur_tab++, valid);
915 while (cur_tab < nxtab)
916 insert_summary (summary, cur_tab++, 0.);
921 /* Inserts a line into T describing the crosstabulation at index
922 TAB_INDEX, which has VALID valid observations. */
924 insert_summary (struct tab_table *t, int tab_index, double valid)
926 struct crosstab *x = xtab[tab_index];
928 tab_hline (t, TAL_1, 0, 6, 0);
930 /* Crosstabulation name. */
932 char *buf = local_alloc (128 * x->nvar);
936 for (i = 0; i < x->nvar; i++)
939 cp = stpcpy (cp, " * ");
942 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
944 tab_text (t, 0, 0, TAB_LEFT, buf);
949 /* Counts and percentages. */
959 for (i = 0; i < 3; i++)
961 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
962 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
973 static struct tab_table *table; /* Crosstabulation table. */
974 static struct tab_table *chisq; /* Chi-square table. */
975 static struct tab_table *sym; /* Symmetric measures table. */
976 static struct tab_table *risk; /* Risk estimate table. */
977 static struct tab_table *direct; /* Directional measures table. */
980 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
982 /* Column values, number of columns. */
983 static union value *cols;
986 /* Row values, number of rows. */
987 static union value *rows;
990 /* Number of statistically interesting columns/rows (columns/rows with
992 static int ns_cols, ns_rows;
994 /* Crosstabulation. */
995 static struct crosstab *x;
997 /* Number of variables from the crosstabulation to consider. This is
998 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1001 /* Matrix contents. */
1002 static double *mat; /* Matrix proper. */
1003 static double *row_tot; /* Row totals. */
1004 static double *col_tot; /* Column totals. */
1005 static double W; /* Grand total. */
1007 static void display_dimensions (struct tab_table *, int first_difference,
1008 struct table_entry *);
1009 static void display_crosstabulation (void);
1010 static void display_chisq (void);
1011 static void display_symmetric (void);
1012 static void display_risk (void);
1013 static void display_directional (void);
1014 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1015 static void table_value_missing (struct tab_table *table, int c, int r,
1016 unsigned char opt, const union value *v,
1017 const struct variable *var);
1018 static void delete_missing (void);
1020 /* Output pivot table beginning at PB and continuing until PE,
1021 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1022 hold *MAXROWS entries. */
1024 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1025 double **matp, double **row_totp, double **col_totp,
1026 int *maxrows, int *maxcols, int *maxcells)
1029 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1030 int tc = pe - pb; /* Table count. */
1032 /* Table entry for header comparison. */
1033 struct table_entry *cmp = NULL;
1035 x = xtab[(*pb)->table];
1036 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1038 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1040 /* Crosstabulation table initialization. */
1043 table = tab_create (nvar + n_cols,
1044 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1045 tab_headers (table, nvar - 1, 0, 2, 0);
1047 /* First header line. */
1048 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1049 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1051 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1053 /* Second header line. */
1057 for (i = 2; i < nvar; i++)
1058 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1059 TAB_RIGHT | TAT_TITLE,
1061 ? x->vars[i]->label : x->vars[i]->name));
1062 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1063 x->vars[ROW_VAR]->name);
1064 for (i = 0; i < n_cols; i++)
1065 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1067 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1070 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1071 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1075 char *title = local_alloc (x->nvar * 64 + 128);
1079 if (cmd.pivot == CRS_PIVOT)
1080 for (i = 0; i < nvar; i++)
1083 cp = stpcpy (cp, " by ");
1084 cp = stpcpy (cp, x->vars[i]->name);
1088 cp = spprintf (cp, "%s by %s for",
1089 x->vars[0]->name, x->vars[1]->name);
1090 for (i = 2; i < nvar; i++)
1092 char buf[64], *bufp;
1097 cp = stpcpy (cp, x->vars[i]->name);
1099 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1100 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1102 cp = stpcpy (cp, bufp);
1106 cp = stpcpy (cp, " [");
1107 for (i = 0; i < num_cells; i++)
1115 static const struct tuple cell_names[] =
1117 {CRS_CL_COUNT, N_("count")},
1118 {CRS_CL_ROW, N_("row %")},
1119 {CRS_CL_COLUMN, N_("column %")},
1120 {CRS_CL_TOTAL, N_("total %")},
1121 {CRS_CL_EXPECTED, N_("expected")},
1122 {CRS_CL_RESIDUAL, N_("residual")},
1123 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1124 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1128 const struct tuple *t;
1130 for (t = cell_names; t->value != cells[i]; t++)
1131 assert (t->value != -1);
1133 cp = stpcpy (cp, ", ");
1134 cp = stpcpy (cp, gettext (t->name));
1138 tab_title (table, 0, title);
1142 tab_offset (table, 0, 2);
1147 /* Chi-square table initialization. */
1148 if (cmd.a_statistics[CRS_ST_CHISQ])
1150 chisq = tab_create (6 + (nvar - 2),
1151 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1152 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1154 tab_title (chisq, 0, "Chi-square tests.");
1156 tab_offset (chisq, nvar - 2, 0);
1157 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1158 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1159 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1160 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1161 _("Asymp. Sig. (2-sided)"));
1162 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1163 _("Exact. Sig. (2-sided)"));
1164 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1165 _("Exact. Sig. (1-sided)"));
1167 tab_offset (chisq, 0, 1);
1172 /* Symmetric measures. */
1173 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1174 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1175 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1176 || cmd.a_statistics[CRS_ST_KAPPA])
1178 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1179 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1180 tab_title (sym, 0, "Symmetric measures.");
1182 tab_offset (sym, nvar - 2, 0);
1183 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1184 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1185 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1186 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1187 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1188 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1189 tab_offset (sym, 0, 1);
1194 /* Risk estimate. */
1195 if (cmd.a_statistics[CRS_ST_RISK])
1197 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1198 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1199 tab_title (risk, 0, "Risk estimate.");
1201 tab_offset (risk, nvar - 2, 0);
1202 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1203 _(" 95%% Confidence Interval"));
1204 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1205 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1206 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1207 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1208 tab_hline (risk, TAL_1, 2, 3, 1);
1209 tab_vline (risk, TAL_1, 2, 0, 1);
1210 tab_offset (risk, 0, 2);
1215 /* Directional measures. */
1216 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1217 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1219 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1220 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1221 tab_title (direct, 0, "Directional measures.");
1223 tab_offset (direct, nvar - 2, 0);
1224 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1225 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1226 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1227 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1228 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1229 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1230 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1231 tab_offset (direct, 0, 1);
1238 /* Find pivot subtable if applicable. */
1239 te = find_pivot_extent (tb, &tc, 0);
1243 /* Find all the row variable values. */
1244 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1246 /* Allocate memory space for the column and row totals. */
1247 if (n_rows > *maxrows)
1249 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1250 row_tot = *row_totp;
1253 if (n_cols > *maxcols)
1255 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1256 col_tot = *col_totp;
1260 /* Allocate table space for the matrix. */
1261 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1262 tab_realloc (table, -1,
1263 max (tab_nr (table) + (n_rows + 1) * num_cells,
1264 tab_nr (table) * (pe - pb) / (te - tb)));
1266 if (mode == GENERAL)
1268 /* Allocate memory space for the matrix. */
1269 if (n_cols * n_rows > *maxcells)
1271 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1272 *maxcells = n_cols * n_rows;
1277 /* Build the matrix and calculate column totals. */
1279 union value *cur_col = cols;
1280 union value *cur_row = rows;
1282 double *cp = col_tot;
1283 struct table_entry **p;
1286 for (p = &tb[0]; p < te; p++)
1288 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1292 for (; cur_row < &rows[n_rows]; cur_row++)
1298 mp = &mat[cur_col - cols];
1301 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1308 *cp += *mp = (*p)->u.freq;
1313 /* Zero out the rest of the matrix. */
1314 for (; cur_row < &rows[n_rows]; cur_row++)
1320 if (cur_col < &cols[n_cols])
1322 const int rem_cols = n_cols - (cur_col - cols);
1325 for (c = 0; c < rem_cols; c++)
1327 mp = &mat[cur_col - cols];
1328 for (r = 0; r < n_rows; r++)
1330 for (c = 0; c < rem_cols; c++)
1332 mp += n_cols - rem_cols;
1340 double *tp = col_tot;
1342 assert (mode == INTEGER);
1343 mat = (*tb)->u.data;
1346 /* Calculate column totals. */
1347 for (c = 0; c < n_cols; c++)
1350 double *cp = &mat[c];
1352 for (r = 0; r < n_rows; r++)
1353 cum += cp[r * n_cols];
1361 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1362 ns_cols += *cp != 0.;
1365 /* Calculate row totals. */
1368 double *rp = row_tot;
1371 for (ns_rows = 0, r = n_rows; r--; )
1374 for (c = n_cols; c--; )
1382 /* Calculate grand total. */
1388 if (n_rows < n_cols)
1389 tp = row_tot, n = n_rows;
1391 tp = col_tot, n = n_cols;
1398 /* Print the matrix. */
1402 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1403 for (i = 2; i < nvar; i++)
1404 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1407 for (c = 0; c < n_cols; c++)
1408 printf ("%4g", cols[c].f);
1410 for (r = 0; r < n_rows; r++)
1412 printf ("%4g:", rows[r].f);
1413 for (c = 0; c < n_cols; c++)
1414 printf ("%4g", mat[c + r * n_cols]);
1415 printf ("%4g", row_tot[r]);
1419 for (c = 0; c < n_cols; c++)
1420 printf ("%4g", col_tot[c]);
1426 /* Find the first variable that differs from the last subtable,
1427 then display the values of the dimensioning variables for
1428 each table that needs it. */
1430 int first_difference = nvar - 1;
1433 for (; ; first_difference--)
1435 assert (first_difference >= 2);
1436 if (memcmp (&cmp->values[first_difference],
1437 &(*tb)->values[first_difference],
1438 sizeof *cmp->values))
1444 display_dimensions (table, first_difference, *tb);
1446 display_dimensions (chisq, first_difference, *tb);
1448 display_dimensions (sym, first_difference, *tb);
1450 display_dimensions (risk, first_difference, *tb);
1452 display_dimensions (direct, first_difference, *tb);
1456 display_crosstabulation ();
1457 if (cmd.miss == CRS_REPORT)
1462 display_symmetric ();
1466 display_directional ();
1477 tab_resize (chisq, 4 + (nvar - 2), -1);
1488 /* Delete missing rows and columns for statistical analysis when
1491 delete_missing (void)
1496 for (r = 0; r < n_rows; r++)
1497 if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1501 for (c = 0; c < n_cols; c++)
1502 mat[c + r * n_cols] = 0.;
1510 for (c = 0; c < n_cols; c++)
1511 if (is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1515 for (r = 0; r < n_rows; r++)
1516 mat[c + r * n_cols] = 0.;
1522 /* Prepare table T for submission, and submit it. */
1524 submit (struct tab_table *t)
1531 tab_resize (t, -1, 0);
1532 if (tab_nr (t) == tab_t (t))
1537 tab_offset (t, 0, 0);
1539 for (i = 2; i < nvar; i++)
1540 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1541 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1542 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1543 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1545 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1547 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1548 tab_dim (t, crosstabs_dim);
1552 /* Sets the widths of all the columns and heights of all the rows in
1553 table T for driver D. */
1555 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1559 /* Width of a numerical column. */
1560 int c = outp_string_width (d, "0.000000");
1561 if (cmd.miss == CRS_REPORT)
1562 c += outp_string_width (d, "M");
1564 /* Set width for header columns. */
1567 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1569 if (w < d->prop_em_width * 8)
1570 w = d->prop_em_width * 8;
1572 if (w > d->prop_em_width * 15)
1573 w = d->prop_em_width * 15;
1575 for (i = 0; i < t->l; i++)
1579 for (i = t->l; i < t->nc; i++)
1582 for (i = 0; i < t->nr; i++)
1583 t->h[i] = tab_natural_height (t, d, i);
1586 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1587 int *cnt, int pivot);
1588 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1589 int *cnt, int pivot);
1591 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1593 static struct table_entry **
1594 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1596 return (mode == GENERAL
1597 ? find_pivot_extent_general (tp, cnt, pivot)
1598 : find_pivot_extent_integer (tp, cnt, pivot));
1601 /* Find the extent of a region in TP that contains one table. If
1602 PIVOT != 0 that means a set of table entries with identical table
1603 number; otherwise they also have to have the same values for every
1604 dimension after the row and column dimensions. The table that is
1605 searched starts at TP and has length CNT. Returns the first entry
1606 after the last one in the table; sets *CNT to the number of
1607 remaining values. If there are no entries in TP at all, returns
1608 NULL. A yucky interface, admittedly, but it works. */
1609 static struct table_entry **
1610 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1612 struct table_entry *fp = *tp;
1617 x = xtab[(*tp)->table];
1625 if ((*tp)->table != fp->table)
1630 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1637 /* Integer mode correspondent to find_pivot_extent_general(). This
1638 could be optimized somewhat, but I just don't give a crap about
1639 CROSSTABS performance in integer mode, which is just a
1640 CROSSTABS wart as far as I'm concerned.
1642 That said, feel free to send optimization patches to me. */
1643 static struct table_entry **
1644 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1646 struct table_entry *fp = *tp;
1651 x = xtab[(*tp)->table];
1659 if ((*tp)->table != fp->table)
1664 if (memcmp (&(*tp)->values[2], &fp->values[2],
1665 sizeof (union value) * (x->nvar - 2)))
1672 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1673 result. WIDTH_ points to an int which is either 0 for a
1674 numeric value or a string width for a string value. */
1676 compare_value (const void *a_, const void *b_, void *width_)
1678 const union value *a = a_;
1679 const union value *b = b_;
1680 const int *pwidth = width_;
1681 const int width = *pwidth;
1684 return (a->f < b->f) ? -1 : (a->f > b->f);
1686 return strncmp (a->s, b->s, width);
1689 /* Given an array of ENTRY_CNT table_entry structures starting at
1690 ENTRIES, creates a sorted list of the values that the variable
1691 with index VAR_INDEX takes on. The values are returned as a
1692 malloc()'darray stored in *VALUES, with the number of values
1693 stored in *VALUE_CNT.
1696 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1697 union value **values, int *value_cnt)
1699 if (mode == GENERAL)
1701 int width = xtab[(*entries)->table]->vars[var_idx]->width;
1704 *values = xmalloc (sizeof **values * entry_cnt);
1705 for (i = 0; i < entry_cnt; i++)
1706 (*values)[i] = entries[i]->values[var_idx];
1707 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1708 compare_value, &width);
1712 struct crosstab_proc *crs
1713 = &xtab[(*entries)->table]->vars[var_idx]->p.crs;
1716 assert (mode == INTEGER);
1717 *values = xmalloc (sizeof **values * crs->count);
1718 for (i = 0; i < crs->count; i++)
1719 (*values)[i].f = i + crs->min;
1720 *value_cnt = crs->count;
1724 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1725 from V, displayed with print format spec from variable VAR. When
1726 in REPORT missing-value mode, missing values have an M appended. */
1728 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1729 const union value *v, const struct variable *var)
1731 struct len_string s;
1733 const char *label = val_labs_find (var->val_labs, *v);
1736 tab_text (table, c, r, TAB_LEFT, label);
1740 s.string = tab_alloc (table, var->print.w);
1741 format_short (s.string, &var->print, v);
1742 s.length = strlen (s.string);
1743 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1744 s.string[s.length++] = 'M';
1745 while (s.length && *s.string == ' ')
1750 tab_raw (table, c, r, opt, &s);
1753 /* Draws a line across TABLE at the current row to indicate the most
1754 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1755 that changed, and puts the values that changed into the table. TB
1756 and X must be the corresponding table_entry and crosstab,
1759 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1761 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1763 for (; first_difference >= 2; first_difference--)
1764 table_value_missing (table, nvar - first_difference - 1, 0,
1765 TAB_RIGHT, &tb->values[first_difference],
1766 x->vars[first_difference]);
1769 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1770 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1771 additionally suffixed with a letter `M'. */
1773 format_cell_entry (struct tab_table *table, int c, int r, double value,
1774 char suffix, int mark_missing)
1776 const struct fmt_spec f = {FMT_F, 10, 1};
1778 struct len_string s;
1781 s.string = tab_alloc (table, 16);
1783 data_out (s.string, &f, &v);
1784 while (*s.string == ' ')
1790 s.string[s.length++] = suffix;
1792 s.string[s.length++] = 'M';
1794 tab_raw (table, c, r, TAB_RIGHT, &s);
1797 /* Displays the crosstabulation table. */
1799 display_crosstabulation (void)
1804 for (r = 0; r < n_rows; r++)
1805 table_value_missing (table, nvar - 2, r * num_cells,
1806 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1808 tab_text (table, nvar - 2, n_rows * num_cells,
1809 TAB_LEFT, _("Total"));
1811 /* Put in the actual cells. */
1816 tab_offset (table, nvar - 1, -1);
1817 for (r = 0; r < n_rows; r++)
1820 tab_hline (table, TAL_1, -1, n_cols, 0);
1821 for (c = 0; c < n_cols; c++)
1823 int mark_missing = 0;
1824 double expected_value = row_tot[r] * col_tot[c] / W;
1825 if (cmd.miss == CRS_REPORT
1826 && (is_num_user_missing (cols[c].f, x->vars[COL_VAR])
1827 || is_num_user_missing (rows[r].f, x->vars[ROW_VAR])))
1829 for (i = 0; i < num_cells; i++)
1840 v = *mp / row_tot[r] * 100.;
1844 v = *mp / col_tot[c] * 100.;
1851 case CRS_CL_EXPECTED:
1854 case CRS_CL_RESIDUAL:
1855 v = *mp - expected_value;
1857 case CRS_CL_SRESIDUAL:
1858 v = (*mp - expected_value) / sqrt (expected_value);
1860 case CRS_CL_ASRESIDUAL:
1861 v = ((*mp - expected_value)
1862 / sqrt (expected_value
1863 * (1. - row_tot[r] / W)
1864 * (1. - col_tot[c] / W)));
1871 format_cell_entry (table, c, i, v, suffix, mark_missing);
1877 tab_offset (table, -1, tab_row (table) + num_cells);
1885 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1886 for (r = 0; r < n_rows; r++)
1889 int mark_missing = 0;
1891 if (cmd.miss == CRS_REPORT
1892 && is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1895 for (i = 0; i < num_cells; i++)
1909 v = row_tot[r] / W * 100.;
1913 v = row_tot[r] / W * 100.;
1916 case CRS_CL_EXPECTED:
1917 case CRS_CL_RESIDUAL:
1918 case CRS_CL_SRESIDUAL:
1919 case CRS_CL_ASRESIDUAL:
1927 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1928 tab_next_row (table);
1933 /* Column totals, grand total. */
1939 tab_hline (table, TAL_1, -1, n_cols, 0);
1940 for (c = 0; c <= n_cols; c++)
1942 double ct = c < n_cols ? col_tot[c] : W;
1943 int mark_missing = 0;
1947 if (cmd.miss == CRS_REPORT && c < n_cols
1948 && is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1951 for (i = 0; i < num_cells; i++)
1973 case CRS_CL_EXPECTED:
1974 case CRS_CL_RESIDUAL:
1975 case CRS_CL_SRESIDUAL:
1976 case CRS_CL_ASRESIDUAL:
1983 format_cell_entry (table, c, i, v, suffix, mark_missing);
1988 tab_offset (table, -1, tab_row (table) + last_row);
1991 tab_offset (table, 0, -1);
1994 static void calc_r (double *X, double *Y, double *, double *, double *);
1995 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1997 /* Display chi-square statistics. */
1999 display_chisq (void)
2001 static const char *chisq_stats[N_CHISQ] =
2003 N_("Pearson Chi-Square"),
2004 N_("Likelihood Ratio"),
2005 N_("Fisher's Exact Test"),
2006 N_("Continuity Correction"),
2007 N_("Linear-by-Linear Association"),
2009 double chisq_v[N_CHISQ];
2010 double fisher1, fisher2;
2016 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2018 tab_offset (chisq, nvar - 2, -1);
2020 for (i = 0; i < N_CHISQ; i++)
2022 if ((i != 2 && chisq_v[i] == SYSMIS)
2023 || (i == 2 && fisher1 == SYSMIS))
2027 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2030 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2031 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2032 tab_float (chisq, 3, 0, TAB_RIGHT,
2033 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
2038 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2039 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2041 tab_next_row (chisq);
2044 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2045 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2046 tab_next_row (chisq);
2048 tab_offset (chisq, 0, -1);
2051 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2052 double[N_SYMMETRIC]);
2054 /* Display symmetric measures. */
2056 display_symmetric (void)
2058 static const char *categories[] =
2060 N_("Nominal by Nominal"),
2061 N_("Ordinal by Ordinal"),
2062 N_("Interval by Interval"),
2063 N_("Measure of Agreement"),
2066 static const char *stats[N_SYMMETRIC] =
2070 N_("Contingency Coefficient"),
2071 N_("Kendall's tau-b"),
2072 N_("Kendall's tau-c"),
2074 N_("Spearman Correlation"),
2079 static const int stats_categories[N_SYMMETRIC] =
2081 0, 0, 0, 1, 1, 1, 1, 2, 3,
2085 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2088 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2091 tab_offset (sym, nvar - 2, -1);
2093 for (i = 0; i < N_SYMMETRIC; i++)
2095 if (sym_v[i] == SYSMIS)
2098 if (stats_categories[i] != last_cat)
2100 last_cat = stats_categories[i];
2101 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2104 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2105 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2106 if (sym_ase[i] != SYSMIS)
2107 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2108 if (sym_t[i] != SYSMIS)
2109 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2110 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2114 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2115 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2118 tab_offset (sym, 0, -1);
2121 static int calc_risk (double[], double[], double[], union value *);
2123 /* Display risk estimate. */
2128 double risk_v[3], lower[3], upper[3];
2132 if (!calc_risk (risk_v, upper, lower, c))
2135 tab_offset (risk, nvar - 2, -1);
2137 for (i = 0; i < 3; i++)
2139 if (risk_v[i] == SYSMIS)
2145 if (x->vars[COL_VAR]->type == NUMERIC)
2146 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2147 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2149 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2150 x->vars[COL_VAR]->name,
2151 x->vars[COL_VAR]->width, c[0].s,
2152 x->vars[COL_VAR]->width, c[1].s);
2156 if (x->vars[ROW_VAR]->type == NUMERIC)
2157 sprintf (buf, _("For cohort %s = %g"),
2158 x->vars[ROW_VAR]->name, rows[i - 1].f);
2160 sprintf (buf, _("For cohort %s = %.*s"),
2161 x->vars[ROW_VAR]->name,
2162 x->vars[ROW_VAR]->width, rows[i - 1].s);
2166 tab_text (risk, 0, 0, TAB_LEFT, buf);
2167 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2168 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2169 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2170 tab_next_row (risk);
2173 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2174 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2175 tab_next_row (risk);
2177 tab_offset (risk, 0, -1);
2180 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2181 double[N_DIRECTIONAL]);
2183 /* Display directional measures. */
2185 display_directional (void)
2187 static const char *categories[] =
2189 N_("Nominal by Nominal"),
2190 N_("Ordinal by Ordinal"),
2191 N_("Nominal by Interval"),
2194 static const char *stats[] =
2197 N_("Goodman and Kruskal tau"),
2198 N_("Uncertainty Coefficient"),
2203 static const char *types[] =
2210 static const int stats_categories[N_DIRECTIONAL] =
2212 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2215 static const int stats_stats[N_DIRECTIONAL] =
2217 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2220 static const int stats_types[N_DIRECTIONAL] =
2222 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2225 static const int *stats_lookup[] =
2232 static const char **stats_names[] =
2244 double direct_v[N_DIRECTIONAL];
2245 double direct_ase[N_DIRECTIONAL];
2246 double direct_t[N_DIRECTIONAL];
2250 if (!calc_directional (direct_v, direct_ase, direct_t))
2253 tab_offset (direct, nvar - 2, -1);
2255 for (i = 0; i < N_DIRECTIONAL; i++)
2257 if (direct_v[i] == SYSMIS)
2263 for (j = 0; j < 3; j++)
2264 if (last[j] != stats_lookup[j][i])
2267 tab_hline (direct, TAL_1, j, 6, 0);
2272 int k = last[j] = stats_lookup[j][i];
2277 string = x->vars[0]->name;
2279 string = x->vars[1]->name;
2281 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2282 gettext (stats_names[j][k]), string);
2287 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2288 if (direct_ase[i] != SYSMIS)
2289 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2290 if (direct_t[i] != SYSMIS)
2291 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2292 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2293 tab_next_row (direct);
2296 tab_offset (direct, 0, -1);
2299 /* Statistical calculations. */
2301 /* Returns the value of the gamma (factorial) function for an integer
2304 gamma_int (double x)
2309 for (i = 2; i < x; i++)
2314 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2316 static inline double
2317 Pr (int a, int b, int c, int d)
2319 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2320 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2321 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2322 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2323 / gamma_int (a + b + c + d + 1.));
2326 /* Swap the contents of A and B. */
2328 swap (int *a, int *b)
2335 /* Calculate significance for Fisher's exact test as specified in
2336 _SPSS Statistical Algorithms_, Appendix 5. */
2338 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2342 if (min (c, d) < min (a, b))
2343 swap (&a, &c), swap (&b, &d);
2344 if (min (b, d) < min (a, c))
2345 swap (&a, &b), swap (&c, &d);
2349 swap (&a, &b), swap (&c, &d);
2351 swap (&a, &c), swap (&b, &d);
2355 for (x = 0; x <= a; x++)
2356 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2358 *fisher2 = *fisher1;
2359 for (x = 1; x <= b; x++)
2360 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2363 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2364 columns with values COLS and N_ROWS rows with values ROWS. Values
2365 in the matrix sum to W. */
2367 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2368 double *fisher1, double *fisher2)
2372 chisq[0] = chisq[1] = 0.;
2373 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2374 *fisher1 = *fisher2 = SYSMIS;
2376 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2378 if (ns_rows <= 1 || ns_cols <= 1)
2380 chisq[0] = chisq[1] = SYSMIS;
2384 for (r = 0; r < n_rows; r++)
2385 for (c = 0; c < n_cols; c++)
2387 const double expected = row_tot[r] * col_tot[c] / W;
2388 const double freq = mat[n_cols * r + c];
2389 const double residual = freq - expected;
2391 chisq[0] += residual * residual / expected;
2393 chisq[1] += freq * log (expected / freq);
2404 /* Calculate Yates and Fisher exact test. */
2405 if (ns_cols == 2 && ns_rows == 2)
2407 double f11, f12, f21, f22;
2413 for (i = j = 0; i < n_cols; i++)
2414 if (col_tot[i] != 0.)
2423 f11 = mat[nz_cols[0]];
2424 f12 = mat[nz_cols[1]];
2425 f21 = mat[nz_cols[0] + n_cols];
2426 f22 = mat[nz_cols[1] + n_cols];
2431 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2434 chisq[3] = (W * x * x
2435 / (f11 + f12) / (f21 + f22)
2436 / (f11 + f21) / (f12 + f22));
2444 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2445 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2448 /* Calculate Mantel-Haenszel. */
2449 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2451 double r, ase_0, ase_1;
2452 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2454 chisq[4] = (W - 1.) * r * r;
2459 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2460 ASE_1, and ase_0 into ASE_0. The row and column values must be
2461 passed in X and Y. */
2463 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2465 double SX, SY, S, T;
2467 double sum_XYf, sum_X2Y2f;
2468 double sum_Xr, sum_X2r;
2469 double sum_Yc, sum_Y2c;
2472 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2473 for (j = 0; j < n_cols; j++)
2475 double fij = mat[j + i * n_cols];
2476 double product = X[i] * Y[j];
2477 double temp = fij * product;
2479 sum_X2Y2f += temp * product;
2482 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2484 sum_Xr += X[i] * row_tot[i];
2485 sum_X2r += X[i] * X[i] * row_tot[i];
2489 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2491 sum_Yc += Y[i] * col_tot[i];
2492 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2496 S = sum_XYf - sum_Xr * sum_Yc / W;
2497 SX = sum_X2r - sum_Xr * sum_Xr / W;
2498 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2501 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2506 for (s = c = 0., i = 0; i < n_rows; i++)
2507 for (j = 0; j < n_cols; j++)
2509 double Xresid, Yresid;
2512 Xresid = X[i] - Xbar;
2513 Yresid = Y[j] - Ybar;
2514 temp = (T * Xresid * Yresid
2516 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2517 y = mat[j + i * n_cols] * temp * temp - c;
2522 *ase_1 = sqrt (s) / (T * T);
2526 static double somers_d_v[3];
2527 static double somers_d_ase[3];
2528 static double somers_d_t[3];
2530 /* Calculate symmetric statistics and their asymptotic standard
2531 errors. Returns 0 if none could be calculated. */
2533 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2534 double t[N_SYMMETRIC])
2536 int q = min (ns_rows, ns_cols);
2545 for (i = 0; i < N_SYMMETRIC; i++)
2546 v[i] = ase[i] = t[i] = SYSMIS;
2549 /* Phi, Cramer's V, contingency coefficient. */
2550 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2552 double Xp = 0.; /* Pearson chi-square. */
2557 for (r = 0; r < n_rows; r++)
2558 for (c = 0; c < n_cols; c++)
2560 const double expected = row_tot[r] * col_tot[c] / W;
2561 const double freq = mat[n_cols * r + c];
2562 const double residual = freq - expected;
2564 Xp += residual * residual / expected;
2568 if (cmd.a_statistics[CRS_ST_PHI])
2570 v[0] = sqrt (Xp / W);
2571 v[1] = sqrt (Xp / (W * (q - 1)));
2573 if (cmd.a_statistics[CRS_ST_CC])
2574 v[2] = sqrt (Xp / (Xp + W));
2577 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2578 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2583 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2590 for (r = 0; r < n_rows; r++)
2591 Dr -= row_tot[r] * row_tot[r];
2592 for (c = 0; c < n_cols; c++)
2593 Dc -= col_tot[c] * col_tot[c];
2599 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2600 for (c = 0; c < n_cols; c++)
2604 for (r = 0; r < n_rows; r++)
2605 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2615 for (i = 0; i < n_rows; i++)
2619 for (j = 1; j < n_cols; j++)
2620 Cij += col_tot[j] - cum[j + i * n_cols];
2623 for (j = 1; j < n_cols; j++)
2624 Dij += cum[j + (i - 1) * n_cols];
2628 double fij = mat[j + i * n_cols];
2634 assert (j < n_cols);
2636 Cij -= col_tot[j] - cum[j + i * n_cols];
2637 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2641 Cij += cum[j - 1 + (i - 1) * n_cols];
2642 Dij -= cum[j + (i - 1) * n_cols];
2648 if (cmd.a_statistics[CRS_ST_BTAU])
2649 v[3] = (P - Q) / sqrt (Dr * Dc);
2650 if (cmd.a_statistics[CRS_ST_CTAU])
2651 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2652 if (cmd.a_statistics[CRS_ST_GAMMA])
2653 v[5] = (P - Q) / (P + Q);
2655 /* ASE for tau-b, tau-c, gamma. Calculations could be
2656 eliminated here, at expense of memory. */
2661 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2662 for (i = 0; i < n_rows; i++)
2666 for (j = 1; j < n_cols; j++)
2667 Cij += col_tot[j] - cum[j + i * n_cols];
2670 for (j = 1; j < n_cols; j++)
2671 Dij += cum[j + (i - 1) * n_cols];
2675 double fij = mat[j + i * n_cols];
2677 if (cmd.a_statistics[CRS_ST_BTAU])
2679 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2680 + v[3] * (row_tot[i] * Dc
2681 + col_tot[j] * Dr));
2682 btau_cum += fij * temp * temp;
2686 const double temp = Cij - Dij;
2687 ctau_cum += fij * temp * temp;
2690 if (cmd.a_statistics[CRS_ST_GAMMA])
2692 const double temp = Q * Cij - P * Dij;
2693 gamma_cum += fij * temp * temp;
2696 if (cmd.a_statistics[CRS_ST_D])
2698 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2699 - (P - Q) * (W - row_tot[i]));
2700 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2701 - (Q - P) * (W - col_tot[j]));
2706 assert (j < n_cols);
2708 Cij -= col_tot[j] - cum[j + i * n_cols];
2709 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2713 Cij += cum[j - 1 + (i - 1) * n_cols];
2714 Dij -= cum[j + (i - 1) * n_cols];
2720 btau_var = ((btau_cum
2721 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2723 if (cmd.a_statistics[CRS_ST_BTAU])
2725 ase[3] = sqrt (btau_var);
2726 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2729 if (cmd.a_statistics[CRS_ST_CTAU])
2731 ase[4] = ((2 * q / ((q - 1) * W * W))
2732 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2733 t[4] = v[4] / ase[4];
2735 if (cmd.a_statistics[CRS_ST_GAMMA])
2737 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2738 t[5] = v[5] / (2. / (P + Q)
2739 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2741 if (cmd.a_statistics[CRS_ST_D])
2743 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2744 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2745 somers_d_t[0] = (somers_d_v[0]
2747 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2748 somers_d_v[1] = (P - Q) / Dc;
2749 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2750 somers_d_t[1] = (somers_d_v[1]
2752 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2753 somers_d_v[2] = (P - Q) / Dr;
2754 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2755 somers_d_t[2] = (somers_d_v[2]
2757 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2763 /* Spearman correlation, Pearson's r. */
2764 if (cmd.a_statistics[CRS_ST_CORR])
2766 double *R = local_alloc (sizeof *R * n_rows);
2767 double *C = local_alloc (sizeof *C * n_cols);
2770 double y, t, c = 0., s = 0.;
2775 R[i] = s + (row_tot[i] + 1.) / 2.;
2782 assert (i < n_rows);
2787 double y, t, c = 0., s = 0.;
2792 C[j] = s + (col_tot[j] + 1.) / 2;
2799 assert (j < n_cols);
2803 calc_r (R, C, &v[6], &t[6], &ase[6]);
2809 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2813 /* Cohen's kappa. */
2814 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2816 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2819 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2820 i < ns_rows; i++, j++)
2824 while (col_tot[j] == 0.)
2827 prod = row_tot[i] * col_tot[j];
2828 sum = row_tot[i] + col_tot[j];
2830 sum_fii += mat[j + i * n_cols];
2832 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2833 sum_riciri_ci += prod * sum;
2835 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2836 for (j = 0; j < ns_cols; j++)
2838 double sum = row_tot[i] + col_tot[j];
2839 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2842 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2844 ase[8] = sqrt ((W * W * sum_rici
2845 + sum_rici * sum_rici
2846 - W * sum_riciri_ci)
2847 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2849 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2850 / pow2 (W * W - sum_rici))
2851 + ((2. * (W - sum_fii)
2852 * (2. * sum_fii * sum_rici
2853 - W * sum_fiiri_ci))
2854 / cube (W * W - sum_rici))
2855 + (pow2 (W - sum_fii)
2856 * (W * sum_fijri_ci2 - 4.
2857 * sum_rici * sum_rici)
2858 / pow4 (W * W - sum_rici))));
2860 t[8] = v[8] / ase[8];
2867 /* Calculate risk estimate. */
2869 calc_risk (double *value, double *upper, double *lower, union value *c)
2871 double f11, f12, f21, f22;
2877 for (i = 0; i < 3; i++)
2878 value[i] = upper[i] = lower[i] = SYSMIS;
2881 if (ns_rows != 2 || ns_cols != 2)
2888 for (i = j = 0; i < n_cols; i++)
2889 if (col_tot[i] != 0.)
2898 f11 = mat[nz_cols[0]];
2899 f12 = mat[nz_cols[1]];
2900 f21 = mat[nz_cols[0] + n_cols];
2901 f22 = mat[nz_cols[1] + n_cols];
2903 c[0] = cols[nz_cols[0]];
2904 c[1] = cols[nz_cols[1]];
2907 value[0] = (f11 * f22) / (f12 * f21);
2908 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2909 lower[0] = value[0] * exp (-1.960 * v);
2910 upper[0] = value[0] * exp (1.960 * v);
2912 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2913 v = sqrt ((f12 / (f11 * (f11 + f12)))
2914 + (f22 / (f21 * (f21 + f22))));
2915 lower[1] = value[1] * exp (-1.960 * v);
2916 upper[1] = value[1] * exp (1.960 * v);
2918 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2919 v = sqrt ((f11 / (f12 * (f11 + f12)))
2920 + (f21 / (f22 * (f21 + f22))));
2921 lower[2] = value[2] * exp (-1.960 * v);
2922 upper[2] = value[2] * exp (1.960 * v);
2927 /* Calculate directional measures. */
2929 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2930 double t[N_DIRECTIONAL])
2935 for (i = 0; i < N_DIRECTIONAL; i++)
2936 v[i] = ase[i] = t[i] = SYSMIS;
2940 if (cmd.a_statistics[CRS_ST_LAMBDA])
2942 double *fim = xmalloc (sizeof *fim * n_rows);
2943 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2944 double *fmj = xmalloc (sizeof *fmj * n_cols);
2945 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2946 double sum_fim, sum_fmj;
2948 int rm_index, cm_index;
2951 /* Find maximum for each row and their sum. */
2952 for (sum_fim = 0., i = 0; i < n_rows; i++)
2954 double max = mat[i * n_cols];
2957 for (j = 1; j < n_cols; j++)
2958 if (mat[j + i * n_cols] > max)
2960 max = mat[j + i * n_cols];
2964 sum_fim += fim[i] = max;
2965 fim_index[i] = index;
2968 /* Find maximum for each column. */
2969 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2971 double max = mat[j];
2974 for (i = 1; i < n_rows; i++)
2975 if (mat[j + i * n_cols] > max)
2977 max = mat[j + i * n_cols];
2981 sum_fmj += fmj[j] = max;
2982 fmj_index[j] = index;
2985 /* Find maximum row total. */
2988 for (i = 1; i < n_rows; i++)
2989 if (row_tot[i] > rm)
2995 /* Find maximum column total. */
2998 for (j = 1; j < n_cols; j++)
2999 if (col_tot[j] > cm)
3005 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
3006 v[1] = (sum_fmj - rm) / (W - rm);
3007 v[2] = (sum_fim - cm) / (W - cm);
3009 /* ASE1 for Y given X. */
3013 for (accum = 0., i = 0; i < n_rows; i++)
3014 for (j = 0; j < n_cols; j++)
3016 const int deltaj = j == cm_index;
3017 accum += (mat[j + i * n_cols]
3018 * pow2 ((j == fim_index[i])
3023 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3026 /* ASE0 for Y given X. */
3030 for (accum = 0., i = 0; i < n_rows; i++)
3031 if (cm_index != fim_index[i])
3032 accum += (mat[i * n_cols + fim_index[i]]
3033 + mat[i * n_cols + cm_index]);
3034 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3037 /* ASE1 for X given Y. */
3041 for (accum = 0., i = 0; i < n_rows; i++)
3042 for (j = 0; j < n_cols; j++)
3044 const int deltaj = i == rm_index;
3045 accum += (mat[j + i * n_cols]
3046 * pow2 ((i == fmj_index[j])
3051 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3054 /* ASE0 for X given Y. */
3058 for (accum = 0., j = 0; j < n_cols; j++)
3059 if (rm_index != fmj_index[j])
3060 accum += (mat[j + n_cols * fmj_index[j]]
3061 + mat[j + n_cols * rm_index]);
3062 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3065 /* Symmetric ASE0 and ASE1. */
3070 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3071 for (j = 0; j < n_cols; j++)
3073 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3074 int temp1 = (i == rm_index) + (j == cm_index);
3075 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3076 accum1 += (mat[j + i * n_cols]
3077 * pow2 (temp0 + (v[0] - 1.) * temp1));
3079 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3080 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3081 / (2. * W - rm - cm));
3090 double sum_fij2_ri, sum_fij2_ci;
3091 double sum_ri2, sum_cj2;
3093 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3094 for (j = 0; j < n_cols; j++)
3096 double temp = pow2 (mat[j + i * n_cols]);
3097 sum_fij2_ri += temp / row_tot[i];
3098 sum_fij2_ci += temp / col_tot[j];
3101 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3102 sum_ri2 += row_tot[i] * row_tot[i];
3104 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3105 sum_cj2 += col_tot[j] * col_tot[j];
3107 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3108 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3112 if (cmd.a_statistics[CRS_ST_UC])
3114 double UX, UY, UXY, P;
3115 double ase1_yx, ase1_xy, ase1_sym;
3118 for (UX = 0., i = 0; i < n_rows; i++)
3119 if (row_tot[i] > 0.)
3120 UX -= row_tot[i] / W * log (row_tot[i] / W);
3122 for (UY = 0., j = 0; j < n_cols; j++)
3123 if (col_tot[j] > 0.)
3124 UY -= col_tot[j] / W * log (col_tot[j] / W);
3126 for (UXY = P = 0., i = 0; i < n_rows; i++)
3127 for (j = 0; j < n_cols; j++)
3129 double entry = mat[j + i * n_cols];
3134 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3135 UXY -= entry / W * log (entry / W);
3138 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3139 for (j = 0; j < n_cols; j++)
3141 double entry = mat[j + i * n_cols];
3146 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3147 + (UX - UXY) * log (col_tot[j] / W));
3148 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3149 + (UY - UXY) * log (row_tot[i] / W));
3150 ase1_sym += entry * pow2 ((UXY
3151 * log (row_tot[i] * col_tot[j] / (W * W)))
3152 - (UX + UY) * log (entry / W));
3155 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3156 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3157 t[5] = v[5] / ((2. / (W * (UX + UY)))
3158 * sqrt (P - pow2 (UX + UY - UXY) / W));
3160 v[6] = (UX + UY - UXY) / UX;
3161 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3162 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3164 v[7] = (UX + UY - UXY) / UY;
3165 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3166 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3170 if (cmd.a_statistics[CRS_ST_D])
3175 calc_symmetric (NULL, NULL, NULL);
3176 for (i = 0; i < 3; i++)
3178 v[8 + i] = somers_d_v[i];
3179 ase[8 + i] = somers_d_ase[i];
3180 t[8 + i] = somers_d_t[i];
3185 if (cmd.a_statistics[CRS_ST_ETA])
3188 double sum_Xr, sum_X2r;
3192 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3194 sum_Xr += rows[i].f * row_tot[i];
3195 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3197 SX = sum_X2r - sum_Xr * sum_Xr / W;
3199 for (SXW = 0., j = 0; j < n_cols; j++)
3203 for (cum = 0., i = 0; i < n_rows; i++)
3205 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3206 cum += rows[i].f * mat[j + i * n_cols];
3209 SXW -= cum * cum / col_tot[j];
3211 v[11] = sqrt (1. - SXW / SX);
3215 double sum_Yc, sum_Y2c;
3219 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3221 sum_Yc += cols[i].f * col_tot[i];
3222 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3224 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3226 for (SYW = 0., i = 0; i < n_rows; i++)
3230 for (cum = 0., j = 0; j < n_cols; j++)
3232 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3233 cum += cols[j].f * mat[j + i * n_cols];
3236 SYW -= cum * cum / row_tot[i];
3238 v[12] = sqrt (1. - SYW / SY);
3245 /* A wrapper around data_out() that limits string output to short
3246 string width and null terminates the result. */
3248 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3250 struct fmt_spec fmt_subst;
3252 /* Limit to short string width. */
3253 if (formats[fp->type].cat & FCAT_STRING)
3257 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3258 if (fmt_subst.type == FMT_A)
3259 fmt_subst.w = min (8, fmt_subst.w);
3261 fmt_subst.w = min (16, fmt_subst.w);
3267 data_out (s, fp, v);
3269 /* Null terminate. */