1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include <gsl/gsl_cdf.h>
38 #include "algorithm.h"
41 #include "dictionary.h"
52 #include "value-labels.h"
58 #include "debug-print.h"
64 +missing=miss:!table/include/report;
65 +write[wr_]=none,cells,all;
66 +format=fmt:!labels/nolabels/novallabs,
69 tabl:!tables/notables,
72 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
74 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
80 /* Number of chi-square statistics. */
83 /* Number of symmetric statistics. */
86 /* Number of directional statistics. */
87 #define N_DIRECTIONAL 13
89 /* A single table entry for general mode. */
92 int table; /* Flattened table number. */
95 double freq; /* Frequency count. */
96 double *data; /* Crosstabulation table for integer mode. */
99 union value values[1]; /* Values. */
102 /* A crosstabulation. */
105 int nvar; /* Number of variables. */
106 double missing; /* Missing cases count. */
107 int ofs; /* Integer mode: Offset into sorted_tab[]. */
108 struct variable *vars[2]; /* At least two variables; sorted by
109 larger indices first. */
112 /* Integer mode variable info. */
115 int min; /* Minimum value. */
116 int max; /* Maximum value + 1. */
117 int count; /* max - min. */
120 static inline struct var_range *
121 get_var_range (struct variable *v)
124 assert (v->aux != NULL);
128 /* Indexes into crosstab.v. */
135 /* General mode crosstabulation table. */
136 static struct hsh_table *gen_tab; /* Hash table. */
137 static int n_sorted_tab; /* Number of entries in sorted_tab. */
138 static struct table_entry **sorted_tab; /* Sorted table. */
140 /* Variables specifies on VARIABLES. */
141 static struct variable **variables;
142 static size_t variables_cnt;
145 static struct crosstab **xtab;
148 /* Integer or general mode? */
157 static int num_cells; /* Number of cells requested. */
158 static int cells[8]; /* Cells requested. */
161 static int write; /* One of WR_* that specifies the WRITE style. */
163 /* Command parsing info. */
164 static struct cmd_crosstabs cmd;
167 static struct pool *pl_tc; /* For table cells. */
168 static struct pool *pl_col; /* For column data. */
170 static int internal_cmd_crosstabs (void);
171 static void precalc (void *);
172 static int calc_general (struct ccase *, void *);
173 static int calc_integer (struct ccase *, void *);
174 static void postcalc (void *);
175 static void submit (struct tab_table *);
177 static void format_short (char *s, const struct fmt_spec *fp,
178 const union value *v);
180 /* Parse and execute CROSSTABS, then clean up. */
184 int result = internal_cmd_crosstabs ();
187 pool_destroy (pl_tc);
188 pool_destroy (pl_col);
193 /* Parses and executes the CROSSTABS procedure. */
195 internal_cmd_crosstabs (void)
203 pl_tc = pool_create ();
204 pl_col = pool_create ();
206 if (!parse_crosstabs (&cmd))
209 mode = variables ? INTEGER : GENERAL;
214 cmd.a_cells[CRS_CL_COUNT] = 1;
220 for (i = 0; i < CRS_CL_count; i++)
225 cmd.a_cells[CRS_CL_COUNT] = 1;
226 cmd.a_cells[CRS_CL_ROW] = 1;
227 cmd.a_cells[CRS_CL_COLUMN] = 1;
228 cmd.a_cells[CRS_CL_TOTAL] = 1;
230 if (cmd.a_cells[CRS_CL_ALL])
232 for (i = 0; i < CRS_CL_count; i++)
234 cmd.a_cells[CRS_CL_ALL] = 0;
236 cmd.a_cells[CRS_CL_NONE] = 0;
238 for (num_cells = i = 0; i < CRS_CL_count; i++)
240 cells[num_cells++] = i;
243 if (cmd.sbc_statistics)
248 for (i = 0; i < CRS_ST_count; i++)
249 if (cmd.a_statistics[i])
252 cmd.a_statistics[CRS_ST_CHISQ] = 1;
253 if (cmd.a_statistics[CRS_ST_ALL])
254 for (i = 0; i < CRS_ST_count; i++)
255 cmd.a_statistics[i] = 1;
259 if (cmd.miss == CRS_REPORT && mode == GENERAL)
261 msg (SE, _("Missing mode REPORT not allowed in general mode. "
262 "Assuming MISSING=TABLE."));
263 cmd.miss = CRS_TABLE;
267 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
268 cmd.a_write[CRS_WR_ALL] = 0;
269 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
271 msg (SE, _("Write mode ALL not allowed in general mode. "
272 "Assuming WRITE=CELLS."));
273 cmd.a_write[CRS_WR_CELLS] = 1;
276 && (cmd.a_write[CRS_WR_NONE]
277 + cmd.a_write[CRS_WR_ALL]
278 + cmd.a_write[CRS_WR_CELLS] == 0))
279 cmd.a_write[CRS_WR_CELLS] = 1;
280 if (cmd.a_write[CRS_WR_CELLS])
281 write = CRS_WR_CELLS;
282 else if (cmd.a_write[CRS_WR_ALL])
287 procedure_with_splits (precalc,
288 mode == GENERAL ? calc_general : calc_integer,
294 /* Parses the TABLES subcommand. */
296 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
298 struct var_set *var_set;
300 struct variable ***by = NULL;
305 /* Ensure that this is a TABLES subcommand. */
306 if (!lex_match_id ("TABLES")
307 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
312 if (variables != NULL)
313 var_set = var_set_create_from_array (variables, variables_cnt);
315 var_set = var_set_create_from_dict (default_dict);
316 assert (var_set != NULL);
320 by = xrealloc (by, sizeof *by * (n_by + 1));
321 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
322 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
323 PV_NO_DUPLICATE | PV_NO_SCRATCH))
328 if (!lex_match (T_BY))
332 lex_error (_("expecting BY"));
341 int *by_iter = xcalloc (sizeof *by_iter * n_by);
344 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
345 for (i = 0; i < nx; i++)
349 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
356 for (i = 0; i < n_by; i++)
357 x->vars[i] = by[i][by_iter[i]];
363 for (i = n_by - 1; i >= 0; i--)
365 if (++by_iter[i] < by_nvar[i])
378 /* All return paths lead here. */
382 for (i = 0; i < n_by; i++)
388 var_set_destroy (var_set);
393 /* Parses the VARIABLES subcommand. */
395 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
399 msg (SE, _("VARIABLES must be specified before TABLES."));
407 int orig_nv = variables_cnt;
412 if (!parse_variables (default_dict, &variables, &variables_cnt,
413 (PV_APPEND | PV_NUMERIC
414 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
419 lex_error ("expecting `('");
424 if (!lex_force_int ())
426 min = lex_integer ();
431 if (!lex_force_int ())
433 max = lex_integer ();
436 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
444 lex_error ("expecting `)'");
449 for (i = orig_nv; i < variables_cnt; i++)
451 struct var_range *vr = xmalloc (sizeof *vr);
454 vr->count = max - min + 1;
455 var_attach_aux (variables[i], vr, var_dtor_free);
470 /* Data file processing. */
472 static int compare_table_entry (const void *, const void *, void *);
473 static unsigned hash_table_entry (const void *, void *);
475 /* Set up the crosstabulation tables for processing. */
477 precalc (void *aux UNUSED)
481 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
491 for (i = 0; i < nxtab; i++)
493 struct crosstab *x = xtab[i];
498 x->ofs = n_sorted_tab;
500 for (j = 2; j < x->nvar; j++)
501 count *= get_var_range (x->vars[j - 2])->count;
503 sorted_tab = xrealloc (sorted_tab,
504 sizeof *sorted_tab * (n_sorted_tab + count));
505 v = local_alloc (sizeof *v * x->nvar);
506 for (j = 2; j < x->nvar; j++)
507 v[j] = get_var_range (x->vars[j])->min;
508 for (j = 0; j < count; j++)
510 struct table_entry *te;
513 te = sorted_tab[n_sorted_tab++]
514 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
518 int row_cnt = get_var_range (x->vars[0])->count;
519 int col_cnt = get_var_range (x->vars[1])->count;
520 const int mat_size = row_cnt * col_cnt;
523 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
524 for (m = 0; m < mat_size; m++)
528 for (k = 2; k < x->nvar; k++)
529 te->values[k].f = v[k];
530 for (k = 2; k < x->nvar; k++)
532 struct var_range *vr = get_var_range (x->vars[k]);
533 if (++v[k] >= vr->max)
542 sorted_tab = xrealloc (sorted_tab,
543 sizeof *sorted_tab * (n_sorted_tab + 1));
544 sorted_tab[n_sorted_tab] = NULL;
548 /* Form crosstabulations for general mode. */
550 calc_general (struct ccase *c, void *aux UNUSED)
555 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
557 /* Flattened current table index. */
560 for (t = 0; t < nxtab; t++)
562 struct crosstab *x = xtab[t];
563 const size_t entry_size = (sizeof (struct table_entry)
564 + sizeof (union value) * (x->nvar - 1));
565 struct table_entry *te = local_alloc (entry_size);
567 /* Construct table entry for the current record and table. */
573 for (j = 0; j < x->nvar; j++)
575 if ((cmd.miss == CRS_TABLE
576 && is_missing (case_data (c, x->vars[j]->fv), x->vars[j]))
577 || (cmd.miss == CRS_INCLUDE
578 && is_system_missing (case_data (c, x->vars[j]->fv),
581 x->missing += weight;
585 if (x->vars[j]->type == NUMERIC)
586 te->values[j].f = case_num (c, x->vars[j]->fv);
589 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
592 /* Necessary in order to simplify comparisons. */
593 memset (&te->values[j].s[x->vars[j]->width], 0,
594 sizeof (union value) - x->vars[j]->width);
599 /* Add record to hash table. */
601 struct table_entry **tepp
602 = (struct table_entry **) hsh_probe (gen_tab, te);
605 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
608 memcpy (tep, te, entry_size);
613 (*tepp)->u.freq += weight;
624 calc_integer (struct ccase *c, void *aux UNUSED)
629 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
631 /* Flattened current table index. */
634 for (t = 0; t < nxtab; t++)
636 struct crosstab *x = xtab[t];
641 for (i = 0; i < x->nvar; i++)
643 struct variable *const v = x->vars[i];
644 struct var_range *vr = get_var_range (v);
645 double value = case_num (c, v->fv);
647 /* Note that the first test also rules out SYSMIS. */
648 if ((value < vr->min || value >= vr->max)
649 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
651 x->missing += weight;
657 ofs += fact * ((int) value - vr->min);
663 struct variable *row_var = x->vars[ROW_VAR];
664 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
666 struct variable *col_var = x->vars[COL_VAR];
667 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
669 const int col_dim = get_var_range (col_var)->count;
671 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
680 /* Compare the table_entry's at A and B and return a strcmp()-type
683 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
685 const struct table_entry *a = a_;
686 const struct table_entry *b = b_;
688 if (a->table > b->table)
690 else if (a->table < b->table)
694 const struct crosstab *x = xtab[a->table];
697 for (i = x->nvar - 1; i >= 0; i--)
698 if (x->vars[i]->type == NUMERIC)
700 const double diffnum = a->values[i].f - b->values[i].f;
703 else if (diffnum > 0)
708 assert (x->vars[i]->type == ALPHA);
710 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
721 /* Calculate a hash value from table_entry A. */
723 hash_table_entry (const void *a_, void *foo UNUSED)
725 const struct table_entry *a = a_;
730 for (i = 0; i < xtab[a->table]->nvar; i++)
731 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
736 /* Post-data reading calculations. */
738 static struct table_entry **find_pivot_extent (struct table_entry **,
739 int *cnt, int pivot);
740 static void enum_var_values (struct table_entry **entries, int entry_cnt,
742 union value **values, int *value_cnt);
743 static void output_pivot_table (struct table_entry **, struct table_entry **,
744 double **, double **, double **,
745 int *, int *, int *);
746 static void make_summary_table (void);
749 postcalc (void *aux UNUSED)
753 n_sorted_tab = hsh_count (gen_tab);
754 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
757 make_summary_table ();
759 /* Identify all the individual crosstabulation tables, and deal with
762 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
763 int pc = n_sorted_tab; /* Pivot count. */
765 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
766 int maxrows = 0, maxcols = 0, maxcells = 0;
770 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
774 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
775 &maxrows, &maxcols, &maxcells);
784 hsh_destroy (gen_tab);
787 static void insert_summary (struct tab_table *, int tab_index, double valid);
789 /* Output a table summarizing the cases processed. */
791 make_summary_table (void)
793 struct tab_table *summary;
795 struct table_entry **pb = sorted_tab, **pe;
796 int pc = n_sorted_tab;
799 summary = tab_create (7, 3 + nxtab, 1);
800 tab_title (summary, 0, _("Summary."));
801 tab_headers (summary, 1, 0, 3, 0);
802 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
803 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
804 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
805 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
806 tab_hline (summary, TAL_1, 1, 6, 1);
807 tab_hline (summary, TAL_1, 1, 6, 2);
808 tab_vline (summary, TAL_1, 3, 1, 1);
809 tab_vline (summary, TAL_1, 5, 1, 1);
813 for (i = 0; i < 3; i++)
815 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
816 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
819 tab_offset (summary, 0, 3);
825 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
829 while (cur_tab < (*pb)->table)
830 insert_summary (summary, cur_tab++, 0.);
833 for (valid = 0.; pb < pe; pb++)
834 valid += (*pb)->u.freq;
837 const struct crosstab *const x = xtab[(*pb)->table];
838 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
839 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
840 const int count = n_cols * n_rows;
842 for (valid = 0.; pb < pe; pb++)
844 const double *data = (*pb)->u.data;
847 for (i = 0; i < count; i++)
851 insert_summary (summary, cur_tab++, valid);
856 while (cur_tab < nxtab)
857 insert_summary (summary, cur_tab++, 0.);
862 /* Inserts a line into T describing the crosstabulation at index
863 TAB_INDEX, which has VALID valid observations. */
865 insert_summary (struct tab_table *t, int tab_index, double valid)
867 struct crosstab *x = xtab[tab_index];
869 tab_hline (t, TAL_1, 0, 6, 0);
871 /* Crosstabulation name. */
873 char *buf = local_alloc (128 * x->nvar);
877 for (i = 0; i < x->nvar; i++)
880 cp = stpcpy (cp, " * ");
883 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
885 tab_text (t, 0, 0, TAB_LEFT, buf);
890 /* Counts and percentages. */
900 for (i = 0; i < 3; i++)
902 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
903 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
914 static struct tab_table *table; /* Crosstabulation table. */
915 static struct tab_table *chisq; /* Chi-square table. */
916 static struct tab_table *sym; /* Symmetric measures table. */
917 static struct tab_table *risk; /* Risk estimate table. */
918 static struct tab_table *direct; /* Directional measures table. */
921 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
923 /* Column values, number of columns. */
924 static union value *cols;
927 /* Row values, number of rows. */
928 static union value *rows;
931 /* Number of statistically interesting columns/rows (columns/rows with
933 static int ns_cols, ns_rows;
935 /* Crosstabulation. */
936 static struct crosstab *x;
938 /* Number of variables from the crosstabulation to consider. This is
939 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
942 /* Matrix contents. */
943 static double *mat; /* Matrix proper. */
944 static double *row_tot; /* Row totals. */
945 static double *col_tot; /* Column totals. */
946 static double W; /* Grand total. */
948 static void display_dimensions (struct tab_table *, int first_difference,
949 struct table_entry *);
950 static void display_crosstabulation (void);
951 static void display_chisq (void);
952 static void display_symmetric (void);
953 static void display_risk (void);
954 static void display_directional (void);
955 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
956 static void table_value_missing (struct tab_table *table, int c, int r,
957 unsigned char opt, const union value *v,
958 const struct variable *var);
959 static void delete_missing (void);
961 /* Output pivot table beginning at PB and continuing until PE,
962 exclusive. For efficiency, *MATP is a pointer to a matrix that can
963 hold *MAXROWS entries. */
965 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
966 double **matp, double **row_totp, double **col_totp,
967 int *maxrows, int *maxcols, int *maxcells)
970 struct table_entry **tb = pb, **te; /* Table begin, table end. */
971 int tc = pe - pb; /* Table count. */
973 /* Table entry for header comparison. */
974 struct table_entry *cmp = NULL;
976 x = xtab[(*pb)->table];
977 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
979 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
981 /* Crosstabulation table initialization. */
984 table = tab_create (nvar + n_cols,
985 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
986 tab_headers (table, nvar - 1, 0, 2, 0);
988 /* First header line. */
989 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
990 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
992 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
994 /* Second header line. */
998 for (i = 2; i < nvar; i++)
999 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1000 TAB_RIGHT | TAT_TITLE,
1002 ? x->vars[i]->label : x->vars[i]->name));
1003 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1004 x->vars[ROW_VAR]->name);
1005 for (i = 0; i < n_cols; i++)
1006 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1008 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1011 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1012 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1016 char *title = local_alloc (x->nvar * 64 + 128);
1020 if (cmd.pivot == CRS_PIVOT)
1021 for (i = 0; i < nvar; i++)
1024 cp = stpcpy (cp, " by ");
1025 cp = stpcpy (cp, x->vars[i]->name);
1029 cp = spprintf (cp, "%s by %s for",
1030 x->vars[0]->name, x->vars[1]->name);
1031 for (i = 2; i < nvar; i++)
1033 char buf[64], *bufp;
1038 cp = stpcpy (cp, x->vars[i]->name);
1040 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1041 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1043 cp = stpcpy (cp, bufp);
1047 cp = stpcpy (cp, " [");
1048 for (i = 0; i < num_cells; i++)
1056 static const struct tuple cell_names[] =
1058 {CRS_CL_COUNT, N_("count")},
1059 {CRS_CL_ROW, N_("row %")},
1060 {CRS_CL_COLUMN, N_("column %")},
1061 {CRS_CL_TOTAL, N_("total %")},
1062 {CRS_CL_EXPECTED, N_("expected")},
1063 {CRS_CL_RESIDUAL, N_("residual")},
1064 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1065 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1069 const struct tuple *t;
1071 for (t = cell_names; t->value != cells[i]; t++)
1072 assert (t->value != -1);
1074 cp = stpcpy (cp, ", ");
1075 cp = stpcpy (cp, gettext (t->name));
1079 tab_title (table, 0, title);
1083 tab_offset (table, 0, 2);
1088 /* Chi-square table initialization. */
1089 if (cmd.a_statistics[CRS_ST_CHISQ])
1091 chisq = tab_create (6 + (nvar - 2),
1092 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1093 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1095 tab_title (chisq, 0, "Chi-square tests.");
1097 tab_offset (chisq, nvar - 2, 0);
1098 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1099 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1100 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1101 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1102 _("Asymp. Sig. (2-sided)"));
1103 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1104 _("Exact. Sig. (2-sided)"));
1105 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1106 _("Exact. Sig. (1-sided)"));
1108 tab_offset (chisq, 0, 1);
1113 /* Symmetric measures. */
1114 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1115 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1116 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1117 || cmd.a_statistics[CRS_ST_KAPPA])
1119 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1120 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1121 tab_title (sym, 0, "Symmetric measures.");
1123 tab_offset (sym, nvar - 2, 0);
1124 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1125 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1126 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1127 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1128 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1129 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1130 tab_offset (sym, 0, 1);
1135 /* Risk estimate. */
1136 if (cmd.a_statistics[CRS_ST_RISK])
1138 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1139 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1140 tab_title (risk, 0, "Risk estimate.");
1142 tab_offset (risk, nvar - 2, 0);
1143 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1144 _(" 95%% Confidence Interval"));
1145 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1146 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1147 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1148 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1149 tab_hline (risk, TAL_1, 2, 3, 1);
1150 tab_vline (risk, TAL_1, 2, 0, 1);
1151 tab_offset (risk, 0, 2);
1156 /* Directional measures. */
1157 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1158 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1160 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1161 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1162 tab_title (direct, 0, "Directional measures.");
1164 tab_offset (direct, nvar - 2, 0);
1165 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1166 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1167 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1168 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1169 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1170 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1171 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1172 tab_offset (direct, 0, 1);
1179 /* Find pivot subtable if applicable. */
1180 te = find_pivot_extent (tb, &tc, 0);
1184 /* Find all the row variable values. */
1185 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1187 /* Allocate memory space for the column and row totals. */
1188 if (n_rows > *maxrows)
1190 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1191 row_tot = *row_totp;
1194 if (n_cols > *maxcols)
1196 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1197 col_tot = *col_totp;
1201 /* Allocate table space for the matrix. */
1202 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1203 tab_realloc (table, -1,
1204 max (tab_nr (table) + (n_rows + 1) * num_cells,
1205 tab_nr (table) * (pe - pb) / (te - tb)));
1207 if (mode == GENERAL)
1209 /* Allocate memory space for the matrix. */
1210 if (n_cols * n_rows > *maxcells)
1212 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1213 *maxcells = n_cols * n_rows;
1218 /* Build the matrix and calculate column totals. */
1220 union value *cur_col = cols;
1221 union value *cur_row = rows;
1223 double *cp = col_tot;
1224 struct table_entry **p;
1227 for (p = &tb[0]; p < te; p++)
1229 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1233 for (; cur_row < &rows[n_rows]; cur_row++)
1239 mp = &mat[cur_col - cols];
1242 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1249 *cp += *mp = (*p)->u.freq;
1254 /* Zero out the rest of the matrix. */
1255 for (; cur_row < &rows[n_rows]; cur_row++)
1261 if (cur_col < &cols[n_cols])
1263 const int rem_cols = n_cols - (cur_col - cols);
1266 for (c = 0; c < rem_cols; c++)
1268 mp = &mat[cur_col - cols];
1269 for (r = 0; r < n_rows; r++)
1271 for (c = 0; c < rem_cols; c++)
1273 mp += n_cols - rem_cols;
1281 double *tp = col_tot;
1283 assert (mode == INTEGER);
1284 mat = (*tb)->u.data;
1287 /* Calculate column totals. */
1288 for (c = 0; c < n_cols; c++)
1291 double *cp = &mat[c];
1293 for (r = 0; r < n_rows; r++)
1294 cum += cp[r * n_cols];
1302 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1303 ns_cols += *cp != 0.;
1306 /* Calculate row totals. */
1309 double *rp = row_tot;
1312 for (ns_rows = 0, r = n_rows; r--; )
1315 for (c = n_cols; c--; )
1323 /* Calculate grand total. */
1329 if (n_rows < n_cols)
1330 tp = row_tot, n = n_rows;
1332 tp = col_tot, n = n_cols;
1338 /* Find the first variable that differs from the last subtable,
1339 then display the values of the dimensioning variables for
1340 each table that needs it. */
1342 int first_difference = nvar - 1;
1345 for (; ; first_difference--)
1347 assert (first_difference >= 2);
1348 if (memcmp (&cmp->values[first_difference],
1349 &(*tb)->values[first_difference],
1350 sizeof *cmp->values))
1356 display_dimensions (table, first_difference, *tb);
1358 display_dimensions (chisq, first_difference, *tb);
1360 display_dimensions (sym, first_difference, *tb);
1362 display_dimensions (risk, first_difference, *tb);
1364 display_dimensions (direct, first_difference, *tb);
1368 display_crosstabulation ();
1369 if (cmd.miss == CRS_REPORT)
1374 display_symmetric ();
1378 display_directional ();
1389 tab_resize (chisq, 4 + (nvar - 2), -1);
1400 /* Delete missing rows and columns for statistical analysis when
1403 delete_missing (void)
1408 for (r = 0; r < n_rows; r++)
1409 if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1413 for (c = 0; c < n_cols; c++)
1414 mat[c + r * n_cols] = 0.;
1422 for (c = 0; c < n_cols; c++)
1423 if (is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1427 for (r = 0; r < n_rows; r++)
1428 mat[c + r * n_cols] = 0.;
1434 /* Prepare table T for submission, and submit it. */
1436 submit (struct tab_table *t)
1443 tab_resize (t, -1, 0);
1444 if (tab_nr (t) == tab_t (t))
1449 tab_offset (t, 0, 0);
1451 for (i = 2; i < nvar; i++)
1452 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1453 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1454 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1455 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1457 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1459 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1460 tab_dim (t, crosstabs_dim);
1464 /* Sets the widths of all the columns and heights of all the rows in
1465 table T for driver D. */
1467 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1471 /* Width of a numerical column. */
1472 int c = outp_string_width (d, "0.000000");
1473 if (cmd.miss == CRS_REPORT)
1474 c += outp_string_width (d, "M");
1476 /* Set width for header columns. */
1479 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1481 if (w < d->prop_em_width * 8)
1482 w = d->prop_em_width * 8;
1484 if (w > d->prop_em_width * 15)
1485 w = d->prop_em_width * 15;
1487 for (i = 0; i < t->l; i++)
1491 for (i = t->l; i < t->nc; i++)
1494 for (i = 0; i < t->nr; i++)
1495 t->h[i] = tab_natural_height (t, d, i);
1498 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1499 int *cnt, int pivot);
1500 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1501 int *cnt, int pivot);
1503 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1505 static struct table_entry **
1506 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1508 return (mode == GENERAL
1509 ? find_pivot_extent_general (tp, cnt, pivot)
1510 : find_pivot_extent_integer (tp, cnt, pivot));
1513 /* Find the extent of a region in TP that contains one table. If
1514 PIVOT != 0 that means a set of table entries with identical table
1515 number; otherwise they also have to have the same values for every
1516 dimension after the row and column dimensions. The table that is
1517 searched starts at TP and has length CNT. Returns the first entry
1518 after the last one in the table; sets *CNT to the number of
1519 remaining values. If there are no entries in TP at all, returns
1520 NULL. A yucky interface, admittedly, but it works. */
1521 static struct table_entry **
1522 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1524 struct table_entry *fp = *tp;
1529 x = xtab[(*tp)->table];
1537 if ((*tp)->table != fp->table)
1542 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1549 /* Integer mode correspondent to find_pivot_extent_general(). This
1550 could be optimized somewhat, but I just don't give a crap about
1551 CROSSTABS performance in integer mode, which is just a
1552 CROSSTABS wart as far as I'm concerned.
1554 That said, feel free to send optimization patches to me. */
1555 static struct table_entry **
1556 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1558 struct table_entry *fp = *tp;
1563 x = xtab[(*tp)->table];
1571 if ((*tp)->table != fp->table)
1576 if (memcmp (&(*tp)->values[2], &fp->values[2],
1577 sizeof (union value) * (x->nvar - 2)))
1584 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1585 result. WIDTH_ points to an int which is either 0 for a
1586 numeric value or a string width for a string value. */
1588 compare_value (const void *a_, const void *b_, void *width_)
1590 const union value *a = a_;
1591 const union value *b = b_;
1592 const int *pwidth = width_;
1593 const int width = *pwidth;
1596 return (a->f < b->f) ? -1 : (a->f > b->f);
1598 return strncmp (a->s, b->s, width);
1601 /* Given an array of ENTRY_CNT table_entry structures starting at
1602 ENTRIES, creates a sorted list of the values that the variable
1603 with index VAR_IDX takes on. The values are returned as a
1604 malloc()'darray stored in *VALUES, with the number of values
1605 stored in *VALUE_CNT.
1608 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1609 union value **values, int *value_cnt)
1611 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1613 if (mode == GENERAL)
1615 int width = v->width;
1618 *values = xmalloc (sizeof **values * entry_cnt);
1619 for (i = 0; i < entry_cnt; i++)
1620 (*values)[i] = entries[i]->values[var_idx];
1621 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1622 compare_value, &width);
1626 struct var_range *vr = get_var_range (v);
1629 assert (mode == INTEGER);
1630 *values = xmalloc (sizeof **values * vr->count);
1631 for (i = 0; i < vr->count; i++)
1632 (*values)[i].f = i + vr->min;
1633 *value_cnt = vr->count;
1637 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1638 from V, displayed with print format spec from variable VAR. When
1639 in REPORT missing-value mode, missing values have an M appended. */
1641 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1642 const union value *v, const struct variable *var)
1644 struct len_string s;
1646 const char *label = val_labs_find (var->val_labs, *v);
1649 tab_text (table, c, r, TAB_LEFT, label);
1653 s.string = tab_alloc (table, var->print.w);
1654 format_short (s.string, &var->print, v);
1655 s.length = strlen (s.string);
1656 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1657 s.string[s.length++] = 'M';
1658 while (s.length && *s.string == ' ')
1663 tab_raw (table, c, r, opt, &s);
1666 /* Draws a line across TABLE at the current row to indicate the most
1667 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1668 that changed, and puts the values that changed into the table. TB
1669 and X must be the corresponding table_entry and crosstab,
1672 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1674 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1676 for (; first_difference >= 2; first_difference--)
1677 table_value_missing (table, nvar - first_difference - 1, 0,
1678 TAB_RIGHT, &tb->values[first_difference],
1679 x->vars[first_difference]);
1682 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1683 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1684 additionally suffixed with a letter `M'. */
1686 format_cell_entry (struct tab_table *table, int c, int r, double value,
1687 char suffix, int mark_missing)
1689 const struct fmt_spec f = {FMT_F, 10, 1};
1691 struct len_string s;
1694 s.string = tab_alloc (table, 16);
1696 data_out (s.string, &f, &v);
1697 while (*s.string == ' ')
1703 s.string[s.length++] = suffix;
1705 s.string[s.length++] = 'M';
1707 tab_raw (table, c, r, TAB_RIGHT, &s);
1710 /* Displays the crosstabulation table. */
1712 display_crosstabulation (void)
1717 for (r = 0; r < n_rows; r++)
1718 table_value_missing (table, nvar - 2, r * num_cells,
1719 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1721 tab_text (table, nvar - 2, n_rows * num_cells,
1722 TAB_LEFT, _("Total"));
1724 /* Put in the actual cells. */
1729 tab_offset (table, nvar - 1, -1);
1730 for (r = 0; r < n_rows; r++)
1733 tab_hline (table, TAL_1, -1, n_cols, 0);
1734 for (c = 0; c < n_cols; c++)
1736 int mark_missing = 0;
1737 double expected_value = row_tot[r] * col_tot[c] / W;
1738 if (cmd.miss == CRS_REPORT
1739 && (is_num_user_missing (cols[c].f, x->vars[COL_VAR])
1740 || is_num_user_missing (rows[r].f, x->vars[ROW_VAR])))
1742 for (i = 0; i < num_cells; i++)
1753 v = *mp / row_tot[r] * 100.;
1757 v = *mp / col_tot[c] * 100.;
1764 case CRS_CL_EXPECTED:
1767 case CRS_CL_RESIDUAL:
1768 v = *mp - expected_value;
1770 case CRS_CL_SRESIDUAL:
1771 v = (*mp - expected_value) / sqrt (expected_value);
1773 case CRS_CL_ASRESIDUAL:
1774 v = ((*mp - expected_value)
1775 / sqrt (expected_value
1776 * (1. - row_tot[r] / W)
1777 * (1. - col_tot[c] / W)));
1784 format_cell_entry (table, c, i, v, suffix, mark_missing);
1790 tab_offset (table, -1, tab_row (table) + num_cells);
1798 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1799 for (r = 0; r < n_rows; r++)
1802 int mark_missing = 0;
1804 if (cmd.miss == CRS_REPORT
1805 && is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1808 for (i = 0; i < num_cells; i++)
1822 v = row_tot[r] / W * 100.;
1826 v = row_tot[r] / W * 100.;
1829 case CRS_CL_EXPECTED:
1830 case CRS_CL_RESIDUAL:
1831 case CRS_CL_SRESIDUAL:
1832 case CRS_CL_ASRESIDUAL:
1840 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1841 tab_next_row (table);
1846 /* Column totals, grand total. */
1852 tab_hline (table, TAL_1, -1, n_cols, 0);
1853 for (c = 0; c <= n_cols; c++)
1855 double ct = c < n_cols ? col_tot[c] : W;
1856 int mark_missing = 0;
1860 if (cmd.miss == CRS_REPORT && c < n_cols
1861 && is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1864 for (i = 0; i < num_cells; i++)
1886 case CRS_CL_EXPECTED:
1887 case CRS_CL_RESIDUAL:
1888 case CRS_CL_SRESIDUAL:
1889 case CRS_CL_ASRESIDUAL:
1896 format_cell_entry (table, c, i, v, suffix, mark_missing);
1901 tab_offset (table, -1, tab_row (table) + last_row);
1904 tab_offset (table, 0, -1);
1907 static void calc_r (double *X, double *Y, double *, double *, double *);
1908 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1910 /* Display chi-square statistics. */
1912 display_chisq (void)
1914 static const char *chisq_stats[N_CHISQ] =
1916 N_("Pearson Chi-Square"),
1917 N_("Likelihood Ratio"),
1918 N_("Fisher's Exact Test"),
1919 N_("Continuity Correction"),
1920 N_("Linear-by-Linear Association"),
1922 double chisq_v[N_CHISQ];
1923 double fisher1, fisher2;
1929 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1931 tab_offset (chisq, nvar - 2, -1);
1933 for (i = 0; i < N_CHISQ; i++)
1935 if ((i != 2 && chisq_v[i] == SYSMIS)
1936 || (i == 2 && fisher1 == SYSMIS))
1940 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1943 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1944 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1945 tab_float (chisq, 3, 0, TAB_RIGHT,
1946 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1951 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1952 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1954 tab_next_row (chisq);
1957 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1958 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1959 tab_next_row (chisq);
1961 tab_offset (chisq, 0, -1);
1964 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1965 double[N_SYMMETRIC]);
1967 /* Display symmetric measures. */
1969 display_symmetric (void)
1971 static const char *categories[] =
1973 N_("Nominal by Nominal"),
1974 N_("Ordinal by Ordinal"),
1975 N_("Interval by Interval"),
1976 N_("Measure of Agreement"),
1979 static const char *stats[N_SYMMETRIC] =
1983 N_("Contingency Coefficient"),
1984 N_("Kendall's tau-b"),
1985 N_("Kendall's tau-c"),
1987 N_("Spearman Correlation"),
1992 static const int stats_categories[N_SYMMETRIC] =
1994 0, 0, 0, 1, 1, 1, 1, 2, 3,
1998 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2001 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2004 tab_offset (sym, nvar - 2, -1);
2006 for (i = 0; i < N_SYMMETRIC; i++)
2008 if (sym_v[i] == SYSMIS)
2011 if (stats_categories[i] != last_cat)
2013 last_cat = stats_categories[i];
2014 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2017 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2018 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2019 if (sym_ase[i] != SYSMIS)
2020 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2021 if (sym_t[i] != SYSMIS)
2022 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2023 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2027 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2028 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2031 tab_offset (sym, 0, -1);
2034 static int calc_risk (double[], double[], double[], union value *);
2036 /* Display risk estimate. */
2041 double risk_v[3], lower[3], upper[3];
2045 if (!calc_risk (risk_v, upper, lower, c))
2048 tab_offset (risk, nvar - 2, -1);
2050 for (i = 0; i < 3; i++)
2052 if (risk_v[i] == SYSMIS)
2058 if (x->vars[COL_VAR]->type == NUMERIC)
2059 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2060 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2062 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2063 x->vars[COL_VAR]->name,
2064 x->vars[COL_VAR]->width, c[0].s,
2065 x->vars[COL_VAR]->width, c[1].s);
2069 if (x->vars[ROW_VAR]->type == NUMERIC)
2070 sprintf (buf, _("For cohort %s = %g"),
2071 x->vars[ROW_VAR]->name, rows[i - 1].f);
2073 sprintf (buf, _("For cohort %s = %.*s"),
2074 x->vars[ROW_VAR]->name,
2075 x->vars[ROW_VAR]->width, rows[i - 1].s);
2079 tab_text (risk, 0, 0, TAB_LEFT, buf);
2080 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2081 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2082 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2083 tab_next_row (risk);
2086 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2087 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2088 tab_next_row (risk);
2090 tab_offset (risk, 0, -1);
2093 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2094 double[N_DIRECTIONAL]);
2096 /* Display directional measures. */
2098 display_directional (void)
2100 static const char *categories[] =
2102 N_("Nominal by Nominal"),
2103 N_("Ordinal by Ordinal"),
2104 N_("Nominal by Interval"),
2107 static const char *stats[] =
2110 N_("Goodman and Kruskal tau"),
2111 N_("Uncertainty Coefficient"),
2116 static const char *types[] =
2123 static const int stats_categories[N_DIRECTIONAL] =
2125 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2128 static const int stats_stats[N_DIRECTIONAL] =
2130 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2133 static const int stats_types[N_DIRECTIONAL] =
2135 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2138 static const int *stats_lookup[] =
2145 static const char **stats_names[] =
2157 double direct_v[N_DIRECTIONAL];
2158 double direct_ase[N_DIRECTIONAL];
2159 double direct_t[N_DIRECTIONAL];
2163 if (!calc_directional (direct_v, direct_ase, direct_t))
2166 tab_offset (direct, nvar - 2, -1);
2168 for (i = 0; i < N_DIRECTIONAL; i++)
2170 if (direct_v[i] == SYSMIS)
2176 for (j = 0; j < 3; j++)
2177 if (last[j] != stats_lookup[j][i])
2180 tab_hline (direct, TAL_1, j, 6, 0);
2185 int k = last[j] = stats_lookup[j][i];
2190 string = x->vars[0]->name;
2192 string = x->vars[1]->name;
2194 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2195 gettext (stats_names[j][k]), string);
2200 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2201 if (direct_ase[i] != SYSMIS)
2202 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2203 if (direct_t[i] != SYSMIS)
2204 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2205 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2206 tab_next_row (direct);
2209 tab_offset (direct, 0, -1);
2212 /* Statistical calculations. */
2214 /* Returns the value of the gamma (factorial) function for an integer
2217 gamma_int (double x)
2222 for (i = 2; i < x; i++)
2227 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2229 static inline double
2230 Pr (int a, int b, int c, int d)
2232 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2233 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2234 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2235 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2236 / gamma_int (a + b + c + d + 1.));
2239 /* Swap the contents of A and B. */
2241 swap (int *a, int *b)
2248 /* Calculate significance for Fisher's exact test as specified in
2249 _SPSS Statistical Algorithms_, Appendix 5. */
2251 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2255 if (min (c, d) < min (a, b))
2256 swap (&a, &c), swap (&b, &d);
2257 if (min (b, d) < min (a, c))
2258 swap (&a, &b), swap (&c, &d);
2262 swap (&a, &b), swap (&c, &d);
2264 swap (&a, &c), swap (&b, &d);
2268 for (x = 0; x <= a; x++)
2269 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2271 *fisher2 = *fisher1;
2272 for (x = 1; x <= b; x++)
2273 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2276 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2277 columns with values COLS and N_ROWS rows with values ROWS. Values
2278 in the matrix sum to W. */
2280 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2281 double *fisher1, double *fisher2)
2285 chisq[0] = chisq[1] = 0.;
2286 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2287 *fisher1 = *fisher2 = SYSMIS;
2289 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2291 if (ns_rows <= 1 || ns_cols <= 1)
2293 chisq[0] = chisq[1] = SYSMIS;
2297 for (r = 0; r < n_rows; r++)
2298 for (c = 0; c < n_cols; c++)
2300 const double expected = row_tot[r] * col_tot[c] / W;
2301 const double freq = mat[n_cols * r + c];
2302 const double residual = freq - expected;
2304 chisq[0] += residual * residual / expected;
2306 chisq[1] += freq * log (expected / freq);
2317 /* Calculate Yates and Fisher exact test. */
2318 if (ns_cols == 2 && ns_rows == 2)
2320 double f11, f12, f21, f22;
2326 for (i = j = 0; i < n_cols; i++)
2327 if (col_tot[i] != 0.)
2336 f11 = mat[nz_cols[0]];
2337 f12 = mat[nz_cols[1]];
2338 f21 = mat[nz_cols[0] + n_cols];
2339 f22 = mat[nz_cols[1] + n_cols];
2344 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2347 chisq[3] = (W * x * x
2348 / (f11 + f12) / (f21 + f22)
2349 / (f11 + f21) / (f12 + f22));
2357 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2358 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2361 /* Calculate Mantel-Haenszel. */
2362 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2364 double r, ase_0, ase_1;
2365 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2367 chisq[4] = (W - 1.) * r * r;
2372 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2373 ASE_1, and ase_0 into ASE_0. The row and column values must be
2374 passed in X and Y. */
2376 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2378 double SX, SY, S, T;
2380 double sum_XYf, sum_X2Y2f;
2381 double sum_Xr, sum_X2r;
2382 double sum_Yc, sum_Y2c;
2385 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2386 for (j = 0; j < n_cols; j++)
2388 double fij = mat[j + i * n_cols];
2389 double product = X[i] * Y[j];
2390 double temp = fij * product;
2392 sum_X2Y2f += temp * product;
2395 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2397 sum_Xr += X[i] * row_tot[i];
2398 sum_X2r += X[i] * X[i] * row_tot[i];
2402 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2404 sum_Yc += Y[i] * col_tot[i];
2405 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2409 S = sum_XYf - sum_Xr * sum_Yc / W;
2410 SX = sum_X2r - sum_Xr * sum_Xr / W;
2411 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2414 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2419 for (s = c = 0., i = 0; i < n_rows; i++)
2420 for (j = 0; j < n_cols; j++)
2422 double Xresid, Yresid;
2425 Xresid = X[i] - Xbar;
2426 Yresid = Y[j] - Ybar;
2427 temp = (T * Xresid * Yresid
2429 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2430 y = mat[j + i * n_cols] * temp * temp - c;
2435 *ase_1 = sqrt (s) / (T * T);
2439 static double somers_d_v[3];
2440 static double somers_d_ase[3];
2441 static double somers_d_t[3];
2443 /* Calculate symmetric statistics and their asymptotic standard
2444 errors. Returns 0 if none could be calculated. */
2446 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2447 double t[N_SYMMETRIC])
2449 int q = min (ns_rows, ns_cols);
2458 for (i = 0; i < N_SYMMETRIC; i++)
2459 v[i] = ase[i] = t[i] = SYSMIS;
2462 /* Phi, Cramer's V, contingency coefficient. */
2463 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2465 double Xp = 0.; /* Pearson chi-square. */
2470 for (r = 0; r < n_rows; r++)
2471 for (c = 0; c < n_cols; c++)
2473 const double expected = row_tot[r] * col_tot[c] / W;
2474 const double freq = mat[n_cols * r + c];
2475 const double residual = freq - expected;
2477 Xp += residual * residual / expected;
2481 if (cmd.a_statistics[CRS_ST_PHI])
2483 v[0] = sqrt (Xp / W);
2484 v[1] = sqrt (Xp / (W * (q - 1)));
2486 if (cmd.a_statistics[CRS_ST_CC])
2487 v[2] = sqrt (Xp / (Xp + W));
2490 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2491 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2496 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2503 for (r = 0; r < n_rows; r++)
2504 Dr -= row_tot[r] * row_tot[r];
2505 for (c = 0; c < n_cols; c++)
2506 Dc -= col_tot[c] * col_tot[c];
2512 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2513 for (c = 0; c < n_cols; c++)
2517 for (r = 0; r < n_rows; r++)
2518 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2528 for (i = 0; i < n_rows; i++)
2532 for (j = 1; j < n_cols; j++)
2533 Cij += col_tot[j] - cum[j + i * n_cols];
2536 for (j = 1; j < n_cols; j++)
2537 Dij += cum[j + (i - 1) * n_cols];
2541 double fij = mat[j + i * n_cols];
2547 assert (j < n_cols);
2549 Cij -= col_tot[j] - cum[j + i * n_cols];
2550 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2554 Cij += cum[j - 1 + (i - 1) * n_cols];
2555 Dij -= cum[j + (i - 1) * n_cols];
2561 if (cmd.a_statistics[CRS_ST_BTAU])
2562 v[3] = (P - Q) / sqrt (Dr * Dc);
2563 if (cmd.a_statistics[CRS_ST_CTAU])
2564 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2565 if (cmd.a_statistics[CRS_ST_GAMMA])
2566 v[5] = (P - Q) / (P + Q);
2568 /* ASE for tau-b, tau-c, gamma. Calculations could be
2569 eliminated here, at expense of memory. */
2574 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2575 for (i = 0; i < n_rows; i++)
2579 for (j = 1; j < n_cols; j++)
2580 Cij += col_tot[j] - cum[j + i * n_cols];
2583 for (j = 1; j < n_cols; j++)
2584 Dij += cum[j + (i - 1) * n_cols];
2588 double fij = mat[j + i * n_cols];
2590 if (cmd.a_statistics[CRS_ST_BTAU])
2592 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2593 + v[3] * (row_tot[i] * Dc
2594 + col_tot[j] * Dr));
2595 btau_cum += fij * temp * temp;
2599 const double temp = Cij - Dij;
2600 ctau_cum += fij * temp * temp;
2603 if (cmd.a_statistics[CRS_ST_GAMMA])
2605 const double temp = Q * Cij - P * Dij;
2606 gamma_cum += fij * temp * temp;
2609 if (cmd.a_statistics[CRS_ST_D])
2611 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2612 - (P - Q) * (W - row_tot[i]));
2613 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2614 - (Q - P) * (W - col_tot[j]));
2619 assert (j < n_cols);
2621 Cij -= col_tot[j] - cum[j + i * n_cols];
2622 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2626 Cij += cum[j - 1 + (i - 1) * n_cols];
2627 Dij -= cum[j + (i - 1) * n_cols];
2633 btau_var = ((btau_cum
2634 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2636 if (cmd.a_statistics[CRS_ST_BTAU])
2638 ase[3] = sqrt (btau_var);
2639 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2642 if (cmd.a_statistics[CRS_ST_CTAU])
2644 ase[4] = ((2 * q / ((q - 1) * W * W))
2645 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2646 t[4] = v[4] / ase[4];
2648 if (cmd.a_statistics[CRS_ST_GAMMA])
2650 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2651 t[5] = v[5] / (2. / (P + Q)
2652 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2654 if (cmd.a_statistics[CRS_ST_D])
2656 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2657 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2658 somers_d_t[0] = (somers_d_v[0]
2660 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2661 somers_d_v[1] = (P - Q) / Dc;
2662 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2663 somers_d_t[1] = (somers_d_v[1]
2665 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2666 somers_d_v[2] = (P - Q) / Dr;
2667 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2668 somers_d_t[2] = (somers_d_v[2]
2670 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2676 /* Spearman correlation, Pearson's r. */
2677 if (cmd.a_statistics[CRS_ST_CORR])
2679 double *R = local_alloc (sizeof *R * n_rows);
2680 double *C = local_alloc (sizeof *C * n_cols);
2683 double y, t, c = 0., s = 0.;
2688 R[i] = s + (row_tot[i] + 1.) / 2.;
2695 assert (i < n_rows);
2700 double y, t, c = 0., s = 0.;
2705 C[j] = s + (col_tot[j] + 1.) / 2;
2712 assert (j < n_cols);
2716 calc_r (R, C, &v[6], &t[6], &ase[6]);
2722 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2726 /* Cohen's kappa. */
2727 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2729 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2732 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2733 i < ns_rows; i++, j++)
2737 while (col_tot[j] == 0.)
2740 prod = row_tot[i] * col_tot[j];
2741 sum = row_tot[i] + col_tot[j];
2743 sum_fii += mat[j + i * n_cols];
2745 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2746 sum_riciri_ci += prod * sum;
2748 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2749 for (j = 0; j < ns_cols; j++)
2751 double sum = row_tot[i] + col_tot[j];
2752 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2755 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2757 ase[8] = sqrt ((W * W * sum_rici
2758 + sum_rici * sum_rici
2759 - W * sum_riciri_ci)
2760 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2762 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2763 / pow2 (W * W - sum_rici))
2764 + ((2. * (W - sum_fii)
2765 * (2. * sum_fii * sum_rici
2766 - W * sum_fiiri_ci))
2767 / cube (W * W - sum_rici))
2768 + (pow2 (W - sum_fii)
2769 * (W * sum_fijri_ci2 - 4.
2770 * sum_rici * sum_rici)
2771 / pow4 (W * W - sum_rici))));
2773 t[8] = v[8] / ase[8];
2780 /* Calculate risk estimate. */
2782 calc_risk (double *value, double *upper, double *lower, union value *c)
2784 double f11, f12, f21, f22;
2790 for (i = 0; i < 3; i++)
2791 value[i] = upper[i] = lower[i] = SYSMIS;
2794 if (ns_rows != 2 || ns_cols != 2)
2801 for (i = j = 0; i < n_cols; i++)
2802 if (col_tot[i] != 0.)
2811 f11 = mat[nz_cols[0]];
2812 f12 = mat[nz_cols[1]];
2813 f21 = mat[nz_cols[0] + n_cols];
2814 f22 = mat[nz_cols[1] + n_cols];
2816 c[0] = cols[nz_cols[0]];
2817 c[1] = cols[nz_cols[1]];
2820 value[0] = (f11 * f22) / (f12 * f21);
2821 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2822 lower[0] = value[0] * exp (-1.960 * v);
2823 upper[0] = value[0] * exp (1.960 * v);
2825 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2826 v = sqrt ((f12 / (f11 * (f11 + f12)))
2827 + (f22 / (f21 * (f21 + f22))));
2828 lower[1] = value[1] * exp (-1.960 * v);
2829 upper[1] = value[1] * exp (1.960 * v);
2831 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2832 v = sqrt ((f11 / (f12 * (f11 + f12)))
2833 + (f21 / (f22 * (f21 + f22))));
2834 lower[2] = value[2] * exp (-1.960 * v);
2835 upper[2] = value[2] * exp (1.960 * v);
2840 /* Calculate directional measures. */
2842 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2843 double t[N_DIRECTIONAL])
2848 for (i = 0; i < N_DIRECTIONAL; i++)
2849 v[i] = ase[i] = t[i] = SYSMIS;
2853 if (cmd.a_statistics[CRS_ST_LAMBDA])
2855 double *fim = xmalloc (sizeof *fim * n_rows);
2856 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2857 double *fmj = xmalloc (sizeof *fmj * n_cols);
2858 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2859 double sum_fim, sum_fmj;
2861 int rm_index, cm_index;
2864 /* Find maximum for each row and their sum. */
2865 for (sum_fim = 0., i = 0; i < n_rows; i++)
2867 double max = mat[i * n_cols];
2870 for (j = 1; j < n_cols; j++)
2871 if (mat[j + i * n_cols] > max)
2873 max = mat[j + i * n_cols];
2877 sum_fim += fim[i] = max;
2878 fim_index[i] = index;
2881 /* Find maximum for each column. */
2882 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2884 double max = mat[j];
2887 for (i = 1; i < n_rows; i++)
2888 if (mat[j + i * n_cols] > max)
2890 max = mat[j + i * n_cols];
2894 sum_fmj += fmj[j] = max;
2895 fmj_index[j] = index;
2898 /* Find maximum row total. */
2901 for (i = 1; i < n_rows; i++)
2902 if (row_tot[i] > rm)
2908 /* Find maximum column total. */
2911 for (j = 1; j < n_cols; j++)
2912 if (col_tot[j] > cm)
2918 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2919 v[1] = (sum_fmj - rm) / (W - rm);
2920 v[2] = (sum_fim - cm) / (W - cm);
2922 /* ASE1 for Y given X. */
2926 for (accum = 0., i = 0; i < n_rows; i++)
2927 for (j = 0; j < n_cols; j++)
2929 const int deltaj = j == cm_index;
2930 accum += (mat[j + i * n_cols]
2931 * pow2 ((j == fim_index[i])
2936 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2939 /* ASE0 for Y given X. */
2943 for (accum = 0., i = 0; i < n_rows; i++)
2944 if (cm_index != fim_index[i])
2945 accum += (mat[i * n_cols + fim_index[i]]
2946 + mat[i * n_cols + cm_index]);
2947 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2950 /* ASE1 for X given Y. */
2954 for (accum = 0., i = 0; i < n_rows; i++)
2955 for (j = 0; j < n_cols; j++)
2957 const int deltaj = i == rm_index;
2958 accum += (mat[j + i * n_cols]
2959 * pow2 ((i == fmj_index[j])
2964 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2967 /* ASE0 for X given Y. */
2971 for (accum = 0., j = 0; j < n_cols; j++)
2972 if (rm_index != fmj_index[j])
2973 accum += (mat[j + n_cols * fmj_index[j]]
2974 + mat[j + n_cols * rm_index]);
2975 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2978 /* Symmetric ASE0 and ASE1. */
2983 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
2984 for (j = 0; j < n_cols; j++)
2986 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
2987 int temp1 = (i == rm_index) + (j == cm_index);
2988 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
2989 accum1 += (mat[j + i * n_cols]
2990 * pow2 (temp0 + (v[0] - 1.) * temp1));
2992 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
2993 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
2994 / (2. * W - rm - cm));
3003 double sum_fij2_ri, sum_fij2_ci;
3004 double sum_ri2, sum_cj2;
3006 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3007 for (j = 0; j < n_cols; j++)
3009 double temp = pow2 (mat[j + i * n_cols]);
3010 sum_fij2_ri += temp / row_tot[i];
3011 sum_fij2_ci += temp / col_tot[j];
3014 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3015 sum_ri2 += row_tot[i] * row_tot[i];
3017 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3018 sum_cj2 += col_tot[j] * col_tot[j];
3020 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3021 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3025 if (cmd.a_statistics[CRS_ST_UC])
3027 double UX, UY, UXY, P;
3028 double ase1_yx, ase1_xy, ase1_sym;
3031 for (UX = 0., i = 0; i < n_rows; i++)
3032 if (row_tot[i] > 0.)
3033 UX -= row_tot[i] / W * log (row_tot[i] / W);
3035 for (UY = 0., j = 0; j < n_cols; j++)
3036 if (col_tot[j] > 0.)
3037 UY -= col_tot[j] / W * log (col_tot[j] / W);
3039 for (UXY = P = 0., i = 0; i < n_rows; i++)
3040 for (j = 0; j < n_cols; j++)
3042 double entry = mat[j + i * n_cols];
3047 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3048 UXY -= entry / W * log (entry / W);
3051 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3052 for (j = 0; j < n_cols; j++)
3054 double entry = mat[j + i * n_cols];
3059 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3060 + (UX - UXY) * log (col_tot[j] / W));
3061 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3062 + (UY - UXY) * log (row_tot[i] / W));
3063 ase1_sym += entry * pow2 ((UXY
3064 * log (row_tot[i] * col_tot[j] / (W * W)))
3065 - (UX + UY) * log (entry / W));
3068 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3069 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3070 t[5] = v[5] / ((2. / (W * (UX + UY)))
3071 * sqrt (P - pow2 (UX + UY - UXY) / W));
3073 v[6] = (UX + UY - UXY) / UX;
3074 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3075 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3077 v[7] = (UX + UY - UXY) / UY;
3078 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3079 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3083 if (cmd.a_statistics[CRS_ST_D])
3088 calc_symmetric (NULL, NULL, NULL);
3089 for (i = 0; i < 3; i++)
3091 v[8 + i] = somers_d_v[i];
3092 ase[8 + i] = somers_d_ase[i];
3093 t[8 + i] = somers_d_t[i];
3098 if (cmd.a_statistics[CRS_ST_ETA])
3101 double sum_Xr, sum_X2r;
3105 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3107 sum_Xr += rows[i].f * row_tot[i];
3108 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3110 SX = sum_X2r - sum_Xr * sum_Xr / W;
3112 for (SXW = 0., j = 0; j < n_cols; j++)
3116 for (cum = 0., i = 0; i < n_rows; i++)
3118 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3119 cum += rows[i].f * mat[j + i * n_cols];
3122 SXW -= cum * cum / col_tot[j];
3124 v[11] = sqrt (1. - SXW / SX);
3128 double sum_Yc, sum_Y2c;
3132 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3134 sum_Yc += cols[i].f * col_tot[i];
3135 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3137 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3139 for (SYW = 0., i = 0; i < n_rows; i++)
3143 for (cum = 0., j = 0; j < n_cols; j++)
3145 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3146 cum += cols[j].f * mat[j + i * n_cols];
3149 SYW -= cum * cum / row_tot[i];
3151 v[12] = sqrt (1. - SYW / SY);
3158 /* A wrapper around data_out() that limits string output to short
3159 string width and null terminates the result. */
3161 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3163 struct fmt_spec fmt_subst;
3165 /* Limit to short string width. */
3166 if (formats[fp->type].cat & FCAT_STRING)
3170 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3171 if (fmt_subst.type == FMT_A)
3172 fmt_subst.w = min (8, fmt_subst.w);
3174 fmt_subst.w = min (16, fmt_subst.w);
3180 data_out (s, fp, v);
3182 /* Null terminate. */