1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include <gsl/gsl_cdf.h>
38 #include "algorithm.h"
41 #include "dictionary.h"
52 #include "value-labels.h"
57 #define _(msgid) gettext (msgid)
58 #define N_(msgid) msgid
62 #include "debug-print.h"
68 +missing=miss:!table/include/report;
69 +write[wr_]=none,cells,all;
70 +format=fmt:!labels/nolabels/novallabs,
73 tabl:!tables/notables,
76 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
78 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
84 /* Number of chi-square statistics. */
87 /* Number of symmetric statistics. */
90 /* Number of directional statistics. */
91 #define N_DIRECTIONAL 13
93 /* A single table entry for general mode. */
96 int table; /* Flattened table number. */
99 double freq; /* Frequency count. */
100 double *data; /* Crosstabulation table for integer mode. */
103 union value values[1]; /* Values. */
106 /* A crosstabulation. */
109 int nvar; /* Number of variables. */
110 double missing; /* Missing cases count. */
111 int ofs; /* Integer mode: Offset into sorted_tab[]. */
112 struct variable *vars[2]; /* At least two variables; sorted by
113 larger indices first. */
116 /* Integer mode variable info. */
119 int min; /* Minimum value. */
120 int max; /* Maximum value + 1. */
121 int count; /* max - min. */
124 static inline struct var_range *
125 get_var_range (struct variable *v)
128 assert (v->aux != NULL);
132 /* Indexes into crosstab.v. */
139 /* General mode crosstabulation table. */
140 static struct hsh_table *gen_tab; /* Hash table. */
141 static int n_sorted_tab; /* Number of entries in sorted_tab. */
142 static struct table_entry **sorted_tab; /* Sorted table. */
144 /* Variables specifies on VARIABLES. */
145 static struct variable **variables;
146 static size_t variables_cnt;
149 static struct crosstab **xtab;
152 /* Integer or general mode? */
161 static int num_cells; /* Number of cells requested. */
162 static int cells[8]; /* Cells requested. */
165 static int write; /* One of WR_* that specifies the WRITE style. */
167 /* Command parsing info. */
168 static struct cmd_crosstabs cmd;
171 static struct pool *pl_tc; /* For table cells. */
172 static struct pool *pl_col; /* For column data. */
174 static int internal_cmd_crosstabs (void);
175 static void precalc (void *);
176 static int calc_general (struct ccase *, void *);
177 static int calc_integer (struct ccase *, void *);
178 static void postcalc (void *);
179 static void submit (struct tab_table *);
181 static void format_short (char *s, const struct fmt_spec *fp,
182 const union value *v);
184 /* Parse and execute CROSSTABS, then clean up. */
188 int result = internal_cmd_crosstabs ();
191 pool_destroy (pl_tc);
192 pool_destroy (pl_col);
197 /* Parses and executes the CROSSTABS procedure. */
199 internal_cmd_crosstabs (void)
207 pl_tc = pool_create ();
208 pl_col = pool_create ();
210 if (!parse_crosstabs (&cmd))
213 mode = variables ? INTEGER : GENERAL;
218 cmd.a_cells[CRS_CL_COUNT] = 1;
224 for (i = 0; i < CRS_CL_count; i++)
229 cmd.a_cells[CRS_CL_COUNT] = 1;
230 cmd.a_cells[CRS_CL_ROW] = 1;
231 cmd.a_cells[CRS_CL_COLUMN] = 1;
232 cmd.a_cells[CRS_CL_TOTAL] = 1;
234 if (cmd.a_cells[CRS_CL_ALL])
236 for (i = 0; i < CRS_CL_count; i++)
238 cmd.a_cells[CRS_CL_ALL] = 0;
240 cmd.a_cells[CRS_CL_NONE] = 0;
242 for (num_cells = i = 0; i < CRS_CL_count; i++)
244 cells[num_cells++] = i;
247 if (cmd.sbc_statistics)
252 for (i = 0; i < CRS_ST_count; i++)
253 if (cmd.a_statistics[i])
256 cmd.a_statistics[CRS_ST_CHISQ] = 1;
257 if (cmd.a_statistics[CRS_ST_ALL])
258 for (i = 0; i < CRS_ST_count; i++)
259 cmd.a_statistics[i] = 1;
263 if (cmd.miss == CRS_REPORT && mode == GENERAL)
265 msg (SE, _("Missing mode REPORT not allowed in general mode. "
266 "Assuming MISSING=TABLE."));
267 cmd.miss = CRS_TABLE;
271 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
272 cmd.a_write[CRS_WR_ALL] = 0;
273 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
275 msg (SE, _("Write mode ALL not allowed in general mode. "
276 "Assuming WRITE=CELLS."));
277 cmd.a_write[CRS_WR_CELLS] = 1;
280 && (cmd.a_write[CRS_WR_NONE]
281 + cmd.a_write[CRS_WR_ALL]
282 + cmd.a_write[CRS_WR_CELLS] == 0))
283 cmd.a_write[CRS_WR_CELLS] = 1;
284 if (cmd.a_write[CRS_WR_CELLS])
285 write = CRS_WR_CELLS;
286 else if (cmd.a_write[CRS_WR_ALL])
291 procedure_with_splits (precalc,
292 mode == GENERAL ? calc_general : calc_integer,
298 /* Parses the TABLES subcommand. */
300 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
302 struct var_set *var_set;
304 struct variable ***by = NULL;
309 /* Ensure that this is a TABLES subcommand. */
310 if (!lex_match_id ("TABLES")
311 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
316 if (variables != NULL)
317 var_set = var_set_create_from_array (variables, variables_cnt);
319 var_set = var_set_create_from_dict (default_dict);
320 assert (var_set != NULL);
324 by = xrealloc (by, sizeof *by * (n_by + 1));
325 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
326 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
327 PV_NO_DUPLICATE | PV_NO_SCRATCH))
332 if (!lex_match (T_BY))
336 lex_error (_("expecting BY"));
345 int *by_iter = xcalloc (n_by, sizeof *by_iter);
348 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
349 for (i = 0; i < nx; i++)
353 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
360 for (i = 0; i < n_by; i++)
361 x->vars[i] = by[i][by_iter[i]];
367 for (i = n_by - 1; i >= 0; i--)
369 if (++by_iter[i] < by_nvar[i])
382 /* All return paths lead here. */
386 for (i = 0; i < n_by; i++)
392 var_set_destroy (var_set);
397 /* Parses the VARIABLES subcommand. */
399 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
403 msg (SE, _("VARIABLES must be specified before TABLES."));
411 int orig_nv = variables_cnt;
416 if (!parse_variables (default_dict, &variables, &variables_cnt,
417 (PV_APPEND | PV_NUMERIC
418 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
423 lex_error ("expecting `('");
428 if (!lex_force_int ())
430 min = lex_integer ();
435 if (!lex_force_int ())
437 max = lex_integer ();
440 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
448 lex_error ("expecting `)'");
453 for (i = orig_nv; i < variables_cnt; i++)
455 struct var_range *vr = xmalloc (sizeof *vr);
458 vr->count = max - min + 1;
459 var_attach_aux (variables[i], vr, var_dtor_free);
474 /* Data file processing. */
476 static int compare_table_entry (const void *, const void *, void *);
477 static unsigned hash_table_entry (const void *, void *);
479 /* Set up the crosstabulation tables for processing. */
481 precalc (void *aux UNUSED)
485 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
495 for (i = 0; i < nxtab; i++)
497 struct crosstab *x = xtab[i];
502 x->ofs = n_sorted_tab;
504 for (j = 2; j < x->nvar; j++)
505 count *= get_var_range (x->vars[j - 2])->count;
507 sorted_tab = xrealloc (sorted_tab,
508 sizeof *sorted_tab * (n_sorted_tab + count));
509 v = local_alloc (sizeof *v * x->nvar);
510 for (j = 2; j < x->nvar; j++)
511 v[j] = get_var_range (x->vars[j])->min;
512 for (j = 0; j < count; j++)
514 struct table_entry *te;
517 te = sorted_tab[n_sorted_tab++]
518 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
522 int row_cnt = get_var_range (x->vars[0])->count;
523 int col_cnt = get_var_range (x->vars[1])->count;
524 const int mat_size = row_cnt * col_cnt;
527 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
528 for (m = 0; m < mat_size; m++)
532 for (k = 2; k < x->nvar; k++)
533 te->values[k].f = v[k];
534 for (k = 2; k < x->nvar; k++)
536 struct var_range *vr = get_var_range (x->vars[k]);
537 if (++v[k] >= vr->max)
546 sorted_tab = xrealloc (sorted_tab,
547 sizeof *sorted_tab * (n_sorted_tab + 1));
548 sorted_tab[n_sorted_tab] = NULL;
552 /* Form crosstabulations for general mode. */
554 calc_general (struct ccase *c, void *aux UNUSED)
559 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
561 /* Flattened current table index. */
564 for (t = 0; t < nxtab; t++)
566 struct crosstab *x = xtab[t];
567 const size_t entry_size = (sizeof (struct table_entry)
568 + sizeof (union value) * (x->nvar - 1));
569 struct table_entry *te = local_alloc (entry_size);
571 /* Construct table entry for the current record and table. */
577 for (j = 0; j < x->nvar; j++)
579 if ((cmd.miss == CRS_TABLE
580 && is_missing (case_data (c, x->vars[j]->fv), x->vars[j]))
581 || (cmd.miss == CRS_INCLUDE
582 && is_system_missing (case_data (c, x->vars[j]->fv),
585 x->missing += weight;
589 if (x->vars[j]->type == NUMERIC)
590 te->values[j].f = case_num (c, x->vars[j]->fv);
593 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
596 /* Necessary in order to simplify comparisons. */
597 memset (&te->values[j].s[x->vars[j]->width], 0,
598 sizeof (union value) - x->vars[j]->width);
603 /* Add record to hash table. */
605 struct table_entry **tepp
606 = (struct table_entry **) hsh_probe (gen_tab, te);
609 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
612 memcpy (tep, te, entry_size);
617 (*tepp)->u.freq += weight;
628 calc_integer (struct ccase *c, void *aux UNUSED)
633 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
635 /* Flattened current table index. */
638 for (t = 0; t < nxtab; t++)
640 struct crosstab *x = xtab[t];
645 for (i = 0; i < x->nvar; i++)
647 struct variable *const v = x->vars[i];
648 struct var_range *vr = get_var_range (v);
649 double value = case_num (c, v->fv);
651 /* Note that the first test also rules out SYSMIS. */
652 if ((value < vr->min || value >= vr->max)
653 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
655 x->missing += weight;
661 ofs += fact * ((int) value - vr->min);
667 struct variable *row_var = x->vars[ROW_VAR];
668 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
670 struct variable *col_var = x->vars[COL_VAR];
671 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
673 const int col_dim = get_var_range (col_var)->count;
675 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
684 /* Compare the table_entry's at A and B and return a strcmp()-type
687 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
689 const struct table_entry *a = a_;
690 const struct table_entry *b = b_;
692 if (a->table > b->table)
694 else if (a->table < b->table)
698 const struct crosstab *x = xtab[a->table];
701 for (i = x->nvar - 1; i >= 0; i--)
702 if (x->vars[i]->type == NUMERIC)
704 const double diffnum = a->values[i].f - b->values[i].f;
707 else if (diffnum > 0)
712 assert (x->vars[i]->type == ALPHA);
714 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
725 /* Calculate a hash value from table_entry A. */
727 hash_table_entry (const void *a_, void *foo UNUSED)
729 const struct table_entry *a = a_;
734 for (i = 0; i < xtab[a->table]->nvar; i++)
735 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
740 /* Post-data reading calculations. */
742 static struct table_entry **find_pivot_extent (struct table_entry **,
743 int *cnt, int pivot);
744 static void enum_var_values (struct table_entry **entries, int entry_cnt,
746 union value **values, int *value_cnt);
747 static void output_pivot_table (struct table_entry **, struct table_entry **,
748 double **, double **, double **,
749 int *, int *, int *);
750 static void make_summary_table (void);
753 postcalc (void *aux UNUSED)
757 n_sorted_tab = hsh_count (gen_tab);
758 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
761 make_summary_table ();
763 /* Identify all the individual crosstabulation tables, and deal with
766 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
767 int pc = n_sorted_tab; /* Pivot count. */
769 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
770 int maxrows = 0, maxcols = 0, maxcells = 0;
774 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
778 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
779 &maxrows, &maxcols, &maxcells);
788 hsh_destroy (gen_tab);
791 static void insert_summary (struct tab_table *, int tab_index, double valid);
793 /* Output a table summarizing the cases processed. */
795 make_summary_table (void)
797 struct tab_table *summary;
799 struct table_entry **pb = sorted_tab, **pe;
800 int pc = n_sorted_tab;
803 summary = tab_create (7, 3 + nxtab, 1);
804 tab_title (summary, 0, _("Summary."));
805 tab_headers (summary, 1, 0, 3, 0);
806 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
807 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
808 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
809 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
810 tab_hline (summary, TAL_1, 1, 6, 1);
811 tab_hline (summary, TAL_1, 1, 6, 2);
812 tab_vline (summary, TAL_1, 3, 1, 1);
813 tab_vline (summary, TAL_1, 5, 1, 1);
817 for (i = 0; i < 3; i++)
819 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
820 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
823 tab_offset (summary, 0, 3);
829 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
833 while (cur_tab < (*pb)->table)
834 insert_summary (summary, cur_tab++, 0.);
837 for (valid = 0.; pb < pe; pb++)
838 valid += (*pb)->u.freq;
841 const struct crosstab *const x = xtab[(*pb)->table];
842 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
843 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
844 const int count = n_cols * n_rows;
846 for (valid = 0.; pb < pe; pb++)
848 const double *data = (*pb)->u.data;
851 for (i = 0; i < count; i++)
855 insert_summary (summary, cur_tab++, valid);
860 while (cur_tab < nxtab)
861 insert_summary (summary, cur_tab++, 0.);
866 /* Inserts a line into T describing the crosstabulation at index
867 TAB_INDEX, which has VALID valid observations. */
869 insert_summary (struct tab_table *t, int tab_index, double valid)
871 struct crosstab *x = xtab[tab_index];
873 tab_hline (t, TAL_1, 0, 6, 0);
875 /* Crosstabulation name. */
877 char *buf = local_alloc (128 * x->nvar);
881 for (i = 0; i < x->nvar; i++)
884 cp = stpcpy (cp, " * ");
887 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
889 tab_text (t, 0, 0, TAB_LEFT, buf);
894 /* Counts and percentages. */
904 for (i = 0; i < 3; i++)
906 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
907 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
918 static struct tab_table *table; /* Crosstabulation table. */
919 static struct tab_table *chisq; /* Chi-square table. */
920 static struct tab_table *sym; /* Symmetric measures table. */
921 static struct tab_table *risk; /* Risk estimate table. */
922 static struct tab_table *direct; /* Directional measures table. */
925 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
927 /* Column values, number of columns. */
928 static union value *cols;
931 /* Row values, number of rows. */
932 static union value *rows;
935 /* Number of statistically interesting columns/rows (columns/rows with
937 static int ns_cols, ns_rows;
939 /* Crosstabulation. */
940 static struct crosstab *x;
942 /* Number of variables from the crosstabulation to consider. This is
943 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
946 /* Matrix contents. */
947 static double *mat; /* Matrix proper. */
948 static double *row_tot; /* Row totals. */
949 static double *col_tot; /* Column totals. */
950 static double W; /* Grand total. */
952 static void display_dimensions (struct tab_table *, int first_difference,
953 struct table_entry *);
954 static void display_crosstabulation (void);
955 static void display_chisq (void);
956 static void display_symmetric (void);
957 static void display_risk (void);
958 static void display_directional (void);
959 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
960 static void table_value_missing (struct tab_table *table, int c, int r,
961 unsigned char opt, const union value *v,
962 const struct variable *var);
963 static void delete_missing (void);
965 /* Output pivot table beginning at PB and continuing until PE,
966 exclusive. For efficiency, *MATP is a pointer to a matrix that can
967 hold *MAXROWS entries. */
969 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
970 double **matp, double **row_totp, double **col_totp,
971 int *maxrows, int *maxcols, int *maxcells)
974 struct table_entry **tb = pb, **te; /* Table begin, table end. */
975 int tc = pe - pb; /* Table count. */
977 /* Table entry for header comparison. */
978 struct table_entry *cmp = NULL;
980 x = xtab[(*pb)->table];
981 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
983 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
985 /* Crosstabulation table initialization. */
988 table = tab_create (nvar + n_cols,
989 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
990 tab_headers (table, nvar - 1, 0, 2, 0);
992 /* First header line. */
993 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
994 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
996 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
998 /* Second header line. */
1002 for (i = 2; i < nvar; i++)
1003 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1004 TAB_RIGHT | TAT_TITLE,
1006 ? x->vars[i]->label : x->vars[i]->name));
1007 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1008 x->vars[ROW_VAR]->name);
1009 for (i = 0; i < n_cols; i++)
1010 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1012 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1015 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1016 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1020 char *title = local_alloc (x->nvar * 64 + 128);
1024 if (cmd.pivot == CRS_PIVOT)
1025 for (i = 0; i < nvar; i++)
1028 cp = stpcpy (cp, " by ");
1029 cp = stpcpy (cp, x->vars[i]->name);
1033 cp = spprintf (cp, "%s by %s for",
1034 x->vars[0]->name, x->vars[1]->name);
1035 for (i = 2; i < nvar; i++)
1037 char buf[64], *bufp;
1042 cp = stpcpy (cp, x->vars[i]->name);
1044 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1045 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1047 cp = stpcpy (cp, bufp);
1051 cp = stpcpy (cp, " [");
1052 for (i = 0; i < num_cells; i++)
1060 static const struct tuple cell_names[] =
1062 {CRS_CL_COUNT, N_("count")},
1063 {CRS_CL_ROW, N_("row %")},
1064 {CRS_CL_COLUMN, N_("column %")},
1065 {CRS_CL_TOTAL, N_("total %")},
1066 {CRS_CL_EXPECTED, N_("expected")},
1067 {CRS_CL_RESIDUAL, N_("residual")},
1068 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1069 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1073 const struct tuple *t;
1075 for (t = cell_names; t->value != cells[i]; t++)
1076 assert (t->value != -1);
1078 cp = stpcpy (cp, ", ");
1079 cp = stpcpy (cp, gettext (t->name));
1083 tab_title (table, 0, title);
1087 tab_offset (table, 0, 2);
1092 /* Chi-square table initialization. */
1093 if (cmd.a_statistics[CRS_ST_CHISQ])
1095 chisq = tab_create (6 + (nvar - 2),
1096 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1097 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1099 tab_title (chisq, 0, "Chi-square tests.");
1101 tab_offset (chisq, nvar - 2, 0);
1102 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1103 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1104 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1105 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1106 _("Asymp. Sig. (2-sided)"));
1107 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1108 _("Exact. Sig. (2-sided)"));
1109 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1110 _("Exact. Sig. (1-sided)"));
1112 tab_offset (chisq, 0, 1);
1117 /* Symmetric measures. */
1118 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1119 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1120 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1121 || cmd.a_statistics[CRS_ST_KAPPA])
1123 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1124 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1125 tab_title (sym, 0, "Symmetric measures.");
1127 tab_offset (sym, nvar - 2, 0);
1128 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1129 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1130 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1131 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1132 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1133 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1134 tab_offset (sym, 0, 1);
1139 /* Risk estimate. */
1140 if (cmd.a_statistics[CRS_ST_RISK])
1142 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1143 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1144 tab_title (risk, 0, "Risk estimate.");
1146 tab_offset (risk, nvar - 2, 0);
1147 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1148 _(" 95%% Confidence Interval"));
1149 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1150 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1151 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1152 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1153 tab_hline (risk, TAL_1, 2, 3, 1);
1154 tab_vline (risk, TAL_1, 2, 0, 1);
1155 tab_offset (risk, 0, 2);
1160 /* Directional measures. */
1161 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1162 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1164 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1165 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1166 tab_title (direct, 0, "Directional measures.");
1168 tab_offset (direct, nvar - 2, 0);
1169 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1170 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1171 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1172 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1173 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1174 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1175 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1176 tab_offset (direct, 0, 1);
1183 /* Find pivot subtable if applicable. */
1184 te = find_pivot_extent (tb, &tc, 0);
1188 /* Find all the row variable values. */
1189 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1191 /* Allocate memory space for the column and row totals. */
1192 if (n_rows > *maxrows)
1194 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1195 row_tot = *row_totp;
1198 if (n_cols > *maxcols)
1200 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1201 col_tot = *col_totp;
1205 /* Allocate table space for the matrix. */
1206 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1207 tab_realloc (table, -1,
1208 max (tab_nr (table) + (n_rows + 1) * num_cells,
1209 tab_nr (table) * (pe - pb) / (te - tb)));
1211 if (mode == GENERAL)
1213 /* Allocate memory space for the matrix. */
1214 if (n_cols * n_rows > *maxcells)
1216 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1217 *maxcells = n_cols * n_rows;
1222 /* Build the matrix and calculate column totals. */
1224 union value *cur_col = cols;
1225 union value *cur_row = rows;
1227 double *cp = col_tot;
1228 struct table_entry **p;
1231 for (p = &tb[0]; p < te; p++)
1233 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1237 for (; cur_row < &rows[n_rows]; cur_row++)
1243 mp = &mat[cur_col - cols];
1246 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1253 *cp += *mp = (*p)->u.freq;
1258 /* Zero out the rest of the matrix. */
1259 for (; cur_row < &rows[n_rows]; cur_row++)
1265 if (cur_col < &cols[n_cols])
1267 const int rem_cols = n_cols - (cur_col - cols);
1270 for (c = 0; c < rem_cols; c++)
1272 mp = &mat[cur_col - cols];
1273 for (r = 0; r < n_rows; r++)
1275 for (c = 0; c < rem_cols; c++)
1277 mp += n_cols - rem_cols;
1285 double *tp = col_tot;
1287 assert (mode == INTEGER);
1288 mat = (*tb)->u.data;
1291 /* Calculate column totals. */
1292 for (c = 0; c < n_cols; c++)
1295 double *cp = &mat[c];
1297 for (r = 0; r < n_rows; r++)
1298 cum += cp[r * n_cols];
1306 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1307 ns_cols += *cp != 0.;
1310 /* Calculate row totals. */
1313 double *rp = row_tot;
1316 for (ns_rows = 0, r = n_rows; r--; )
1319 for (c = n_cols; c--; )
1327 /* Calculate grand total. */
1333 if (n_rows < n_cols)
1334 tp = row_tot, n = n_rows;
1336 tp = col_tot, n = n_cols;
1342 /* Find the first variable that differs from the last subtable,
1343 then display the values of the dimensioning variables for
1344 each table that needs it. */
1346 int first_difference = nvar - 1;
1349 for (; ; first_difference--)
1351 assert (first_difference >= 2);
1352 if (memcmp (&cmp->values[first_difference],
1353 &(*tb)->values[first_difference],
1354 sizeof *cmp->values))
1360 display_dimensions (table, first_difference, *tb);
1362 display_dimensions (chisq, first_difference, *tb);
1364 display_dimensions (sym, first_difference, *tb);
1366 display_dimensions (risk, first_difference, *tb);
1368 display_dimensions (direct, first_difference, *tb);
1372 display_crosstabulation ();
1373 if (cmd.miss == CRS_REPORT)
1378 display_symmetric ();
1382 display_directional ();
1393 tab_resize (chisq, 4 + (nvar - 2), -1);
1404 /* Delete missing rows and columns for statistical analysis when
1407 delete_missing (void)
1412 for (r = 0; r < n_rows; r++)
1413 if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1417 for (c = 0; c < n_cols; c++)
1418 mat[c + r * n_cols] = 0.;
1426 for (c = 0; c < n_cols; c++)
1427 if (is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1431 for (r = 0; r < n_rows; r++)
1432 mat[c + r * n_cols] = 0.;
1438 /* Prepare table T for submission, and submit it. */
1440 submit (struct tab_table *t)
1447 tab_resize (t, -1, 0);
1448 if (tab_nr (t) == tab_t (t))
1453 tab_offset (t, 0, 0);
1455 for (i = 2; i < nvar; i++)
1456 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1457 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1458 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1459 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1461 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1463 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1464 tab_dim (t, crosstabs_dim);
1468 /* Sets the widths of all the columns and heights of all the rows in
1469 table T for driver D. */
1471 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1475 /* Width of a numerical column. */
1476 int c = outp_string_width (d, "0.000000");
1477 if (cmd.miss == CRS_REPORT)
1478 c += outp_string_width (d, "M");
1480 /* Set width for header columns. */
1483 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1485 if (w < d->prop_em_width * 8)
1486 w = d->prop_em_width * 8;
1488 if (w > d->prop_em_width * 15)
1489 w = d->prop_em_width * 15;
1491 for (i = 0; i < t->l; i++)
1495 for (i = t->l; i < t->nc; i++)
1498 for (i = 0; i < t->nr; i++)
1499 t->h[i] = tab_natural_height (t, d, i);
1502 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1503 int *cnt, int pivot);
1504 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1505 int *cnt, int pivot);
1507 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1509 static struct table_entry **
1510 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1512 return (mode == GENERAL
1513 ? find_pivot_extent_general (tp, cnt, pivot)
1514 : find_pivot_extent_integer (tp, cnt, pivot));
1517 /* Find the extent of a region in TP that contains one table. If
1518 PIVOT != 0 that means a set of table entries with identical table
1519 number; otherwise they also have to have the same values for every
1520 dimension after the row and column dimensions. The table that is
1521 searched starts at TP and has length CNT. Returns the first entry
1522 after the last one in the table; sets *CNT to the number of
1523 remaining values. If there are no entries in TP at all, returns
1524 NULL. A yucky interface, admittedly, but it works. */
1525 static struct table_entry **
1526 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1528 struct table_entry *fp = *tp;
1533 x = xtab[(*tp)->table];
1541 if ((*tp)->table != fp->table)
1546 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1553 /* Integer mode correspondent to find_pivot_extent_general(). This
1554 could be optimized somewhat, but I just don't give a crap about
1555 CROSSTABS performance in integer mode, which is just a
1556 CROSSTABS wart as far as I'm concerned.
1558 That said, feel free to send optimization patches to me. */
1559 static struct table_entry **
1560 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1562 struct table_entry *fp = *tp;
1567 x = xtab[(*tp)->table];
1575 if ((*tp)->table != fp->table)
1580 if (memcmp (&(*tp)->values[2], &fp->values[2],
1581 sizeof (union value) * (x->nvar - 2)))
1588 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1589 result. WIDTH_ points to an int which is either 0 for a
1590 numeric value or a string width for a string value. */
1592 compare_value (const void *a_, const void *b_, void *width_)
1594 const union value *a = a_;
1595 const union value *b = b_;
1596 const int *pwidth = width_;
1597 const int width = *pwidth;
1600 return (a->f < b->f) ? -1 : (a->f > b->f);
1602 return strncmp (a->s, b->s, width);
1605 /* Given an array of ENTRY_CNT table_entry structures starting at
1606 ENTRIES, creates a sorted list of the values that the variable
1607 with index VAR_IDX takes on. The values are returned as a
1608 malloc()'darray stored in *VALUES, with the number of values
1609 stored in *VALUE_CNT.
1612 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1613 union value **values, int *value_cnt)
1615 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1617 if (mode == GENERAL)
1619 int width = v->width;
1622 *values = xmalloc (sizeof **values * entry_cnt);
1623 for (i = 0; i < entry_cnt; i++)
1624 (*values)[i] = entries[i]->values[var_idx];
1625 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1626 compare_value, &width);
1630 struct var_range *vr = get_var_range (v);
1633 assert (mode == INTEGER);
1634 *values = xmalloc (sizeof **values * vr->count);
1635 for (i = 0; i < vr->count; i++)
1636 (*values)[i].f = i + vr->min;
1637 *value_cnt = vr->count;
1641 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1642 from V, displayed with print format spec from variable VAR. When
1643 in REPORT missing-value mode, missing values have an M appended. */
1645 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1646 const union value *v, const struct variable *var)
1648 struct fixed_string s;
1650 const char *label = val_labs_find (var->val_labs, *v);
1653 tab_text (table, c, r, TAB_LEFT, label);
1657 s.string = tab_alloc (table, var->print.w);
1658 format_short (s.string, &var->print, v);
1659 s.length = strlen (s.string);
1660 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1661 s.string[s.length++] = 'M';
1662 while (s.length && *s.string == ' ')
1667 tab_raw (table, c, r, opt, &s);
1670 /* Draws a line across TABLE at the current row to indicate the most
1671 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1672 that changed, and puts the values that changed into the table. TB
1673 and X must be the corresponding table_entry and crosstab,
1676 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1678 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1680 for (; first_difference >= 2; first_difference--)
1681 table_value_missing (table, nvar - first_difference - 1, 0,
1682 TAB_RIGHT, &tb->values[first_difference],
1683 x->vars[first_difference]);
1686 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1687 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1688 additionally suffixed with a letter `M'. */
1690 format_cell_entry (struct tab_table *table, int c, int r, double value,
1691 char suffix, int mark_missing)
1693 const struct fmt_spec f = {FMT_F, 10, 1};
1695 struct fixed_string s;
1698 s.string = tab_alloc (table, 16);
1700 data_out (s.string, &f, &v);
1701 while (*s.string == ' ')
1707 s.string[s.length++] = suffix;
1709 s.string[s.length++] = 'M';
1711 tab_raw (table, c, r, TAB_RIGHT, &s);
1714 /* Displays the crosstabulation table. */
1716 display_crosstabulation (void)
1721 for (r = 0; r < n_rows; r++)
1722 table_value_missing (table, nvar - 2, r * num_cells,
1723 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1725 tab_text (table, nvar - 2, n_rows * num_cells,
1726 TAB_LEFT, _("Total"));
1728 /* Put in the actual cells. */
1733 tab_offset (table, nvar - 1, -1);
1734 for (r = 0; r < n_rows; r++)
1737 tab_hline (table, TAL_1, -1, n_cols, 0);
1738 for (c = 0; c < n_cols; c++)
1740 int mark_missing = 0;
1741 double expected_value = row_tot[r] * col_tot[c] / W;
1742 if (cmd.miss == CRS_REPORT
1743 && (is_num_user_missing (cols[c].f, x->vars[COL_VAR])
1744 || is_num_user_missing (rows[r].f, x->vars[ROW_VAR])))
1746 for (i = 0; i < num_cells; i++)
1757 v = *mp / row_tot[r] * 100.;
1761 v = *mp / col_tot[c] * 100.;
1768 case CRS_CL_EXPECTED:
1771 case CRS_CL_RESIDUAL:
1772 v = *mp - expected_value;
1774 case CRS_CL_SRESIDUAL:
1775 v = (*mp - expected_value) / sqrt (expected_value);
1777 case CRS_CL_ASRESIDUAL:
1778 v = ((*mp - expected_value)
1779 / sqrt (expected_value
1780 * (1. - row_tot[r] / W)
1781 * (1. - col_tot[c] / W)));
1788 format_cell_entry (table, c, i, v, suffix, mark_missing);
1794 tab_offset (table, -1, tab_row (table) + num_cells);
1802 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1803 for (r = 0; r < n_rows; r++)
1806 int mark_missing = 0;
1808 if (cmd.miss == CRS_REPORT
1809 && is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1812 for (i = 0; i < num_cells; i++)
1826 v = row_tot[r] / W * 100.;
1830 v = row_tot[r] / W * 100.;
1833 case CRS_CL_EXPECTED:
1834 case CRS_CL_RESIDUAL:
1835 case CRS_CL_SRESIDUAL:
1836 case CRS_CL_ASRESIDUAL:
1844 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1845 tab_next_row (table);
1850 /* Column totals, grand total. */
1856 tab_hline (table, TAL_1, -1, n_cols, 0);
1857 for (c = 0; c <= n_cols; c++)
1859 double ct = c < n_cols ? col_tot[c] : W;
1860 int mark_missing = 0;
1864 if (cmd.miss == CRS_REPORT && c < n_cols
1865 && is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1868 for (i = 0; i < num_cells; i++)
1890 case CRS_CL_EXPECTED:
1891 case CRS_CL_RESIDUAL:
1892 case CRS_CL_SRESIDUAL:
1893 case CRS_CL_ASRESIDUAL:
1900 format_cell_entry (table, c, i, v, suffix, mark_missing);
1905 tab_offset (table, -1, tab_row (table) + last_row);
1908 tab_offset (table, 0, -1);
1911 static void calc_r (double *X, double *Y, double *, double *, double *);
1912 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1914 /* Display chi-square statistics. */
1916 display_chisq (void)
1918 static const char *chisq_stats[N_CHISQ] =
1920 N_("Pearson Chi-Square"),
1921 N_("Likelihood Ratio"),
1922 N_("Fisher's Exact Test"),
1923 N_("Continuity Correction"),
1924 N_("Linear-by-Linear Association"),
1926 double chisq_v[N_CHISQ];
1927 double fisher1, fisher2;
1933 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1935 tab_offset (chisq, nvar - 2, -1);
1937 for (i = 0; i < N_CHISQ; i++)
1939 if ((i != 2 && chisq_v[i] == SYSMIS)
1940 || (i == 2 && fisher1 == SYSMIS))
1944 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1947 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1948 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1949 tab_float (chisq, 3, 0, TAB_RIGHT,
1950 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1955 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1956 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1958 tab_next_row (chisq);
1961 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1962 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1963 tab_next_row (chisq);
1965 tab_offset (chisq, 0, -1);
1968 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1969 double[N_SYMMETRIC]);
1971 /* Display symmetric measures. */
1973 display_symmetric (void)
1975 static const char *categories[] =
1977 N_("Nominal by Nominal"),
1978 N_("Ordinal by Ordinal"),
1979 N_("Interval by Interval"),
1980 N_("Measure of Agreement"),
1983 static const char *stats[N_SYMMETRIC] =
1987 N_("Contingency Coefficient"),
1988 N_("Kendall's tau-b"),
1989 N_("Kendall's tau-c"),
1991 N_("Spearman Correlation"),
1996 static const int stats_categories[N_SYMMETRIC] =
1998 0, 0, 0, 1, 1, 1, 1, 2, 3,
2002 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2005 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2008 tab_offset (sym, nvar - 2, -1);
2010 for (i = 0; i < N_SYMMETRIC; i++)
2012 if (sym_v[i] == SYSMIS)
2015 if (stats_categories[i] != last_cat)
2017 last_cat = stats_categories[i];
2018 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2021 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2022 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2023 if (sym_ase[i] != SYSMIS)
2024 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2025 if (sym_t[i] != SYSMIS)
2026 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2027 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2031 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2032 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2035 tab_offset (sym, 0, -1);
2038 static int calc_risk (double[], double[], double[], union value *);
2040 /* Display risk estimate. */
2045 double risk_v[3], lower[3], upper[3];
2049 if (!calc_risk (risk_v, upper, lower, c))
2052 tab_offset (risk, nvar - 2, -1);
2054 for (i = 0; i < 3; i++)
2056 if (risk_v[i] == SYSMIS)
2062 if (x->vars[COL_VAR]->type == NUMERIC)
2063 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2064 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2066 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2067 x->vars[COL_VAR]->name,
2068 x->vars[COL_VAR]->width, c[0].s,
2069 x->vars[COL_VAR]->width, c[1].s);
2073 if (x->vars[ROW_VAR]->type == NUMERIC)
2074 sprintf (buf, _("For cohort %s = %g"),
2075 x->vars[ROW_VAR]->name, rows[i - 1].f);
2077 sprintf (buf, _("For cohort %s = %.*s"),
2078 x->vars[ROW_VAR]->name,
2079 x->vars[ROW_VAR]->width, rows[i - 1].s);
2083 tab_text (risk, 0, 0, TAB_LEFT, buf);
2084 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2085 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2086 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2087 tab_next_row (risk);
2090 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2091 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2092 tab_next_row (risk);
2094 tab_offset (risk, 0, -1);
2097 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2098 double[N_DIRECTIONAL]);
2100 /* Display directional measures. */
2102 display_directional (void)
2104 static const char *categories[] =
2106 N_("Nominal by Nominal"),
2107 N_("Ordinal by Ordinal"),
2108 N_("Nominal by Interval"),
2111 static const char *stats[] =
2114 N_("Goodman and Kruskal tau"),
2115 N_("Uncertainty Coefficient"),
2120 static const char *types[] =
2127 static const int stats_categories[N_DIRECTIONAL] =
2129 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2132 static const int stats_stats[N_DIRECTIONAL] =
2134 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2137 static const int stats_types[N_DIRECTIONAL] =
2139 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2142 static const int *stats_lookup[] =
2149 static const char **stats_names[] =
2161 double direct_v[N_DIRECTIONAL];
2162 double direct_ase[N_DIRECTIONAL];
2163 double direct_t[N_DIRECTIONAL];
2167 if (!calc_directional (direct_v, direct_ase, direct_t))
2170 tab_offset (direct, nvar - 2, -1);
2172 for (i = 0; i < N_DIRECTIONAL; i++)
2174 if (direct_v[i] == SYSMIS)
2180 for (j = 0; j < 3; j++)
2181 if (last[j] != stats_lookup[j][i])
2184 tab_hline (direct, TAL_1, j, 6, 0);
2189 int k = last[j] = stats_lookup[j][i];
2194 string = x->vars[0]->name;
2196 string = x->vars[1]->name;
2198 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2199 gettext (stats_names[j][k]), string);
2204 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2205 if (direct_ase[i] != SYSMIS)
2206 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2207 if (direct_t[i] != SYSMIS)
2208 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2209 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2210 tab_next_row (direct);
2213 tab_offset (direct, 0, -1);
2216 /* Statistical calculations. */
2218 /* Returns the value of the gamma (factorial) function for an integer
2221 gamma_int (double x)
2226 for (i = 2; i < x; i++)
2231 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2233 static inline double
2234 Pr (int a, int b, int c, int d)
2236 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2237 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2238 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2239 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2240 / gamma_int (a + b + c + d + 1.));
2243 /* Swap the contents of A and B. */
2245 swap (int *a, int *b)
2252 /* Calculate significance for Fisher's exact test as specified in
2253 _SPSS Statistical Algorithms_, Appendix 5. */
2255 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2259 if (min (c, d) < min (a, b))
2260 swap (&a, &c), swap (&b, &d);
2261 if (min (b, d) < min (a, c))
2262 swap (&a, &b), swap (&c, &d);
2266 swap (&a, &b), swap (&c, &d);
2268 swap (&a, &c), swap (&b, &d);
2272 for (x = 0; x <= a; x++)
2273 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2275 *fisher2 = *fisher1;
2276 for (x = 1; x <= b; x++)
2277 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2280 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2281 columns with values COLS and N_ROWS rows with values ROWS. Values
2282 in the matrix sum to W. */
2284 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2285 double *fisher1, double *fisher2)
2289 chisq[0] = chisq[1] = 0.;
2290 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2291 *fisher1 = *fisher2 = SYSMIS;
2293 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2295 if (ns_rows <= 1 || ns_cols <= 1)
2297 chisq[0] = chisq[1] = SYSMIS;
2301 for (r = 0; r < n_rows; r++)
2302 for (c = 0; c < n_cols; c++)
2304 const double expected = row_tot[r] * col_tot[c] / W;
2305 const double freq = mat[n_cols * r + c];
2306 const double residual = freq - expected;
2308 chisq[0] += residual * residual / expected;
2310 chisq[1] += freq * log (expected / freq);
2321 /* Calculate Yates and Fisher exact test. */
2322 if (ns_cols == 2 && ns_rows == 2)
2324 double f11, f12, f21, f22;
2330 for (i = j = 0; i < n_cols; i++)
2331 if (col_tot[i] != 0.)
2340 f11 = mat[nz_cols[0]];
2341 f12 = mat[nz_cols[1]];
2342 f21 = mat[nz_cols[0] + n_cols];
2343 f22 = mat[nz_cols[1] + n_cols];
2348 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2351 chisq[3] = (W * x * x
2352 / (f11 + f12) / (f21 + f22)
2353 / (f11 + f21) / (f12 + f22));
2361 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2362 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2365 /* Calculate Mantel-Haenszel. */
2366 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2368 double r, ase_0, ase_1;
2369 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2371 chisq[4] = (W - 1.) * r * r;
2376 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2377 ASE_1, and ase_0 into ASE_0. The row and column values must be
2378 passed in X and Y. */
2380 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2382 double SX, SY, S, T;
2384 double sum_XYf, sum_X2Y2f;
2385 double sum_Xr, sum_X2r;
2386 double sum_Yc, sum_Y2c;
2389 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2390 for (j = 0; j < n_cols; j++)
2392 double fij = mat[j + i * n_cols];
2393 double product = X[i] * Y[j];
2394 double temp = fij * product;
2396 sum_X2Y2f += temp * product;
2399 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2401 sum_Xr += X[i] * row_tot[i];
2402 sum_X2r += X[i] * X[i] * row_tot[i];
2406 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2408 sum_Yc += Y[i] * col_tot[i];
2409 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2413 S = sum_XYf - sum_Xr * sum_Yc / W;
2414 SX = sum_X2r - sum_Xr * sum_Xr / W;
2415 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2418 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2423 for (s = c = 0., i = 0; i < n_rows; i++)
2424 for (j = 0; j < n_cols; j++)
2426 double Xresid, Yresid;
2429 Xresid = X[i] - Xbar;
2430 Yresid = Y[j] - Ybar;
2431 temp = (T * Xresid * Yresid
2433 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2434 y = mat[j + i * n_cols] * temp * temp - c;
2439 *ase_1 = sqrt (s) / (T * T);
2443 static double somers_d_v[3];
2444 static double somers_d_ase[3];
2445 static double somers_d_t[3];
2447 /* Calculate symmetric statistics and their asymptotic standard
2448 errors. Returns 0 if none could be calculated. */
2450 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2451 double t[N_SYMMETRIC])
2453 int q = min (ns_rows, ns_cols);
2462 for (i = 0; i < N_SYMMETRIC; i++)
2463 v[i] = ase[i] = t[i] = SYSMIS;
2466 /* Phi, Cramer's V, contingency coefficient. */
2467 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2469 double Xp = 0.; /* Pearson chi-square. */
2474 for (r = 0; r < n_rows; r++)
2475 for (c = 0; c < n_cols; c++)
2477 const double expected = row_tot[r] * col_tot[c] / W;
2478 const double freq = mat[n_cols * r + c];
2479 const double residual = freq - expected;
2481 Xp += residual * residual / expected;
2485 if (cmd.a_statistics[CRS_ST_PHI])
2487 v[0] = sqrt (Xp / W);
2488 v[1] = sqrt (Xp / (W * (q - 1)));
2490 if (cmd.a_statistics[CRS_ST_CC])
2491 v[2] = sqrt (Xp / (Xp + W));
2494 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2495 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2500 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2507 for (r = 0; r < n_rows; r++)
2508 Dr -= row_tot[r] * row_tot[r];
2509 for (c = 0; c < n_cols; c++)
2510 Dc -= col_tot[c] * col_tot[c];
2516 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2517 for (c = 0; c < n_cols; c++)
2521 for (r = 0; r < n_rows; r++)
2522 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2532 for (i = 0; i < n_rows; i++)
2536 for (j = 1; j < n_cols; j++)
2537 Cij += col_tot[j] - cum[j + i * n_cols];
2540 for (j = 1; j < n_cols; j++)
2541 Dij += cum[j + (i - 1) * n_cols];
2545 double fij = mat[j + i * n_cols];
2551 assert (j < n_cols);
2553 Cij -= col_tot[j] - cum[j + i * n_cols];
2554 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2558 Cij += cum[j - 1 + (i - 1) * n_cols];
2559 Dij -= cum[j + (i - 1) * n_cols];
2565 if (cmd.a_statistics[CRS_ST_BTAU])
2566 v[3] = (P - Q) / sqrt (Dr * Dc);
2567 if (cmd.a_statistics[CRS_ST_CTAU])
2568 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2569 if (cmd.a_statistics[CRS_ST_GAMMA])
2570 v[5] = (P - Q) / (P + Q);
2572 /* ASE for tau-b, tau-c, gamma. Calculations could be
2573 eliminated here, at expense of memory. */
2578 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2579 for (i = 0; i < n_rows; i++)
2583 for (j = 1; j < n_cols; j++)
2584 Cij += col_tot[j] - cum[j + i * n_cols];
2587 for (j = 1; j < n_cols; j++)
2588 Dij += cum[j + (i - 1) * n_cols];
2592 double fij = mat[j + i * n_cols];
2594 if (cmd.a_statistics[CRS_ST_BTAU])
2596 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2597 + v[3] * (row_tot[i] * Dc
2598 + col_tot[j] * Dr));
2599 btau_cum += fij * temp * temp;
2603 const double temp = Cij - Dij;
2604 ctau_cum += fij * temp * temp;
2607 if (cmd.a_statistics[CRS_ST_GAMMA])
2609 const double temp = Q * Cij - P * Dij;
2610 gamma_cum += fij * temp * temp;
2613 if (cmd.a_statistics[CRS_ST_D])
2615 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2616 - (P - Q) * (W - row_tot[i]));
2617 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2618 - (Q - P) * (W - col_tot[j]));
2623 assert (j < n_cols);
2625 Cij -= col_tot[j] - cum[j + i * n_cols];
2626 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2630 Cij += cum[j - 1 + (i - 1) * n_cols];
2631 Dij -= cum[j + (i - 1) * n_cols];
2637 btau_var = ((btau_cum
2638 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2640 if (cmd.a_statistics[CRS_ST_BTAU])
2642 ase[3] = sqrt (btau_var);
2643 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2646 if (cmd.a_statistics[CRS_ST_CTAU])
2648 ase[4] = ((2 * q / ((q - 1) * W * W))
2649 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2650 t[4] = v[4] / ase[4];
2652 if (cmd.a_statistics[CRS_ST_GAMMA])
2654 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2655 t[5] = v[5] / (2. / (P + Q)
2656 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2658 if (cmd.a_statistics[CRS_ST_D])
2660 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2661 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2662 somers_d_t[0] = (somers_d_v[0]
2664 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2665 somers_d_v[1] = (P - Q) / Dc;
2666 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2667 somers_d_t[1] = (somers_d_v[1]
2669 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2670 somers_d_v[2] = (P - Q) / Dr;
2671 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2672 somers_d_t[2] = (somers_d_v[2]
2674 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2680 /* Spearman correlation, Pearson's r. */
2681 if (cmd.a_statistics[CRS_ST_CORR])
2683 double *R = local_alloc (sizeof *R * n_rows);
2684 double *C = local_alloc (sizeof *C * n_cols);
2687 double y, t, c = 0., s = 0.;
2692 R[i] = s + (row_tot[i] + 1.) / 2.;
2699 assert (i < n_rows);
2704 double y, t, c = 0., s = 0.;
2709 C[j] = s + (col_tot[j] + 1.) / 2;
2716 assert (j < n_cols);
2720 calc_r (R, C, &v[6], &t[6], &ase[6]);
2726 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2730 /* Cohen's kappa. */
2731 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2733 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2736 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2737 i < ns_rows; i++, j++)
2741 while (col_tot[j] == 0.)
2744 prod = row_tot[i] * col_tot[j];
2745 sum = row_tot[i] + col_tot[j];
2747 sum_fii += mat[j + i * n_cols];
2749 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2750 sum_riciri_ci += prod * sum;
2752 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2753 for (j = 0; j < ns_cols; j++)
2755 double sum = row_tot[i] + col_tot[j];
2756 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2759 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2761 ase[8] = sqrt ((W * W * sum_rici
2762 + sum_rici * sum_rici
2763 - W * sum_riciri_ci)
2764 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2766 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2767 / pow2 (W * W - sum_rici))
2768 + ((2. * (W - sum_fii)
2769 * (2. * sum_fii * sum_rici
2770 - W * sum_fiiri_ci))
2771 / cube (W * W - sum_rici))
2772 + (pow2 (W - sum_fii)
2773 * (W * sum_fijri_ci2 - 4.
2774 * sum_rici * sum_rici)
2775 / pow4 (W * W - sum_rici))));
2777 t[8] = v[8] / ase[8];
2784 /* Calculate risk estimate. */
2786 calc_risk (double *value, double *upper, double *lower, union value *c)
2788 double f11, f12, f21, f22;
2794 for (i = 0; i < 3; i++)
2795 value[i] = upper[i] = lower[i] = SYSMIS;
2798 if (ns_rows != 2 || ns_cols != 2)
2805 for (i = j = 0; i < n_cols; i++)
2806 if (col_tot[i] != 0.)
2815 f11 = mat[nz_cols[0]];
2816 f12 = mat[nz_cols[1]];
2817 f21 = mat[nz_cols[0] + n_cols];
2818 f22 = mat[nz_cols[1] + n_cols];
2820 c[0] = cols[nz_cols[0]];
2821 c[1] = cols[nz_cols[1]];
2824 value[0] = (f11 * f22) / (f12 * f21);
2825 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2826 lower[0] = value[0] * exp (-1.960 * v);
2827 upper[0] = value[0] * exp (1.960 * v);
2829 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2830 v = sqrt ((f12 / (f11 * (f11 + f12)))
2831 + (f22 / (f21 * (f21 + f22))));
2832 lower[1] = value[1] * exp (-1.960 * v);
2833 upper[1] = value[1] * exp (1.960 * v);
2835 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2836 v = sqrt ((f11 / (f12 * (f11 + f12)))
2837 + (f21 / (f22 * (f21 + f22))));
2838 lower[2] = value[2] * exp (-1.960 * v);
2839 upper[2] = value[2] * exp (1.960 * v);
2844 /* Calculate directional measures. */
2846 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2847 double t[N_DIRECTIONAL])
2852 for (i = 0; i < N_DIRECTIONAL; i++)
2853 v[i] = ase[i] = t[i] = SYSMIS;
2857 if (cmd.a_statistics[CRS_ST_LAMBDA])
2859 double *fim = xmalloc (sizeof *fim * n_rows);
2860 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2861 double *fmj = xmalloc (sizeof *fmj * n_cols);
2862 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2863 double sum_fim, sum_fmj;
2865 int rm_index, cm_index;
2868 /* Find maximum for each row and their sum. */
2869 for (sum_fim = 0., i = 0; i < n_rows; i++)
2871 double max = mat[i * n_cols];
2874 for (j = 1; j < n_cols; j++)
2875 if (mat[j + i * n_cols] > max)
2877 max = mat[j + i * n_cols];
2881 sum_fim += fim[i] = max;
2882 fim_index[i] = index;
2885 /* Find maximum for each column. */
2886 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2888 double max = mat[j];
2891 for (i = 1; i < n_rows; i++)
2892 if (mat[j + i * n_cols] > max)
2894 max = mat[j + i * n_cols];
2898 sum_fmj += fmj[j] = max;
2899 fmj_index[j] = index;
2902 /* Find maximum row total. */
2905 for (i = 1; i < n_rows; i++)
2906 if (row_tot[i] > rm)
2912 /* Find maximum column total. */
2915 for (j = 1; j < n_cols; j++)
2916 if (col_tot[j] > cm)
2922 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2923 v[1] = (sum_fmj - rm) / (W - rm);
2924 v[2] = (sum_fim - cm) / (W - cm);
2926 /* ASE1 for Y given X. */
2930 for (accum = 0., i = 0; i < n_rows; i++)
2931 for (j = 0; j < n_cols; j++)
2933 const int deltaj = j == cm_index;
2934 accum += (mat[j + i * n_cols]
2935 * pow2 ((j == fim_index[i])
2940 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2943 /* ASE0 for Y given X. */
2947 for (accum = 0., i = 0; i < n_rows; i++)
2948 if (cm_index != fim_index[i])
2949 accum += (mat[i * n_cols + fim_index[i]]
2950 + mat[i * n_cols + cm_index]);
2951 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2954 /* ASE1 for X given Y. */
2958 for (accum = 0., i = 0; i < n_rows; i++)
2959 for (j = 0; j < n_cols; j++)
2961 const int deltaj = i == rm_index;
2962 accum += (mat[j + i * n_cols]
2963 * pow2 ((i == fmj_index[j])
2968 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2971 /* ASE0 for X given Y. */
2975 for (accum = 0., j = 0; j < n_cols; j++)
2976 if (rm_index != fmj_index[j])
2977 accum += (mat[j + n_cols * fmj_index[j]]
2978 + mat[j + n_cols * rm_index]);
2979 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2982 /* Symmetric ASE0 and ASE1. */
2987 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
2988 for (j = 0; j < n_cols; j++)
2990 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
2991 int temp1 = (i == rm_index) + (j == cm_index);
2992 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
2993 accum1 += (mat[j + i * n_cols]
2994 * pow2 (temp0 + (v[0] - 1.) * temp1));
2996 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
2997 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
2998 / (2. * W - rm - cm));
3007 double sum_fij2_ri, sum_fij2_ci;
3008 double sum_ri2, sum_cj2;
3010 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3011 for (j = 0; j < n_cols; j++)
3013 double temp = pow2 (mat[j + i * n_cols]);
3014 sum_fij2_ri += temp / row_tot[i];
3015 sum_fij2_ci += temp / col_tot[j];
3018 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3019 sum_ri2 += row_tot[i] * row_tot[i];
3021 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3022 sum_cj2 += col_tot[j] * col_tot[j];
3024 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3025 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3029 if (cmd.a_statistics[CRS_ST_UC])
3031 double UX, UY, UXY, P;
3032 double ase1_yx, ase1_xy, ase1_sym;
3035 for (UX = 0., i = 0; i < n_rows; i++)
3036 if (row_tot[i] > 0.)
3037 UX -= row_tot[i] / W * log (row_tot[i] / W);
3039 for (UY = 0., j = 0; j < n_cols; j++)
3040 if (col_tot[j] > 0.)
3041 UY -= col_tot[j] / W * log (col_tot[j] / W);
3043 for (UXY = P = 0., i = 0; i < n_rows; i++)
3044 for (j = 0; j < n_cols; j++)
3046 double entry = mat[j + i * n_cols];
3051 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3052 UXY -= entry / W * log (entry / W);
3055 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3056 for (j = 0; j < n_cols; j++)
3058 double entry = mat[j + i * n_cols];
3063 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3064 + (UX - UXY) * log (col_tot[j] / W));
3065 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3066 + (UY - UXY) * log (row_tot[i] / W));
3067 ase1_sym += entry * pow2 ((UXY
3068 * log (row_tot[i] * col_tot[j] / (W * W)))
3069 - (UX + UY) * log (entry / W));
3072 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3073 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3074 t[5] = v[5] / ((2. / (W * (UX + UY)))
3075 * sqrt (P - pow2 (UX + UY - UXY) / W));
3077 v[6] = (UX + UY - UXY) / UX;
3078 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3079 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3081 v[7] = (UX + UY - UXY) / UY;
3082 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3083 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3087 if (cmd.a_statistics[CRS_ST_D])
3092 calc_symmetric (NULL, NULL, NULL);
3093 for (i = 0; i < 3; i++)
3095 v[8 + i] = somers_d_v[i];
3096 ase[8 + i] = somers_d_ase[i];
3097 t[8 + i] = somers_d_t[i];
3102 if (cmd.a_statistics[CRS_ST_ETA])
3105 double sum_Xr, sum_X2r;
3109 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3111 sum_Xr += rows[i].f * row_tot[i];
3112 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3114 SX = sum_X2r - sum_Xr * sum_Xr / W;
3116 for (SXW = 0., j = 0; j < n_cols; j++)
3120 for (cum = 0., i = 0; i < n_rows; i++)
3122 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3123 cum += rows[i].f * mat[j + i * n_cols];
3126 SXW -= cum * cum / col_tot[j];
3128 v[11] = sqrt (1. - SXW / SX);
3132 double sum_Yc, sum_Y2c;
3136 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3138 sum_Yc += cols[i].f * col_tot[i];
3139 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3141 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3143 for (SYW = 0., i = 0; i < n_rows; i++)
3147 for (cum = 0., j = 0; j < n_cols; j++)
3149 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3150 cum += cols[j].f * mat[j + i * n_cols];
3153 SYW -= cum * cum / row_tot[i];
3155 v[12] = sqrt (1. - SYW / SY);
3162 /* A wrapper around data_out() that limits string output to short
3163 string width and null terminates the result. */
3165 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3167 struct fmt_spec fmt_subst;
3169 /* Limit to short string width. */
3170 if (formats[fp->type].cat & FCAT_STRING)
3174 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3175 if (fmt_subst.type == FMT_A)
3176 fmt_subst.w = min (8, fmt_subst.w);
3178 fmt_subst.w = min (16, fmt_subst.w);
3184 data_out (s, fp, v);
3186 /* Null terminate. */