1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include <gsl/gsl_cdf.h>
38 #include "algorithm.h"
50 #include "value-labels.h"
54 #include "debug-print.h"
60 +missing=miss:!table/include/report;
61 +write[wr_]=none,cells,all;
62 +format=fmt:!labels/nolabels/novallabs,
65 tabl:!tables/notables,
68 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
70 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
76 /* Number of chi-square statistics. */
79 /* Number of symmetric statistics. */
82 /* Number of directional statistics. */
83 #define N_DIRECTIONAL 13
85 /* A single table entry for general mode. */
88 int table; /* Flattened table number. */
91 double freq; /* Frequency count. */
92 double *data; /* Crosstabulation table for integer mode. */
95 union value values[1]; /* Values. */
98 /* A crosstabulation. */
101 int nvar; /* Number of variables. */
102 double missing; /* Missing cases count. */
103 int ofs; /* Integer mode: Offset into sorted_tab[]. */
104 struct variable *vars[2]; /* At least two variables; sorted by
105 larger indices first. */
108 /* Indexes into crosstab.v. */
115 /* General mode crosstabulation table. */
116 static struct hsh_table *gen_tab; /* Hash table. */
117 static int n_sorted_tab; /* Number of entries in sorted_tab. */
118 static struct table_entry **sorted_tab; /* Sorted table. */
120 /* Variables specifies on VARIABLES. */
121 static struct variable **variables;
122 static size_t variables_cnt;
125 static struct crosstab **xtab;
128 /* Integer or general mode? */
137 static int num_cells; /* Number of cells requested. */
138 static int cells[8]; /* Cells requested. */
141 static int write; /* One of WR_* that specifies the WRITE style. */
143 /* Command parsing info. */
144 static struct cmd_crosstabs cmd;
147 static struct pool *pl_tc; /* For table cells. */
148 static struct pool *pl_col; /* For column data. */
150 static int internal_cmd_crosstabs (void);
151 static void precalc (void *);
152 static int calc_general (struct ccase *, void *);
153 static int calc_integer (struct ccase *, void *);
154 static void postcalc (void *);
155 static void submit (struct tab_table *);
157 static void format_short (char *s, const struct fmt_spec *fp,
158 const union value *v);
160 /* Parse and execute CROSSTABS, then clean up. */
164 int result = internal_cmd_crosstabs ();
167 pool_destroy (pl_tc);
168 pool_destroy (pl_col);
173 /* Parses and executes the CROSSTABS procedure. */
175 internal_cmd_crosstabs (void)
183 pl_tc = pool_create ();
184 pl_col = pool_create ();
186 if (!parse_crosstabs (&cmd))
189 mode = variables ? INTEGER : GENERAL;
194 cmd.a_cells[CRS_CL_COUNT] = 1;
200 for (i = 0; i < CRS_CL_count; i++)
205 cmd.a_cells[CRS_CL_COUNT] = 1;
206 cmd.a_cells[CRS_CL_ROW] = 1;
207 cmd.a_cells[CRS_CL_COLUMN] = 1;
208 cmd.a_cells[CRS_CL_TOTAL] = 1;
210 if (cmd.a_cells[CRS_CL_ALL])
212 for (i = 0; i < CRS_CL_count; i++)
214 cmd.a_cells[CRS_CL_ALL] = 0;
216 cmd.a_cells[CRS_CL_NONE] = 0;
218 for (num_cells = i = 0; i < CRS_CL_count; i++)
220 cells[num_cells++] = i;
223 if (cmd.sbc_statistics)
228 for (i = 0; i < CRS_ST_count; i++)
229 if (cmd.a_statistics[i])
232 cmd.a_statistics[CRS_ST_CHISQ] = 1;
233 if (cmd.a_statistics[CRS_ST_ALL])
234 for (i = 0; i < CRS_ST_count; i++)
235 cmd.a_statistics[i] = 1;
239 if (cmd.miss == CRS_REPORT && mode == GENERAL)
241 msg (SE, _("Missing mode REPORT not allowed in general mode. "
242 "Assuming MISSING=TABLE."));
243 cmd.miss = CRS_TABLE;
247 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
248 cmd.a_write[CRS_WR_ALL] = 0;
249 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
251 msg (SE, _("Write mode ALL not allowed in general mode. "
252 "Assuming WRITE=CELLS."));
253 cmd.a_write[CRS_WR_CELLS] = 1;
256 && (cmd.a_write[CRS_WR_NONE]
257 + cmd.a_write[CRS_WR_ALL]
258 + cmd.a_write[CRS_WR_CELLS] == 0))
259 cmd.a_write[CRS_WR_CELLS] = 1;
260 if (cmd.a_write[CRS_WR_CELLS])
261 write = CRS_WR_CELLS;
262 else if (cmd.a_write[CRS_WR_ALL])
267 procedure_with_splits (precalc,
268 mode == GENERAL ? calc_general : calc_integer,
274 /* Parses the TABLES subcommand. */
276 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
278 struct var_set *var_set;
280 struct variable ***by = NULL;
285 /* Ensure that this is a TABLES subcommand. */
286 if (!lex_match_id ("TABLES")
287 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
292 if (variables != NULL)
293 var_set = var_set_create_from_array (variables, variables_cnt);
295 var_set = var_set_create_from_dict (default_dict);
296 assert (var_set != NULL);
300 by = xrealloc (by, sizeof *by * (n_by + 1));
301 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
302 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
303 PV_NO_DUPLICATE | PV_NO_SCRATCH))
308 if (!lex_match (T_BY))
312 lex_error (_("expecting BY"));
321 int *by_iter = xcalloc (sizeof *by_iter * n_by);
324 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
325 for (i = 0; i < nx; i++)
329 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
336 for (i = 0; i < n_by; i++)
337 x->vars[i] = by[i][by_iter[i]];
343 for (i = n_by - 1; i >= 0; i--)
345 if (++by_iter[i] < by_nvar[i])
358 /* All return paths lead here. */
362 for (i = 0; i < n_by; i++)
368 var_set_destroy (var_set);
373 /* Parses the VARIABLES subcommand. */
375 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
379 msg (SE, _("VARIABLES must be specified before TABLES."));
387 int orig_nv = variables_cnt;
392 if (!parse_variables (default_dict, &variables, &variables_cnt,
393 (PV_APPEND | PV_NUMERIC
394 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
399 lex_error ("expecting `('");
404 if (!lex_force_int ())
406 min = lex_integer ();
411 if (!lex_force_int ())
413 max = lex_integer ();
416 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
424 lex_error ("expecting `)'");
429 for (i = orig_nv; i < variables_cnt; i++)
431 variables[i]->p.crs.min = min;
432 variables[i]->p.crs.max = max + 1.;
433 variables[i]->p.crs.count = max - min + 1;
448 /* Data file processing. */
450 static int compare_table_entry (const void *, const void *, void *);
451 static unsigned hash_table_entry (const void *, void *);
453 /* Set up the crosstabulation tables for processing. */
455 precalc (void *aux UNUSED)
459 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
469 for (i = 0; i < nxtab; i++)
471 struct crosstab *x = xtab[i];
476 x->ofs = n_sorted_tab;
478 for (j = 2; j < x->nvar; j++)
479 count *= x->vars[j - 2]->p.crs.count;
481 sorted_tab = xrealloc (sorted_tab,
482 sizeof *sorted_tab * (n_sorted_tab + count));
483 v = local_alloc (sizeof *v * x->nvar);
484 for (j = 2; j < x->nvar; j++)
485 v[j] = x->vars[j]->p.crs.min;
486 for (j = 0; j < count; j++)
488 struct table_entry *te;
491 te = sorted_tab[n_sorted_tab++]
492 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
496 const int mat_size = (x->vars[0]->p.crs.count
497 * x->vars[1]->p.crs.count);
500 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
501 for (m = 0; m < mat_size; m++)
505 for (k = 2; k < x->nvar; k++)
506 te->values[k].f = v[k];
507 for (k = 2; k < x->nvar; k++)
508 if (++v[k] >= x->vars[k]->p.crs.max)
509 v[k] = x->vars[k]->p.crs.min;
516 sorted_tab = xrealloc (sorted_tab,
517 sizeof *sorted_tab * (n_sorted_tab + 1));
518 sorted_tab[n_sorted_tab] = NULL;
522 /* Form crosstabulations for general mode. */
524 calc_general (struct ccase *c, void *aux UNUSED)
529 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
531 /* Flattened current table index. */
534 for (t = 0; t < nxtab; t++)
536 struct crosstab *x = xtab[t];
537 const size_t entry_size = (sizeof (struct table_entry)
538 + sizeof (union value) * (x->nvar - 1));
539 struct table_entry *te = local_alloc (entry_size);
541 /* Construct table entry for the current record and table. */
547 for (j = 0; j < x->nvar; j++)
549 if ((cmd.miss == CRS_TABLE
550 && is_missing (case_data (c, x->vars[j]->fv), x->vars[j]))
551 || (cmd.miss == CRS_INCLUDE
552 && is_system_missing (case_data (c, x->vars[j]->fv),
555 x->missing += weight;
559 if (x->vars[j]->type == NUMERIC)
560 te->values[j].f = case_num (c, x->vars[j]->fv);
563 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
566 /* Necessary in order to simplify comparisons. */
567 memset (&te->values[j].s[x->vars[j]->width], 0,
568 sizeof (union value) - x->vars[j]->width);
573 /* Add record to hash table. */
575 struct table_entry **tepp
576 = (struct table_entry **) hsh_probe (gen_tab, te);
579 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
582 memcpy (tep, te, entry_size);
587 (*tepp)->u.freq += weight;
598 calc_integer (struct ccase *c, void *aux UNUSED)
603 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
605 /* Flattened current table index. */
608 for (t = 0; t < nxtab; t++)
610 struct crosstab *x = xtab[t];
615 for (i = 0; i < x->nvar; i++)
617 struct variable *const v = x->vars[i];
618 double value = case_num (c, v->fv);
620 /* Note that the first test also rules out SYSMIS. */
621 if ((value < v->p.crs.min || value >= v->p.crs.max)
622 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
624 x->missing += weight;
630 ofs += fact * ((int) value - v->p.crs.min);
631 fact *= v->p.crs.count;
636 const int row = case_num (c, x->vars[ROW_VAR]->fv) - x->vars[ROW_VAR]->p.crs.min;
637 const int col = case_num (c, x->vars[COL_VAR]->fv) - x->vars[COL_VAR]->p.crs.min;
638 const int col_dim = x->vars[COL_VAR]->p.crs.count;
640 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
649 /* Compare the table_entry's at A and B and return a strcmp()-type
652 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
654 const struct table_entry *a = a_;
655 const struct table_entry *b = b_;
657 if (a->table > b->table)
659 else if (a->table < b->table)
663 const struct crosstab *x = xtab[a->table];
666 for (i = x->nvar - 1; i >= 0; i--)
667 if (x->vars[i]->type == NUMERIC)
669 const double diffnum = a->values[i].f - b->values[i].f;
672 else if (diffnum > 0)
677 assert (x->vars[i]->type == ALPHA);
679 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
690 /* Calculate a hash value from table_entry A. */
692 hash_table_entry (const void *a_, void *foo UNUSED)
694 const struct table_entry *a = a_;
699 for (i = 0; i < xtab[a->table]->nvar; i++)
700 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
705 /* Post-data reading calculations. */
707 static struct table_entry **find_pivot_extent (struct table_entry **,
708 int *cnt, int pivot);
709 static void enum_var_values (struct table_entry **entries, int entry_cnt,
711 union value **values, int *value_cnt);
712 static void output_pivot_table (struct table_entry **, struct table_entry **,
713 double **, double **, double **,
714 int *, int *, int *);
715 static void make_summary_table (void);
718 postcalc (void *aux UNUSED)
722 n_sorted_tab = hsh_count (gen_tab);
723 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
726 make_summary_table ();
728 /* Identify all the individual crosstabulation tables, and deal with
731 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
732 int pc = n_sorted_tab; /* Pivot count. */
734 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
735 int maxrows = 0, maxcols = 0, maxcells = 0;
739 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
743 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
744 &maxrows, &maxcols, &maxcells);
753 hsh_destroy (gen_tab);
756 static void insert_summary (struct tab_table *, int tab_index, double valid);
758 /* Output a table summarizing the cases processed. */
760 make_summary_table (void)
762 struct tab_table *summary;
764 struct table_entry **pb = sorted_tab, **pe;
765 int pc = n_sorted_tab;
768 summary = tab_create (7, 3 + nxtab, 1);
769 tab_title (summary, 0, _("Summary."));
770 tab_headers (summary, 1, 0, 3, 0);
771 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
772 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
773 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
774 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
775 tab_hline (summary, TAL_1, 1, 6, 1);
776 tab_hline (summary, TAL_1, 1, 6, 2);
777 tab_vline (summary, TAL_1, 3, 1, 1);
778 tab_vline (summary, TAL_1, 5, 1, 1);
782 for (i = 0; i < 3; i++)
784 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
785 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
788 tab_offset (summary, 0, 3);
794 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
798 while (cur_tab < (*pb)->table)
799 insert_summary (summary, cur_tab++, 0.);
802 for (valid = 0.; pb < pe; pb++)
803 valid += (*pb)->u.freq;
806 const struct crosstab *const x = xtab[(*pb)->table];
807 const int n_cols = x->vars[COL_VAR]->p.crs.count;
808 const int n_rows = x->vars[ROW_VAR]->p.crs.count;
809 const int count = n_cols * n_rows;
811 for (valid = 0.; pb < pe; pb++)
813 const double *data = (*pb)->u.data;
816 for (i = 0; i < count; i++)
820 insert_summary (summary, cur_tab++, valid);
825 while (cur_tab < nxtab)
826 insert_summary (summary, cur_tab++, 0.);
831 /* Inserts a line into T describing the crosstabulation at index
832 TAB_INDEX, which has VALID valid observations. */
834 insert_summary (struct tab_table *t, int tab_index, double valid)
836 struct crosstab *x = xtab[tab_index];
838 tab_hline (t, TAL_1, 0, 6, 0);
840 /* Crosstabulation name. */
842 char *buf = local_alloc (128 * x->nvar);
846 for (i = 0; i < x->nvar; i++)
849 cp = stpcpy (cp, " * ");
852 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
854 tab_text (t, 0, 0, TAB_LEFT, buf);
859 /* Counts and percentages. */
869 for (i = 0; i < 3; i++)
871 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
872 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
883 static struct tab_table *table; /* Crosstabulation table. */
884 static struct tab_table *chisq; /* Chi-square table. */
885 static struct tab_table *sym; /* Symmetric measures table. */
886 static struct tab_table *risk; /* Risk estimate table. */
887 static struct tab_table *direct; /* Directional measures table. */
890 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
892 /* Column values, number of columns. */
893 static union value *cols;
896 /* Row values, number of rows. */
897 static union value *rows;
900 /* Number of statistically interesting columns/rows (columns/rows with
902 static int ns_cols, ns_rows;
904 /* Crosstabulation. */
905 static struct crosstab *x;
907 /* Number of variables from the crosstabulation to consider. This is
908 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
911 /* Matrix contents. */
912 static double *mat; /* Matrix proper. */
913 static double *row_tot; /* Row totals. */
914 static double *col_tot; /* Column totals. */
915 static double W; /* Grand total. */
917 static void display_dimensions (struct tab_table *, int first_difference,
918 struct table_entry *);
919 static void display_crosstabulation (void);
920 static void display_chisq (void);
921 static void display_symmetric (void);
922 static void display_risk (void);
923 static void display_directional (void);
924 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
925 static void table_value_missing (struct tab_table *table, int c, int r,
926 unsigned char opt, const union value *v,
927 const struct variable *var);
928 static void delete_missing (void);
930 /* Output pivot table beginning at PB and continuing until PE,
931 exclusive. For efficiency, *MATP is a pointer to a matrix that can
932 hold *MAXROWS entries. */
934 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
935 double **matp, double **row_totp, double **col_totp,
936 int *maxrows, int *maxcols, int *maxcells)
939 struct table_entry **tb = pb, **te; /* Table begin, table end. */
940 int tc = pe - pb; /* Table count. */
942 /* Table entry for header comparison. */
943 struct table_entry *cmp = NULL;
945 x = xtab[(*pb)->table];
946 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
948 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
950 /* Crosstabulation table initialization. */
953 table = tab_create (nvar + n_cols,
954 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
955 tab_headers (table, nvar - 1, 0, 2, 0);
957 /* First header line. */
958 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
959 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
961 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
963 /* Second header line. */
967 for (i = 2; i < nvar; i++)
968 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
969 TAB_RIGHT | TAT_TITLE,
971 ? x->vars[i]->label : x->vars[i]->name));
972 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
973 x->vars[ROW_VAR]->name);
974 for (i = 0; i < n_cols; i++)
975 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
977 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
980 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
981 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
985 char *title = local_alloc (x->nvar * 64 + 128);
989 if (cmd.pivot == CRS_PIVOT)
990 for (i = 0; i < nvar; i++)
993 cp = stpcpy (cp, " by ");
994 cp = stpcpy (cp, x->vars[i]->name);
998 cp = spprintf (cp, "%s by %s for",
999 x->vars[0]->name, x->vars[1]->name);
1000 for (i = 2; i < nvar; i++)
1002 char buf[64], *bufp;
1007 cp = stpcpy (cp, x->vars[i]->name);
1009 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1010 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1012 cp = stpcpy (cp, bufp);
1016 cp = stpcpy (cp, " [");
1017 for (i = 0; i < num_cells; i++)
1025 static const struct tuple cell_names[] =
1027 {CRS_CL_COUNT, N_("count")},
1028 {CRS_CL_ROW, N_("row %")},
1029 {CRS_CL_COLUMN, N_("column %")},
1030 {CRS_CL_TOTAL, N_("total %")},
1031 {CRS_CL_EXPECTED, N_("expected")},
1032 {CRS_CL_RESIDUAL, N_("residual")},
1033 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1034 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1038 const struct tuple *t;
1040 for (t = cell_names; t->value != cells[i]; t++)
1041 assert (t->value != -1);
1043 cp = stpcpy (cp, ", ");
1044 cp = stpcpy (cp, gettext (t->name));
1048 tab_title (table, 0, title);
1052 tab_offset (table, 0, 2);
1057 /* Chi-square table initialization. */
1058 if (cmd.a_statistics[CRS_ST_CHISQ])
1060 chisq = tab_create (6 + (nvar - 2),
1061 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1062 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1064 tab_title (chisq, 0, "Chi-square tests.");
1066 tab_offset (chisq, nvar - 2, 0);
1067 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1068 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1069 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1070 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1071 _("Asymp. Sig. (2-sided)"));
1072 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1073 _("Exact. Sig. (2-sided)"));
1074 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1075 _("Exact. Sig. (1-sided)"));
1077 tab_offset (chisq, 0, 1);
1082 /* Symmetric measures. */
1083 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1084 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1085 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1086 || cmd.a_statistics[CRS_ST_KAPPA])
1088 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1089 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1090 tab_title (sym, 0, "Symmetric measures.");
1092 tab_offset (sym, nvar - 2, 0);
1093 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1094 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1095 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1096 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1097 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1098 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1099 tab_offset (sym, 0, 1);
1104 /* Risk estimate. */
1105 if (cmd.a_statistics[CRS_ST_RISK])
1107 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1108 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1109 tab_title (risk, 0, "Risk estimate.");
1111 tab_offset (risk, nvar - 2, 0);
1112 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1113 _(" 95%% Confidence Interval"));
1114 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1115 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1116 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1117 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1118 tab_hline (risk, TAL_1, 2, 3, 1);
1119 tab_vline (risk, TAL_1, 2, 0, 1);
1120 tab_offset (risk, 0, 2);
1125 /* Directional measures. */
1126 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1127 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1129 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1130 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1131 tab_title (direct, 0, "Directional measures.");
1133 tab_offset (direct, nvar - 2, 0);
1134 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1135 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1136 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1137 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1138 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1139 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1140 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1141 tab_offset (direct, 0, 1);
1148 /* Find pivot subtable if applicable. */
1149 te = find_pivot_extent (tb, &tc, 0);
1153 /* Find all the row variable values. */
1154 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1156 /* Allocate memory space for the column and row totals. */
1157 if (n_rows > *maxrows)
1159 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1160 row_tot = *row_totp;
1163 if (n_cols > *maxcols)
1165 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1166 col_tot = *col_totp;
1170 /* Allocate table space for the matrix. */
1171 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1172 tab_realloc (table, -1,
1173 max (tab_nr (table) + (n_rows + 1) * num_cells,
1174 tab_nr (table) * (pe - pb) / (te - tb)));
1176 if (mode == GENERAL)
1178 /* Allocate memory space for the matrix. */
1179 if (n_cols * n_rows > *maxcells)
1181 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1182 *maxcells = n_cols * n_rows;
1187 /* Build the matrix and calculate column totals. */
1189 union value *cur_col = cols;
1190 union value *cur_row = rows;
1192 double *cp = col_tot;
1193 struct table_entry **p;
1196 for (p = &tb[0]; p < te; p++)
1198 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1202 for (; cur_row < &rows[n_rows]; cur_row++)
1208 mp = &mat[cur_col - cols];
1211 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1218 *cp += *mp = (*p)->u.freq;
1223 /* Zero out the rest of the matrix. */
1224 for (; cur_row < &rows[n_rows]; cur_row++)
1230 if (cur_col < &cols[n_cols])
1232 const int rem_cols = n_cols - (cur_col - cols);
1235 for (c = 0; c < rem_cols; c++)
1237 mp = &mat[cur_col - cols];
1238 for (r = 0; r < n_rows; r++)
1240 for (c = 0; c < rem_cols; c++)
1242 mp += n_cols - rem_cols;
1250 double *tp = col_tot;
1252 assert (mode == INTEGER);
1253 mat = (*tb)->u.data;
1256 /* Calculate column totals. */
1257 for (c = 0; c < n_cols; c++)
1260 double *cp = &mat[c];
1262 for (r = 0; r < n_rows; r++)
1263 cum += cp[r * n_cols];
1271 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1272 ns_cols += *cp != 0.;
1275 /* Calculate row totals. */
1278 double *rp = row_tot;
1281 for (ns_rows = 0, r = n_rows; r--; )
1284 for (c = n_cols; c--; )
1292 /* Calculate grand total. */
1298 if (n_rows < n_cols)
1299 tp = row_tot, n = n_rows;
1301 tp = col_tot, n = n_cols;
1307 /* Find the first variable that differs from the last subtable,
1308 then display the values of the dimensioning variables for
1309 each table that needs it. */
1311 int first_difference = nvar - 1;
1314 for (; ; first_difference--)
1316 assert (first_difference >= 2);
1317 if (memcmp (&cmp->values[first_difference],
1318 &(*tb)->values[first_difference],
1319 sizeof *cmp->values))
1325 display_dimensions (table, first_difference, *tb);
1327 display_dimensions (chisq, first_difference, *tb);
1329 display_dimensions (sym, first_difference, *tb);
1331 display_dimensions (risk, first_difference, *tb);
1333 display_dimensions (direct, first_difference, *tb);
1337 display_crosstabulation ();
1338 if (cmd.miss == CRS_REPORT)
1343 display_symmetric ();
1347 display_directional ();
1358 tab_resize (chisq, 4 + (nvar - 2), -1);
1369 /* Delete missing rows and columns for statistical analysis when
1372 delete_missing (void)
1377 for (r = 0; r < n_rows; r++)
1378 if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1382 for (c = 0; c < n_cols; c++)
1383 mat[c + r * n_cols] = 0.;
1391 for (c = 0; c < n_cols; c++)
1392 if (is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1396 for (r = 0; r < n_rows; r++)
1397 mat[c + r * n_cols] = 0.;
1403 /* Prepare table T for submission, and submit it. */
1405 submit (struct tab_table *t)
1412 tab_resize (t, -1, 0);
1413 if (tab_nr (t) == tab_t (t))
1418 tab_offset (t, 0, 0);
1420 for (i = 2; i < nvar; i++)
1421 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1422 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1423 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1424 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1426 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1428 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1429 tab_dim (t, crosstabs_dim);
1433 /* Sets the widths of all the columns and heights of all the rows in
1434 table T for driver D. */
1436 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1440 /* Width of a numerical column. */
1441 int c = outp_string_width (d, "0.000000");
1442 if (cmd.miss == CRS_REPORT)
1443 c += outp_string_width (d, "M");
1445 /* Set width for header columns. */
1448 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1450 if (w < d->prop_em_width * 8)
1451 w = d->prop_em_width * 8;
1453 if (w > d->prop_em_width * 15)
1454 w = d->prop_em_width * 15;
1456 for (i = 0; i < t->l; i++)
1460 for (i = t->l; i < t->nc; i++)
1463 for (i = 0; i < t->nr; i++)
1464 t->h[i] = tab_natural_height (t, d, i);
1467 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1468 int *cnt, int pivot);
1469 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1470 int *cnt, int pivot);
1472 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1474 static struct table_entry **
1475 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1477 return (mode == GENERAL
1478 ? find_pivot_extent_general (tp, cnt, pivot)
1479 : find_pivot_extent_integer (tp, cnt, pivot));
1482 /* Find the extent of a region in TP that contains one table. If
1483 PIVOT != 0 that means a set of table entries with identical table
1484 number; otherwise they also have to have the same values for every
1485 dimension after the row and column dimensions. The table that is
1486 searched starts at TP and has length CNT. Returns the first entry
1487 after the last one in the table; sets *CNT to the number of
1488 remaining values. If there are no entries in TP at all, returns
1489 NULL. A yucky interface, admittedly, but it works. */
1490 static struct table_entry **
1491 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1493 struct table_entry *fp = *tp;
1498 x = xtab[(*tp)->table];
1506 if ((*tp)->table != fp->table)
1511 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1518 /* Integer mode correspondent to find_pivot_extent_general(). This
1519 could be optimized somewhat, but I just don't give a crap about
1520 CROSSTABS performance in integer mode, which is just a
1521 CROSSTABS wart as far as I'm concerned.
1523 That said, feel free to send optimization patches to me. */
1524 static struct table_entry **
1525 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1527 struct table_entry *fp = *tp;
1532 x = xtab[(*tp)->table];
1540 if ((*tp)->table != fp->table)
1545 if (memcmp (&(*tp)->values[2], &fp->values[2],
1546 sizeof (union value) * (x->nvar - 2)))
1553 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1554 result. WIDTH_ points to an int which is either 0 for a
1555 numeric value or a string width for a string value. */
1557 compare_value (const void *a_, const void *b_, void *width_)
1559 const union value *a = a_;
1560 const union value *b = b_;
1561 const int *pwidth = width_;
1562 const int width = *pwidth;
1565 return (a->f < b->f) ? -1 : (a->f > b->f);
1567 return strncmp (a->s, b->s, width);
1570 /* Given an array of ENTRY_CNT table_entry structures starting at
1571 ENTRIES, creates a sorted list of the values that the variable
1572 with index VAR_INDEX takes on. The values are returned as a
1573 malloc()'darray stored in *VALUES, with the number of values
1574 stored in *VALUE_CNT.
1577 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1578 union value **values, int *value_cnt)
1580 if (mode == GENERAL)
1582 int width = xtab[(*entries)->table]->vars[var_idx]->width;
1585 *values = xmalloc (sizeof **values * entry_cnt);
1586 for (i = 0; i < entry_cnt; i++)
1587 (*values)[i] = entries[i]->values[var_idx];
1588 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1589 compare_value, &width);
1593 struct crosstab_proc *crs
1594 = &xtab[(*entries)->table]->vars[var_idx]->p.crs;
1597 assert (mode == INTEGER);
1598 *values = xmalloc (sizeof **values * crs->count);
1599 for (i = 0; i < crs->count; i++)
1600 (*values)[i].f = i + crs->min;
1601 *value_cnt = crs->count;
1605 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1606 from V, displayed with print format spec from variable VAR. When
1607 in REPORT missing-value mode, missing values have an M appended. */
1609 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1610 const union value *v, const struct variable *var)
1612 struct len_string s;
1614 const char *label = val_labs_find (var->val_labs, *v);
1617 tab_text (table, c, r, TAB_LEFT, label);
1621 s.string = tab_alloc (table, var->print.w);
1622 format_short (s.string, &var->print, v);
1623 s.length = strlen (s.string);
1624 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1625 s.string[s.length++] = 'M';
1626 while (s.length && *s.string == ' ')
1631 tab_raw (table, c, r, opt, &s);
1634 /* Draws a line across TABLE at the current row to indicate the most
1635 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1636 that changed, and puts the values that changed into the table. TB
1637 and X must be the corresponding table_entry and crosstab,
1640 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1642 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1644 for (; first_difference >= 2; first_difference--)
1645 table_value_missing (table, nvar - first_difference - 1, 0,
1646 TAB_RIGHT, &tb->values[first_difference],
1647 x->vars[first_difference]);
1650 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1651 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1652 additionally suffixed with a letter `M'. */
1654 format_cell_entry (struct tab_table *table, int c, int r, double value,
1655 char suffix, int mark_missing)
1657 const struct fmt_spec f = {FMT_F, 10, 1};
1659 struct len_string s;
1662 s.string = tab_alloc (table, 16);
1664 data_out (s.string, &f, &v);
1665 while (*s.string == ' ')
1671 s.string[s.length++] = suffix;
1673 s.string[s.length++] = 'M';
1675 tab_raw (table, c, r, TAB_RIGHT, &s);
1678 /* Displays the crosstabulation table. */
1680 display_crosstabulation (void)
1685 for (r = 0; r < n_rows; r++)
1686 table_value_missing (table, nvar - 2, r * num_cells,
1687 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1689 tab_text (table, nvar - 2, n_rows * num_cells,
1690 TAB_LEFT, _("Total"));
1692 /* Put in the actual cells. */
1697 tab_offset (table, nvar - 1, -1);
1698 for (r = 0; r < n_rows; r++)
1701 tab_hline (table, TAL_1, -1, n_cols, 0);
1702 for (c = 0; c < n_cols; c++)
1704 int mark_missing = 0;
1705 double expected_value = row_tot[r] * col_tot[c] / W;
1706 if (cmd.miss == CRS_REPORT
1707 && (is_num_user_missing (cols[c].f, x->vars[COL_VAR])
1708 || is_num_user_missing (rows[r].f, x->vars[ROW_VAR])))
1710 for (i = 0; i < num_cells; i++)
1721 v = *mp / row_tot[r] * 100.;
1725 v = *mp / col_tot[c] * 100.;
1732 case CRS_CL_EXPECTED:
1735 case CRS_CL_RESIDUAL:
1736 v = *mp - expected_value;
1738 case CRS_CL_SRESIDUAL:
1739 v = (*mp - expected_value) / sqrt (expected_value);
1741 case CRS_CL_ASRESIDUAL:
1742 v = ((*mp - expected_value)
1743 / sqrt (expected_value
1744 * (1. - row_tot[r] / W)
1745 * (1. - col_tot[c] / W)));
1752 format_cell_entry (table, c, i, v, suffix, mark_missing);
1758 tab_offset (table, -1, tab_row (table) + num_cells);
1766 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1767 for (r = 0; r < n_rows; r++)
1770 int mark_missing = 0;
1772 if (cmd.miss == CRS_REPORT
1773 && is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1776 for (i = 0; i < num_cells; i++)
1790 v = row_tot[r] / W * 100.;
1794 v = row_tot[r] / W * 100.;
1797 case CRS_CL_EXPECTED:
1798 case CRS_CL_RESIDUAL:
1799 case CRS_CL_SRESIDUAL:
1800 case CRS_CL_ASRESIDUAL:
1808 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1809 tab_next_row (table);
1814 /* Column totals, grand total. */
1820 tab_hline (table, TAL_1, -1, n_cols, 0);
1821 for (c = 0; c <= n_cols; c++)
1823 double ct = c < n_cols ? col_tot[c] : W;
1824 int mark_missing = 0;
1828 if (cmd.miss == CRS_REPORT && c < n_cols
1829 && is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1832 for (i = 0; i < num_cells; i++)
1854 case CRS_CL_EXPECTED:
1855 case CRS_CL_RESIDUAL:
1856 case CRS_CL_SRESIDUAL:
1857 case CRS_CL_ASRESIDUAL:
1864 format_cell_entry (table, c, i, v, suffix, mark_missing);
1869 tab_offset (table, -1, tab_row (table) + last_row);
1872 tab_offset (table, 0, -1);
1875 static void calc_r (double *X, double *Y, double *, double *, double *);
1876 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1878 /* Display chi-square statistics. */
1880 display_chisq (void)
1882 static const char *chisq_stats[N_CHISQ] =
1884 N_("Pearson Chi-Square"),
1885 N_("Likelihood Ratio"),
1886 N_("Fisher's Exact Test"),
1887 N_("Continuity Correction"),
1888 N_("Linear-by-Linear Association"),
1890 double chisq_v[N_CHISQ];
1891 double fisher1, fisher2;
1897 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1899 tab_offset (chisq, nvar - 2, -1);
1901 for (i = 0; i < N_CHISQ; i++)
1903 if ((i != 2 && chisq_v[i] == SYSMIS)
1904 || (i == 2 && fisher1 == SYSMIS))
1908 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1911 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1912 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1913 tab_float (chisq, 3, 0, TAB_RIGHT,
1914 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1919 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1920 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1922 tab_next_row (chisq);
1925 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1926 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1927 tab_next_row (chisq);
1929 tab_offset (chisq, 0, -1);
1932 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1933 double[N_SYMMETRIC]);
1935 /* Display symmetric measures. */
1937 display_symmetric (void)
1939 static const char *categories[] =
1941 N_("Nominal by Nominal"),
1942 N_("Ordinal by Ordinal"),
1943 N_("Interval by Interval"),
1944 N_("Measure of Agreement"),
1947 static const char *stats[N_SYMMETRIC] =
1951 N_("Contingency Coefficient"),
1952 N_("Kendall's tau-b"),
1953 N_("Kendall's tau-c"),
1955 N_("Spearman Correlation"),
1960 static const int stats_categories[N_SYMMETRIC] =
1962 0, 0, 0, 1, 1, 1, 1, 2, 3,
1966 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
1969 if (!calc_symmetric (sym_v, sym_ase, sym_t))
1972 tab_offset (sym, nvar - 2, -1);
1974 for (i = 0; i < N_SYMMETRIC; i++)
1976 if (sym_v[i] == SYSMIS)
1979 if (stats_categories[i] != last_cat)
1981 last_cat = stats_categories[i];
1982 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
1985 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
1986 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
1987 if (sym_ase[i] != SYSMIS)
1988 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
1989 if (sym_t[i] != SYSMIS)
1990 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
1991 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
1995 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1996 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
1999 tab_offset (sym, 0, -1);
2002 static int calc_risk (double[], double[], double[], union value *);
2004 /* Display risk estimate. */
2009 double risk_v[3], lower[3], upper[3];
2013 if (!calc_risk (risk_v, upper, lower, c))
2016 tab_offset (risk, nvar - 2, -1);
2018 for (i = 0; i < 3; i++)
2020 if (risk_v[i] == SYSMIS)
2026 if (x->vars[COL_VAR]->type == NUMERIC)
2027 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2028 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2030 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2031 x->vars[COL_VAR]->name,
2032 x->vars[COL_VAR]->width, c[0].s,
2033 x->vars[COL_VAR]->width, c[1].s);
2037 if (x->vars[ROW_VAR]->type == NUMERIC)
2038 sprintf (buf, _("For cohort %s = %g"),
2039 x->vars[ROW_VAR]->name, rows[i - 1].f);
2041 sprintf (buf, _("For cohort %s = %.*s"),
2042 x->vars[ROW_VAR]->name,
2043 x->vars[ROW_VAR]->width, rows[i - 1].s);
2047 tab_text (risk, 0, 0, TAB_LEFT, buf);
2048 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2049 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2050 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2051 tab_next_row (risk);
2054 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2055 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2056 tab_next_row (risk);
2058 tab_offset (risk, 0, -1);
2061 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2062 double[N_DIRECTIONAL]);
2064 /* Display directional measures. */
2066 display_directional (void)
2068 static const char *categories[] =
2070 N_("Nominal by Nominal"),
2071 N_("Ordinal by Ordinal"),
2072 N_("Nominal by Interval"),
2075 static const char *stats[] =
2078 N_("Goodman and Kruskal tau"),
2079 N_("Uncertainty Coefficient"),
2084 static const char *types[] =
2091 static const int stats_categories[N_DIRECTIONAL] =
2093 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2096 static const int stats_stats[N_DIRECTIONAL] =
2098 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2101 static const int stats_types[N_DIRECTIONAL] =
2103 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2106 static const int *stats_lookup[] =
2113 static const char **stats_names[] =
2125 double direct_v[N_DIRECTIONAL];
2126 double direct_ase[N_DIRECTIONAL];
2127 double direct_t[N_DIRECTIONAL];
2131 if (!calc_directional (direct_v, direct_ase, direct_t))
2134 tab_offset (direct, nvar - 2, -1);
2136 for (i = 0; i < N_DIRECTIONAL; i++)
2138 if (direct_v[i] == SYSMIS)
2144 for (j = 0; j < 3; j++)
2145 if (last[j] != stats_lookup[j][i])
2148 tab_hline (direct, TAL_1, j, 6, 0);
2153 int k = last[j] = stats_lookup[j][i];
2158 string = x->vars[0]->name;
2160 string = x->vars[1]->name;
2162 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2163 gettext (stats_names[j][k]), string);
2168 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2169 if (direct_ase[i] != SYSMIS)
2170 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2171 if (direct_t[i] != SYSMIS)
2172 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2173 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2174 tab_next_row (direct);
2177 tab_offset (direct, 0, -1);
2180 /* Statistical calculations. */
2182 /* Returns the value of the gamma (factorial) function for an integer
2185 gamma_int (double x)
2190 for (i = 2; i < x; i++)
2195 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2197 static inline double
2198 Pr (int a, int b, int c, int d)
2200 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2201 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2202 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2203 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2204 / gamma_int (a + b + c + d + 1.));
2207 /* Swap the contents of A and B. */
2209 swap (int *a, int *b)
2216 /* Calculate significance for Fisher's exact test as specified in
2217 _SPSS Statistical Algorithms_, Appendix 5. */
2219 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2223 if (min (c, d) < min (a, b))
2224 swap (&a, &c), swap (&b, &d);
2225 if (min (b, d) < min (a, c))
2226 swap (&a, &b), swap (&c, &d);
2230 swap (&a, &b), swap (&c, &d);
2232 swap (&a, &c), swap (&b, &d);
2236 for (x = 0; x <= a; x++)
2237 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2239 *fisher2 = *fisher1;
2240 for (x = 1; x <= b; x++)
2241 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2244 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2245 columns with values COLS and N_ROWS rows with values ROWS. Values
2246 in the matrix sum to W. */
2248 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2249 double *fisher1, double *fisher2)
2253 chisq[0] = chisq[1] = 0.;
2254 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2255 *fisher1 = *fisher2 = SYSMIS;
2257 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2259 if (ns_rows <= 1 || ns_cols <= 1)
2261 chisq[0] = chisq[1] = SYSMIS;
2265 for (r = 0; r < n_rows; r++)
2266 for (c = 0; c < n_cols; c++)
2268 const double expected = row_tot[r] * col_tot[c] / W;
2269 const double freq = mat[n_cols * r + c];
2270 const double residual = freq - expected;
2272 chisq[0] += residual * residual / expected;
2274 chisq[1] += freq * log (expected / freq);
2285 /* Calculate Yates and Fisher exact test. */
2286 if (ns_cols == 2 && ns_rows == 2)
2288 double f11, f12, f21, f22;
2294 for (i = j = 0; i < n_cols; i++)
2295 if (col_tot[i] != 0.)
2304 f11 = mat[nz_cols[0]];
2305 f12 = mat[nz_cols[1]];
2306 f21 = mat[nz_cols[0] + n_cols];
2307 f22 = mat[nz_cols[1] + n_cols];
2312 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2315 chisq[3] = (W * x * x
2316 / (f11 + f12) / (f21 + f22)
2317 / (f11 + f21) / (f12 + f22));
2325 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2326 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2329 /* Calculate Mantel-Haenszel. */
2330 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2332 double r, ase_0, ase_1;
2333 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2335 chisq[4] = (W - 1.) * r * r;
2340 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2341 ASE_1, and ase_0 into ASE_0. The row and column values must be
2342 passed in X and Y. */
2344 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2346 double SX, SY, S, T;
2348 double sum_XYf, sum_X2Y2f;
2349 double sum_Xr, sum_X2r;
2350 double sum_Yc, sum_Y2c;
2353 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2354 for (j = 0; j < n_cols; j++)
2356 double fij = mat[j + i * n_cols];
2357 double product = X[i] * Y[j];
2358 double temp = fij * product;
2360 sum_X2Y2f += temp * product;
2363 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2365 sum_Xr += X[i] * row_tot[i];
2366 sum_X2r += X[i] * X[i] * row_tot[i];
2370 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2372 sum_Yc += Y[i] * col_tot[i];
2373 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2377 S = sum_XYf - sum_Xr * sum_Yc / W;
2378 SX = sum_X2r - sum_Xr * sum_Xr / W;
2379 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2382 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2387 for (s = c = 0., i = 0; i < n_rows; i++)
2388 for (j = 0; j < n_cols; j++)
2390 double Xresid, Yresid;
2393 Xresid = X[i] - Xbar;
2394 Yresid = Y[j] - Ybar;
2395 temp = (T * Xresid * Yresid
2397 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2398 y = mat[j + i * n_cols] * temp * temp - c;
2403 *ase_1 = sqrt (s) / (T * T);
2407 static double somers_d_v[3];
2408 static double somers_d_ase[3];
2409 static double somers_d_t[3];
2411 /* Calculate symmetric statistics and their asymptotic standard
2412 errors. Returns 0 if none could be calculated. */
2414 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2415 double t[N_SYMMETRIC])
2417 int q = min (ns_rows, ns_cols);
2426 for (i = 0; i < N_SYMMETRIC; i++)
2427 v[i] = ase[i] = t[i] = SYSMIS;
2430 /* Phi, Cramer's V, contingency coefficient. */
2431 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2433 double Xp = 0.; /* Pearson chi-square. */
2438 for (r = 0; r < n_rows; r++)
2439 for (c = 0; c < n_cols; c++)
2441 const double expected = row_tot[r] * col_tot[c] / W;
2442 const double freq = mat[n_cols * r + c];
2443 const double residual = freq - expected;
2445 Xp += residual * residual / expected;
2449 if (cmd.a_statistics[CRS_ST_PHI])
2451 v[0] = sqrt (Xp / W);
2452 v[1] = sqrt (Xp / (W * (q - 1)));
2454 if (cmd.a_statistics[CRS_ST_CC])
2455 v[2] = sqrt (Xp / (Xp + W));
2458 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2459 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2464 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2471 for (r = 0; r < n_rows; r++)
2472 Dr -= row_tot[r] * row_tot[r];
2473 for (c = 0; c < n_cols; c++)
2474 Dc -= col_tot[c] * col_tot[c];
2480 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2481 for (c = 0; c < n_cols; c++)
2485 for (r = 0; r < n_rows; r++)
2486 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2496 for (i = 0; i < n_rows; i++)
2500 for (j = 1; j < n_cols; j++)
2501 Cij += col_tot[j] - cum[j + i * n_cols];
2504 for (j = 1; j < n_cols; j++)
2505 Dij += cum[j + (i - 1) * n_cols];
2509 double fij = mat[j + i * n_cols];
2515 assert (j < n_cols);
2517 Cij -= col_tot[j] - cum[j + i * n_cols];
2518 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2522 Cij += cum[j - 1 + (i - 1) * n_cols];
2523 Dij -= cum[j + (i - 1) * n_cols];
2529 if (cmd.a_statistics[CRS_ST_BTAU])
2530 v[3] = (P - Q) / sqrt (Dr * Dc);
2531 if (cmd.a_statistics[CRS_ST_CTAU])
2532 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2533 if (cmd.a_statistics[CRS_ST_GAMMA])
2534 v[5] = (P - Q) / (P + Q);
2536 /* ASE for tau-b, tau-c, gamma. Calculations could be
2537 eliminated here, at expense of memory. */
2542 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2543 for (i = 0; i < n_rows; i++)
2547 for (j = 1; j < n_cols; j++)
2548 Cij += col_tot[j] - cum[j + i * n_cols];
2551 for (j = 1; j < n_cols; j++)
2552 Dij += cum[j + (i - 1) * n_cols];
2556 double fij = mat[j + i * n_cols];
2558 if (cmd.a_statistics[CRS_ST_BTAU])
2560 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2561 + v[3] * (row_tot[i] * Dc
2562 + col_tot[j] * Dr));
2563 btau_cum += fij * temp * temp;
2567 const double temp = Cij - Dij;
2568 ctau_cum += fij * temp * temp;
2571 if (cmd.a_statistics[CRS_ST_GAMMA])
2573 const double temp = Q * Cij - P * Dij;
2574 gamma_cum += fij * temp * temp;
2577 if (cmd.a_statistics[CRS_ST_D])
2579 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2580 - (P - Q) * (W - row_tot[i]));
2581 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2582 - (Q - P) * (W - col_tot[j]));
2587 assert (j < n_cols);
2589 Cij -= col_tot[j] - cum[j + i * n_cols];
2590 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2594 Cij += cum[j - 1 + (i - 1) * n_cols];
2595 Dij -= cum[j + (i - 1) * n_cols];
2601 btau_var = ((btau_cum
2602 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2604 if (cmd.a_statistics[CRS_ST_BTAU])
2606 ase[3] = sqrt (btau_var);
2607 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2610 if (cmd.a_statistics[CRS_ST_CTAU])
2612 ase[4] = ((2 * q / ((q - 1) * W * W))
2613 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2614 t[4] = v[4] / ase[4];
2616 if (cmd.a_statistics[CRS_ST_GAMMA])
2618 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2619 t[5] = v[5] / (2. / (P + Q)
2620 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2622 if (cmd.a_statistics[CRS_ST_D])
2624 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2625 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2626 somers_d_t[0] = (somers_d_v[0]
2628 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2629 somers_d_v[1] = (P - Q) / Dc;
2630 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2631 somers_d_t[1] = (somers_d_v[1]
2633 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2634 somers_d_v[2] = (P - Q) / Dr;
2635 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2636 somers_d_t[2] = (somers_d_v[2]
2638 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2644 /* Spearman correlation, Pearson's r. */
2645 if (cmd.a_statistics[CRS_ST_CORR])
2647 double *R = local_alloc (sizeof *R * n_rows);
2648 double *C = local_alloc (sizeof *C * n_cols);
2651 double y, t, c = 0., s = 0.;
2656 R[i] = s + (row_tot[i] + 1.) / 2.;
2663 assert (i < n_rows);
2668 double y, t, c = 0., s = 0.;
2673 C[j] = s + (col_tot[j] + 1.) / 2;
2680 assert (j < n_cols);
2684 calc_r (R, C, &v[6], &t[6], &ase[6]);
2690 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2694 /* Cohen's kappa. */
2695 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2697 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2700 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2701 i < ns_rows; i++, j++)
2705 while (col_tot[j] == 0.)
2708 prod = row_tot[i] * col_tot[j];
2709 sum = row_tot[i] + col_tot[j];
2711 sum_fii += mat[j + i * n_cols];
2713 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2714 sum_riciri_ci += prod * sum;
2716 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2717 for (j = 0; j < ns_cols; j++)
2719 double sum = row_tot[i] + col_tot[j];
2720 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2723 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2725 ase[8] = sqrt ((W * W * sum_rici
2726 + sum_rici * sum_rici
2727 - W * sum_riciri_ci)
2728 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2730 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2731 / pow2 (W * W - sum_rici))
2732 + ((2. * (W - sum_fii)
2733 * (2. * sum_fii * sum_rici
2734 - W * sum_fiiri_ci))
2735 / cube (W * W - sum_rici))
2736 + (pow2 (W - sum_fii)
2737 * (W * sum_fijri_ci2 - 4.
2738 * sum_rici * sum_rici)
2739 / pow4 (W * W - sum_rici))));
2741 t[8] = v[8] / ase[8];
2748 /* Calculate risk estimate. */
2750 calc_risk (double *value, double *upper, double *lower, union value *c)
2752 double f11, f12, f21, f22;
2758 for (i = 0; i < 3; i++)
2759 value[i] = upper[i] = lower[i] = SYSMIS;
2762 if (ns_rows != 2 || ns_cols != 2)
2769 for (i = j = 0; i < n_cols; i++)
2770 if (col_tot[i] != 0.)
2779 f11 = mat[nz_cols[0]];
2780 f12 = mat[nz_cols[1]];
2781 f21 = mat[nz_cols[0] + n_cols];
2782 f22 = mat[nz_cols[1] + n_cols];
2784 c[0] = cols[nz_cols[0]];
2785 c[1] = cols[nz_cols[1]];
2788 value[0] = (f11 * f22) / (f12 * f21);
2789 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2790 lower[0] = value[0] * exp (-1.960 * v);
2791 upper[0] = value[0] * exp (1.960 * v);
2793 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2794 v = sqrt ((f12 / (f11 * (f11 + f12)))
2795 + (f22 / (f21 * (f21 + f22))));
2796 lower[1] = value[1] * exp (-1.960 * v);
2797 upper[1] = value[1] * exp (1.960 * v);
2799 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2800 v = sqrt ((f11 / (f12 * (f11 + f12)))
2801 + (f21 / (f22 * (f21 + f22))));
2802 lower[2] = value[2] * exp (-1.960 * v);
2803 upper[2] = value[2] * exp (1.960 * v);
2808 /* Calculate directional measures. */
2810 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2811 double t[N_DIRECTIONAL])
2816 for (i = 0; i < N_DIRECTIONAL; i++)
2817 v[i] = ase[i] = t[i] = SYSMIS;
2821 if (cmd.a_statistics[CRS_ST_LAMBDA])
2823 double *fim = xmalloc (sizeof *fim * n_rows);
2824 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2825 double *fmj = xmalloc (sizeof *fmj * n_cols);
2826 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2827 double sum_fim, sum_fmj;
2829 int rm_index, cm_index;
2832 /* Find maximum for each row and their sum. */
2833 for (sum_fim = 0., i = 0; i < n_rows; i++)
2835 double max = mat[i * n_cols];
2838 for (j = 1; j < n_cols; j++)
2839 if (mat[j + i * n_cols] > max)
2841 max = mat[j + i * n_cols];
2845 sum_fim += fim[i] = max;
2846 fim_index[i] = index;
2849 /* Find maximum for each column. */
2850 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2852 double max = mat[j];
2855 for (i = 1; i < n_rows; i++)
2856 if (mat[j + i * n_cols] > max)
2858 max = mat[j + i * n_cols];
2862 sum_fmj += fmj[j] = max;
2863 fmj_index[j] = index;
2866 /* Find maximum row total. */
2869 for (i = 1; i < n_rows; i++)
2870 if (row_tot[i] > rm)
2876 /* Find maximum column total. */
2879 for (j = 1; j < n_cols; j++)
2880 if (col_tot[j] > cm)
2886 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2887 v[1] = (sum_fmj - rm) / (W - rm);
2888 v[2] = (sum_fim - cm) / (W - cm);
2890 /* ASE1 for Y given X. */
2894 for (accum = 0., i = 0; i < n_rows; i++)
2895 for (j = 0; j < n_cols; j++)
2897 const int deltaj = j == cm_index;
2898 accum += (mat[j + i * n_cols]
2899 * pow2 ((j == fim_index[i])
2904 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2907 /* ASE0 for Y given X. */
2911 for (accum = 0., i = 0; i < n_rows; i++)
2912 if (cm_index != fim_index[i])
2913 accum += (mat[i * n_cols + fim_index[i]]
2914 + mat[i * n_cols + cm_index]);
2915 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2918 /* ASE1 for X given Y. */
2922 for (accum = 0., i = 0; i < n_rows; i++)
2923 for (j = 0; j < n_cols; j++)
2925 const int deltaj = i == rm_index;
2926 accum += (mat[j + i * n_cols]
2927 * pow2 ((i == fmj_index[j])
2932 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2935 /* ASE0 for X given Y. */
2939 for (accum = 0., j = 0; j < n_cols; j++)
2940 if (rm_index != fmj_index[j])
2941 accum += (mat[j + n_cols * fmj_index[j]]
2942 + mat[j + n_cols * rm_index]);
2943 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2946 /* Symmetric ASE0 and ASE1. */
2951 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
2952 for (j = 0; j < n_cols; j++)
2954 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
2955 int temp1 = (i == rm_index) + (j == cm_index);
2956 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
2957 accum1 += (mat[j + i * n_cols]
2958 * pow2 (temp0 + (v[0] - 1.) * temp1));
2960 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
2961 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
2962 / (2. * W - rm - cm));
2971 double sum_fij2_ri, sum_fij2_ci;
2972 double sum_ri2, sum_cj2;
2974 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
2975 for (j = 0; j < n_cols; j++)
2977 double temp = pow2 (mat[j + i * n_cols]);
2978 sum_fij2_ri += temp / row_tot[i];
2979 sum_fij2_ci += temp / col_tot[j];
2982 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
2983 sum_ri2 += row_tot[i] * row_tot[i];
2985 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
2986 sum_cj2 += col_tot[j] * col_tot[j];
2988 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
2989 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
2993 if (cmd.a_statistics[CRS_ST_UC])
2995 double UX, UY, UXY, P;
2996 double ase1_yx, ase1_xy, ase1_sym;
2999 for (UX = 0., i = 0; i < n_rows; i++)
3000 if (row_tot[i] > 0.)
3001 UX -= row_tot[i] / W * log (row_tot[i] / W);
3003 for (UY = 0., j = 0; j < n_cols; j++)
3004 if (col_tot[j] > 0.)
3005 UY -= col_tot[j] / W * log (col_tot[j] / W);
3007 for (UXY = P = 0., i = 0; i < n_rows; i++)
3008 for (j = 0; j < n_cols; j++)
3010 double entry = mat[j + i * n_cols];
3015 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3016 UXY -= entry / W * log (entry / W);
3019 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3020 for (j = 0; j < n_cols; j++)
3022 double entry = mat[j + i * n_cols];
3027 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3028 + (UX - UXY) * log (col_tot[j] / W));
3029 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3030 + (UY - UXY) * log (row_tot[i] / W));
3031 ase1_sym += entry * pow2 ((UXY
3032 * log (row_tot[i] * col_tot[j] / (W * W)))
3033 - (UX + UY) * log (entry / W));
3036 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3037 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3038 t[5] = v[5] / ((2. / (W * (UX + UY)))
3039 * sqrt (P - pow2 (UX + UY - UXY) / W));
3041 v[6] = (UX + UY - UXY) / UX;
3042 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3043 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3045 v[7] = (UX + UY - UXY) / UY;
3046 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3047 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3051 if (cmd.a_statistics[CRS_ST_D])
3056 calc_symmetric (NULL, NULL, NULL);
3057 for (i = 0; i < 3; i++)
3059 v[8 + i] = somers_d_v[i];
3060 ase[8 + i] = somers_d_ase[i];
3061 t[8 + i] = somers_d_t[i];
3066 if (cmd.a_statistics[CRS_ST_ETA])
3069 double sum_Xr, sum_X2r;
3073 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3075 sum_Xr += rows[i].f * row_tot[i];
3076 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3078 SX = sum_X2r - sum_Xr * sum_Xr / W;
3080 for (SXW = 0., j = 0; j < n_cols; j++)
3084 for (cum = 0., i = 0; i < n_rows; i++)
3086 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3087 cum += rows[i].f * mat[j + i * n_cols];
3090 SXW -= cum * cum / col_tot[j];
3092 v[11] = sqrt (1. - SXW / SX);
3096 double sum_Yc, sum_Y2c;
3100 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3102 sum_Yc += cols[i].f * col_tot[i];
3103 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3105 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3107 for (SYW = 0., i = 0; i < n_rows; i++)
3111 for (cum = 0., j = 0; j < n_cols; j++)
3113 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3114 cum += cols[j].f * mat[j + i * n_cols];
3117 SYW -= cum * cum / row_tot[i];
3119 v[12] = sqrt (1. - SYW / SY);
3126 /* A wrapper around data_out() that limits string output to short
3127 string width and null terminates the result. */
3129 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3131 struct fmt_spec fmt_subst;
3133 /* Limit to short string width. */
3134 if (formats[fp->type].cat & FCAT_STRING)
3138 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3139 if (fmt_subst.type == FMT_A)
3140 fmt_subst.w = min (8, fmt_subst.w);
3142 fmt_subst.w = min (16, fmt_subst.w);
3148 data_out (s, fp, v);
3150 /* Null terminate. */