1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include <gsl/gsl_cdf.h>
38 #include "algorithm.h"
41 #include "dictionary.h"
52 #include "value-labels.h"
57 #define _(msgid) gettext (msgid)
58 #define N_(msgid) msgid
62 #include "debug-print.h"
68 +missing=miss:!table/include/report;
69 +write[wr_]=none,cells,all;
70 +format=fmt:!labels/nolabels/novallabs,
73 tabl:!tables/notables,
76 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
78 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
84 /* Number of chi-square statistics. */
87 /* Number of symmetric statistics. */
90 /* Number of directional statistics. */
91 #define N_DIRECTIONAL 13
93 /* A single table entry for general mode. */
96 int table; /* Flattened table number. */
99 double freq; /* Frequency count. */
100 double *data; /* Crosstabulation table for integer mode. */
103 union value values[1]; /* Values. */
106 /* A crosstabulation. */
109 int nvar; /* Number of variables. */
110 double missing; /* Missing cases count. */
111 int ofs; /* Integer mode: Offset into sorted_tab[]. */
112 struct variable *vars[2]; /* At least two variables; sorted by
113 larger indices first. */
116 /* Integer mode variable info. */
119 int min; /* Minimum value. */
120 int max; /* Maximum value + 1. */
121 int count; /* max - min. */
124 static inline struct var_range *
125 get_var_range (struct variable *v)
128 assert (v->aux != NULL);
132 /* Indexes into crosstab.v. */
139 /* General mode crosstabulation table. */
140 static struct hsh_table *gen_tab; /* Hash table. */
141 static int n_sorted_tab; /* Number of entries in sorted_tab. */
142 static struct table_entry **sorted_tab; /* Sorted table. */
144 /* Variables specifies on VARIABLES. */
145 static struct variable **variables;
146 static size_t variables_cnt;
149 static struct crosstab **xtab;
152 /* Integer or general mode? */
161 static int num_cells; /* Number of cells requested. */
162 static int cells[8]; /* Cells requested. */
165 static int write; /* One of WR_* that specifies the WRITE style. */
167 /* Command parsing info. */
168 static struct cmd_crosstabs cmd;
171 static struct pool *pl_tc; /* For table cells. */
172 static struct pool *pl_col; /* For column data. */
174 static int internal_cmd_crosstabs (void);
175 static void precalc (void *);
176 static int calc_general (struct ccase *, void *);
177 static int calc_integer (struct ccase *, void *);
178 static void postcalc (void *);
179 static void submit (struct tab_table *);
181 static void format_short (char *s, const struct fmt_spec *fp,
182 const union value *v);
184 /* Parse and execute CROSSTABS, then clean up. */
188 int result = internal_cmd_crosstabs ();
191 pool_destroy (pl_tc);
192 pool_destroy (pl_col);
197 /* Parses and executes the CROSSTABS procedure. */
199 internal_cmd_crosstabs (void)
207 pl_tc = pool_create ();
208 pl_col = pool_create ();
210 if (!parse_crosstabs (&cmd))
213 mode = variables ? INTEGER : GENERAL;
218 cmd.a_cells[CRS_CL_COUNT] = 1;
224 for (i = 0; i < CRS_CL_count; i++)
229 cmd.a_cells[CRS_CL_COUNT] = 1;
230 cmd.a_cells[CRS_CL_ROW] = 1;
231 cmd.a_cells[CRS_CL_COLUMN] = 1;
232 cmd.a_cells[CRS_CL_TOTAL] = 1;
234 if (cmd.a_cells[CRS_CL_ALL])
236 for (i = 0; i < CRS_CL_count; i++)
238 cmd.a_cells[CRS_CL_ALL] = 0;
240 cmd.a_cells[CRS_CL_NONE] = 0;
242 for (num_cells = i = 0; i < CRS_CL_count; i++)
244 cells[num_cells++] = i;
247 if (cmd.sbc_statistics)
252 for (i = 0; i < CRS_ST_count; i++)
253 if (cmd.a_statistics[i])
256 cmd.a_statistics[CRS_ST_CHISQ] = 1;
257 if (cmd.a_statistics[CRS_ST_ALL])
258 for (i = 0; i < CRS_ST_count; i++)
259 cmd.a_statistics[i] = 1;
263 if (cmd.miss == CRS_REPORT && mode == GENERAL)
265 msg (SE, _("Missing mode REPORT not allowed in general mode. "
266 "Assuming MISSING=TABLE."));
267 cmd.miss = CRS_TABLE;
271 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
272 cmd.a_write[CRS_WR_ALL] = 0;
273 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
275 msg (SE, _("Write mode ALL not allowed in general mode. "
276 "Assuming WRITE=CELLS."));
277 cmd.a_write[CRS_WR_CELLS] = 1;
280 && (cmd.a_write[CRS_WR_NONE]
281 + cmd.a_write[CRS_WR_ALL]
282 + cmd.a_write[CRS_WR_CELLS] == 0))
283 cmd.a_write[CRS_WR_CELLS] = 1;
284 if (cmd.a_write[CRS_WR_CELLS])
285 write = CRS_WR_CELLS;
286 else if (cmd.a_write[CRS_WR_ALL])
291 procedure_with_splits (precalc,
292 mode == GENERAL ? calc_general : calc_integer,
298 /* Parses the TABLES subcommand. */
300 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
302 struct var_set *var_set;
304 struct variable ***by = NULL;
305 size_t *by_nvar = NULL;
309 /* Ensure that this is a TABLES subcommand. */
310 if (!lex_match_id ("TABLES")
311 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
316 if (variables != NULL)
317 var_set = var_set_create_from_array (variables, variables_cnt);
319 var_set = var_set_create_from_dict (default_dict);
320 assert (var_set != NULL);
324 by = xrealloc (by, sizeof *by * (n_by + 1));
325 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
326 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
327 PV_NO_DUPLICATE | PV_NO_SCRATCH))
329 if (xalloc_oversized (nx, by_nvar[n_by]))
331 msg (SE, _("Too many crosstabulation variables or dimensions."));
337 if (!lex_match (T_BY))
341 lex_error (_("expecting BY"));
350 int *by_iter = xcalloc (n_by, sizeof *by_iter);
353 xtab = xnrealloc (xtab, sizeof *xtab, nxtab + nx);
354 for (i = 0; i < nx; i++)
358 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
365 for (i = 0; i < n_by; i++)
366 x->vars[i] = by[i][by_iter[i]];
372 for (i = n_by - 1; i >= 0; i--)
374 if (++by_iter[i] < by_nvar[i])
387 /* All return paths lead here. */
391 for (i = 0; i < n_by; i++)
397 var_set_destroy (var_set);
402 /* Parses the VARIABLES subcommand. */
404 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
408 msg (SE, _("VARIABLES must be specified before TABLES."));
416 size_t orig_nv = variables_cnt;
421 if (!parse_variables (default_dict, &variables, &variables_cnt,
422 (PV_APPEND | PV_NUMERIC
423 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
428 lex_error ("expecting `('");
433 if (!lex_force_int ())
435 min = lex_integer ();
440 if (!lex_force_int ())
442 max = lex_integer ();
445 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
453 lex_error ("expecting `)'");
458 for (i = orig_nv; i < variables_cnt; i++)
460 struct var_range *vr = xmalloc (sizeof *vr);
463 vr->count = max - min + 1;
464 var_attach_aux (variables[i], vr, var_dtor_free);
479 /* Data file processing. */
481 static int compare_table_entry (const void *, const void *, void *);
482 static unsigned hash_table_entry (const void *, void *);
484 /* Set up the crosstabulation tables for processing. */
486 precalc (void *aux UNUSED)
490 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
500 for (i = 0; i < nxtab; i++)
502 struct crosstab *x = xtab[i];
507 x->ofs = n_sorted_tab;
509 for (j = 2; j < x->nvar; j++)
510 count *= get_var_range (x->vars[j - 2])->count;
512 sorted_tab = xrealloc (sorted_tab,
513 sizeof *sorted_tab * (n_sorted_tab + count));
514 v = local_alloc (sizeof *v * x->nvar);
515 for (j = 2; j < x->nvar; j++)
516 v[j] = get_var_range (x->vars[j])->min;
517 for (j = 0; j < count; j++)
519 struct table_entry *te;
522 te = sorted_tab[n_sorted_tab++]
523 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
527 int row_cnt = get_var_range (x->vars[0])->count;
528 int col_cnt = get_var_range (x->vars[1])->count;
529 const int mat_size = row_cnt * col_cnt;
532 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
533 for (m = 0; m < mat_size; m++)
537 for (k = 2; k < x->nvar; k++)
538 te->values[k].f = v[k];
539 for (k = 2; k < x->nvar; k++)
541 struct var_range *vr = get_var_range (x->vars[k]);
542 if (++v[k] >= vr->max)
551 sorted_tab = xrealloc (sorted_tab,
552 sizeof *sorted_tab * (n_sorted_tab + 1));
553 sorted_tab[n_sorted_tab] = NULL;
557 /* Form crosstabulations for general mode. */
559 calc_general (struct ccase *c, void *aux UNUSED)
564 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
566 /* Flattened current table index. */
569 for (t = 0; t < nxtab; t++)
571 struct crosstab *x = xtab[t];
572 const size_t entry_size = (sizeof (struct table_entry)
573 + sizeof (union value) * (x->nvar - 1));
574 struct table_entry *te = local_alloc (entry_size);
576 /* Construct table entry for the current record and table. */
582 for (j = 0; j < x->nvar; j++)
584 const union value *v = case_data (c, x->vars[j]->fv);
585 const struct missing_values *mv = &x->vars[j]->miss;
586 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
587 || (cmd.miss == CRS_INCLUDE
588 && mv_is_value_system_missing (mv, v)))
590 x->missing += weight;
594 if (x->vars[j]->type == NUMERIC)
595 te->values[j].f = case_num (c, x->vars[j]->fv);
598 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
601 /* Necessary in order to simplify comparisons. */
602 memset (&te->values[j].s[x->vars[j]->width], 0,
603 sizeof (union value) - x->vars[j]->width);
608 /* Add record to hash table. */
610 struct table_entry **tepp
611 = (struct table_entry **) hsh_probe (gen_tab, te);
614 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
617 memcpy (tep, te, entry_size);
622 (*tepp)->u.freq += weight;
633 calc_integer (struct ccase *c, void *aux UNUSED)
638 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
640 /* Flattened current table index. */
643 for (t = 0; t < nxtab; t++)
645 struct crosstab *x = xtab[t];
650 for (i = 0; i < x->nvar; i++)
652 struct variable *const v = x->vars[i];
653 struct var_range *vr = get_var_range (v);
654 double value = case_num (c, v->fv);
656 /* Note that the first test also rules out SYSMIS. */
657 if ((value < vr->min || value >= vr->max)
658 || (cmd.miss == CRS_TABLE
659 && mv_is_num_user_missing (&v->miss, value)))
661 x->missing += weight;
667 ofs += fact * ((int) value - vr->min);
673 struct variable *row_var = x->vars[ROW_VAR];
674 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
676 struct variable *col_var = x->vars[COL_VAR];
677 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
679 const int col_dim = get_var_range (col_var)->count;
681 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
690 /* Compare the table_entry's at A and B and return a strcmp()-type
693 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
695 const struct table_entry *a = a_;
696 const struct table_entry *b = b_;
698 if (a->table > b->table)
700 else if (a->table < b->table)
704 const struct crosstab *x = xtab[a->table];
707 for (i = x->nvar - 1; i >= 0; i--)
708 if (x->vars[i]->type == NUMERIC)
710 const double diffnum = a->values[i].f - b->values[i].f;
713 else if (diffnum > 0)
718 assert (x->vars[i]->type == ALPHA);
720 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
731 /* Calculate a hash value from table_entry A. */
733 hash_table_entry (const void *a_, void *foo UNUSED)
735 const struct table_entry *a = a_;
740 for (i = 0; i < xtab[a->table]->nvar; i++)
741 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
746 /* Post-data reading calculations. */
748 static struct table_entry **find_pivot_extent (struct table_entry **,
749 int *cnt, int pivot);
750 static void enum_var_values (struct table_entry **entries, int entry_cnt,
752 union value **values, int *value_cnt);
753 static void output_pivot_table (struct table_entry **, struct table_entry **,
754 double **, double **, double **,
755 int *, int *, int *);
756 static void make_summary_table (void);
759 postcalc (void *aux UNUSED)
763 n_sorted_tab = hsh_count (gen_tab);
764 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
767 make_summary_table ();
769 /* Identify all the individual crosstabulation tables, and deal with
772 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
773 int pc = n_sorted_tab; /* Pivot count. */
775 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
776 int maxrows = 0, maxcols = 0, maxcells = 0;
780 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
784 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
785 &maxrows, &maxcols, &maxcells);
794 hsh_destroy (gen_tab);
797 static void insert_summary (struct tab_table *, int tab_index, double valid);
799 /* Output a table summarizing the cases processed. */
801 make_summary_table (void)
803 struct tab_table *summary;
805 struct table_entry **pb = sorted_tab, **pe;
806 int pc = n_sorted_tab;
809 summary = tab_create (7, 3 + nxtab, 1);
810 tab_title (summary, 0, _("Summary."));
811 tab_headers (summary, 1, 0, 3, 0);
812 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
813 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
814 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
815 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
816 tab_hline (summary, TAL_1, 1, 6, 1);
817 tab_hline (summary, TAL_1, 1, 6, 2);
818 tab_vline (summary, TAL_1, 3, 1, 1);
819 tab_vline (summary, TAL_1, 5, 1, 1);
823 for (i = 0; i < 3; i++)
825 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
826 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
829 tab_offset (summary, 0, 3);
835 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
839 while (cur_tab < (*pb)->table)
840 insert_summary (summary, cur_tab++, 0.);
843 for (valid = 0.; pb < pe; pb++)
844 valid += (*pb)->u.freq;
847 const struct crosstab *const x = xtab[(*pb)->table];
848 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
849 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
850 const int count = n_cols * n_rows;
852 for (valid = 0.; pb < pe; pb++)
854 const double *data = (*pb)->u.data;
857 for (i = 0; i < count; i++)
861 insert_summary (summary, cur_tab++, valid);
866 while (cur_tab < nxtab)
867 insert_summary (summary, cur_tab++, 0.);
872 /* Inserts a line into T describing the crosstabulation at index
873 TAB_INDEX, which has VALID valid observations. */
875 insert_summary (struct tab_table *t, int tab_index, double valid)
877 struct crosstab *x = xtab[tab_index];
879 tab_hline (t, TAL_1, 0, 6, 0);
881 /* Crosstabulation name. */
883 char *buf = local_alloc (128 * x->nvar);
887 for (i = 0; i < x->nvar; i++)
890 cp = stpcpy (cp, " * ");
893 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
895 tab_text (t, 0, 0, TAB_LEFT, buf);
900 /* Counts and percentages. */
910 for (i = 0; i < 3; i++)
912 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
913 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
924 static struct tab_table *table; /* Crosstabulation table. */
925 static struct tab_table *chisq; /* Chi-square table. */
926 static struct tab_table *sym; /* Symmetric measures table. */
927 static struct tab_table *risk; /* Risk estimate table. */
928 static struct tab_table *direct; /* Directional measures table. */
931 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
933 /* Column values, number of columns. */
934 static union value *cols;
937 /* Row values, number of rows. */
938 static union value *rows;
941 /* Number of statistically interesting columns/rows (columns/rows with
943 static int ns_cols, ns_rows;
945 /* Crosstabulation. */
946 static struct crosstab *x;
948 /* Number of variables from the crosstabulation to consider. This is
949 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
952 /* Matrix contents. */
953 static double *mat; /* Matrix proper. */
954 static double *row_tot; /* Row totals. */
955 static double *col_tot; /* Column totals. */
956 static double W; /* Grand total. */
958 static void display_dimensions (struct tab_table *, int first_difference,
959 struct table_entry *);
960 static void display_crosstabulation (void);
961 static void display_chisq (void);
962 static void display_symmetric (void);
963 static void display_risk (void);
964 static void display_directional (void);
965 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
966 static void table_value_missing (struct tab_table *table, int c, int r,
967 unsigned char opt, const union value *v,
968 const struct variable *var);
969 static void delete_missing (void);
971 /* Output pivot table beginning at PB and continuing until PE,
972 exclusive. For efficiency, *MATP is a pointer to a matrix that can
973 hold *MAXROWS entries. */
975 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
976 double **matp, double **row_totp, double **col_totp,
977 int *maxrows, int *maxcols, int *maxcells)
980 struct table_entry **tb = pb, **te; /* Table begin, table end. */
981 int tc = pe - pb; /* Table count. */
983 /* Table entry for header comparison. */
984 struct table_entry *cmp = NULL;
986 x = xtab[(*pb)->table];
987 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
989 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
991 /* Crosstabulation table initialization. */
994 table = tab_create (nvar + n_cols,
995 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
996 tab_headers (table, nvar - 1, 0, 2, 0);
998 /* First header line. */
999 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1000 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1002 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1004 /* Second header line. */
1008 for (i = 2; i < nvar; i++)
1009 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1010 TAB_RIGHT | TAT_TITLE,
1012 ? x->vars[i]->label : x->vars[i]->name));
1013 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1014 x->vars[ROW_VAR]->name);
1015 for (i = 0; i < n_cols; i++)
1016 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1018 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1021 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1022 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1026 char *title = local_alloc (x->nvar * 64 + 128);
1030 if (cmd.pivot == CRS_PIVOT)
1031 for (i = 0; i < nvar; i++)
1034 cp = stpcpy (cp, " by ");
1035 cp = stpcpy (cp, x->vars[i]->name);
1039 cp = spprintf (cp, "%s by %s for",
1040 x->vars[0]->name, x->vars[1]->name);
1041 for (i = 2; i < nvar; i++)
1043 char buf[64], *bufp;
1048 cp = stpcpy (cp, x->vars[i]->name);
1050 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1051 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1053 cp = stpcpy (cp, bufp);
1057 cp = stpcpy (cp, " [");
1058 for (i = 0; i < num_cells; i++)
1066 static const struct tuple cell_names[] =
1068 {CRS_CL_COUNT, N_("count")},
1069 {CRS_CL_ROW, N_("row %")},
1070 {CRS_CL_COLUMN, N_("column %")},
1071 {CRS_CL_TOTAL, N_("total %")},
1072 {CRS_CL_EXPECTED, N_("expected")},
1073 {CRS_CL_RESIDUAL, N_("residual")},
1074 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1075 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1079 const struct tuple *t;
1081 for (t = cell_names; t->value != cells[i]; t++)
1082 assert (t->value != -1);
1084 cp = stpcpy (cp, ", ");
1085 cp = stpcpy (cp, gettext (t->name));
1089 tab_title (table, 0, title);
1093 tab_offset (table, 0, 2);
1098 /* Chi-square table initialization. */
1099 if (cmd.a_statistics[CRS_ST_CHISQ])
1101 chisq = tab_create (6 + (nvar - 2),
1102 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1103 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1105 tab_title (chisq, 0, "Chi-square tests.");
1107 tab_offset (chisq, nvar - 2, 0);
1108 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1109 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1110 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1111 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1112 _("Asymp. Sig. (2-sided)"));
1113 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1114 _("Exact. Sig. (2-sided)"));
1115 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1116 _("Exact. Sig. (1-sided)"));
1118 tab_offset (chisq, 0, 1);
1123 /* Symmetric measures. */
1124 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1125 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1126 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1127 || cmd.a_statistics[CRS_ST_KAPPA])
1129 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1130 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1131 tab_title (sym, 0, "Symmetric measures.");
1133 tab_offset (sym, nvar - 2, 0);
1134 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1135 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1136 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1137 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1138 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1139 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1140 tab_offset (sym, 0, 1);
1145 /* Risk estimate. */
1146 if (cmd.a_statistics[CRS_ST_RISK])
1148 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1149 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1150 tab_title (risk, 0, "Risk estimate.");
1152 tab_offset (risk, nvar - 2, 0);
1153 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1154 _(" 95%% Confidence Interval"));
1155 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1156 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1157 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1158 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1159 tab_hline (risk, TAL_1, 2, 3, 1);
1160 tab_vline (risk, TAL_1, 2, 0, 1);
1161 tab_offset (risk, 0, 2);
1166 /* Directional measures. */
1167 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1168 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1170 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1171 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1172 tab_title (direct, 0, "Directional measures.");
1174 tab_offset (direct, nvar - 2, 0);
1175 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1176 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1177 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1178 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1179 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1180 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1181 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1182 tab_offset (direct, 0, 1);
1189 /* Find pivot subtable if applicable. */
1190 te = find_pivot_extent (tb, &tc, 0);
1194 /* Find all the row variable values. */
1195 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1197 /* Allocate memory space for the column and row totals. */
1198 if (n_rows > *maxrows)
1200 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1201 row_tot = *row_totp;
1204 if (n_cols > *maxcols)
1206 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1207 col_tot = *col_totp;
1211 /* Allocate table space for the matrix. */
1212 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1213 tab_realloc (table, -1,
1214 max (tab_nr (table) + (n_rows + 1) * num_cells,
1215 tab_nr (table) * (pe - pb) / (te - tb)));
1217 if (mode == GENERAL)
1219 /* Allocate memory space for the matrix. */
1220 if (n_cols * n_rows > *maxcells)
1222 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1223 *maxcells = n_cols * n_rows;
1228 /* Build the matrix and calculate column totals. */
1230 union value *cur_col = cols;
1231 union value *cur_row = rows;
1233 double *cp = col_tot;
1234 struct table_entry **p;
1237 for (p = &tb[0]; p < te; p++)
1239 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1243 for (; cur_row < &rows[n_rows]; cur_row++)
1249 mp = &mat[cur_col - cols];
1252 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1259 *cp += *mp = (*p)->u.freq;
1264 /* Zero out the rest of the matrix. */
1265 for (; cur_row < &rows[n_rows]; cur_row++)
1271 if (cur_col < &cols[n_cols])
1273 const int rem_cols = n_cols - (cur_col - cols);
1276 for (c = 0; c < rem_cols; c++)
1278 mp = &mat[cur_col - cols];
1279 for (r = 0; r < n_rows; r++)
1281 for (c = 0; c < rem_cols; c++)
1283 mp += n_cols - rem_cols;
1291 double *tp = col_tot;
1293 assert (mode == INTEGER);
1294 mat = (*tb)->u.data;
1297 /* Calculate column totals. */
1298 for (c = 0; c < n_cols; c++)
1301 double *cp = &mat[c];
1303 for (r = 0; r < n_rows; r++)
1304 cum += cp[r * n_cols];
1312 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1313 ns_cols += *cp != 0.;
1316 /* Calculate row totals. */
1319 double *rp = row_tot;
1322 for (ns_rows = 0, r = n_rows; r--; )
1325 for (c = n_cols; c--; )
1333 /* Calculate grand total. */
1339 if (n_rows < n_cols)
1340 tp = row_tot, n = n_rows;
1342 tp = col_tot, n = n_cols;
1348 /* Find the first variable that differs from the last subtable,
1349 then display the values of the dimensioning variables for
1350 each table that needs it. */
1352 int first_difference = nvar - 1;
1355 for (; ; first_difference--)
1357 assert (first_difference >= 2);
1358 if (memcmp (&cmp->values[first_difference],
1359 &(*tb)->values[first_difference],
1360 sizeof *cmp->values))
1366 display_dimensions (table, first_difference, *tb);
1368 display_dimensions (chisq, first_difference, *tb);
1370 display_dimensions (sym, first_difference, *tb);
1372 display_dimensions (risk, first_difference, *tb);
1374 display_dimensions (direct, first_difference, *tb);
1378 display_crosstabulation ();
1379 if (cmd.miss == CRS_REPORT)
1384 display_symmetric ();
1388 display_directional ();
1399 tab_resize (chisq, 4 + (nvar - 2), -1);
1410 /* Delete missing rows and columns for statistical analysis when
1413 delete_missing (void)
1418 for (r = 0; r < n_rows; r++)
1419 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1423 for (c = 0; c < n_cols; c++)
1424 mat[c + r * n_cols] = 0.;
1432 for (c = 0; c < n_cols; c++)
1433 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1437 for (r = 0; r < n_rows; r++)
1438 mat[c + r * n_cols] = 0.;
1444 /* Prepare table T for submission, and submit it. */
1446 submit (struct tab_table *t)
1453 tab_resize (t, -1, 0);
1454 if (tab_nr (t) == tab_t (t))
1459 tab_offset (t, 0, 0);
1461 for (i = 2; i < nvar; i++)
1462 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1463 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1464 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1465 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1467 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1469 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1470 tab_dim (t, crosstabs_dim);
1474 /* Sets the widths of all the columns and heights of all the rows in
1475 table T for driver D. */
1477 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1481 /* Width of a numerical column. */
1482 int c = outp_string_width (d, "0.000000");
1483 if (cmd.miss == CRS_REPORT)
1484 c += outp_string_width (d, "M");
1486 /* Set width for header columns. */
1489 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1491 if (w < d->prop_em_width * 8)
1492 w = d->prop_em_width * 8;
1494 if (w > d->prop_em_width * 15)
1495 w = d->prop_em_width * 15;
1497 for (i = 0; i < t->l; i++)
1501 for (i = t->l; i < t->nc; i++)
1504 for (i = 0; i < t->nr; i++)
1505 t->h[i] = tab_natural_height (t, d, i);
1508 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1509 int *cnt, int pivot);
1510 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1511 int *cnt, int pivot);
1513 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1515 static struct table_entry **
1516 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1518 return (mode == GENERAL
1519 ? find_pivot_extent_general (tp, cnt, pivot)
1520 : find_pivot_extent_integer (tp, cnt, pivot));
1523 /* Find the extent of a region in TP that contains one table. If
1524 PIVOT != 0 that means a set of table entries with identical table
1525 number; otherwise they also have to have the same values for every
1526 dimension after the row and column dimensions. The table that is
1527 searched starts at TP and has length CNT. Returns the first entry
1528 after the last one in the table; sets *CNT to the number of
1529 remaining values. If there are no entries in TP at all, returns
1530 NULL. A yucky interface, admittedly, but it works. */
1531 static struct table_entry **
1532 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1534 struct table_entry *fp = *tp;
1539 x = xtab[(*tp)->table];
1547 if ((*tp)->table != fp->table)
1552 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1559 /* Integer mode correspondent to find_pivot_extent_general(). This
1560 could be optimized somewhat, but I just don't give a crap about
1561 CROSSTABS performance in integer mode, which is just a
1562 CROSSTABS wart as far as I'm concerned.
1564 That said, feel free to send optimization patches to me. */
1565 static struct table_entry **
1566 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1568 struct table_entry *fp = *tp;
1573 x = xtab[(*tp)->table];
1581 if ((*tp)->table != fp->table)
1586 if (memcmp (&(*tp)->values[2], &fp->values[2],
1587 sizeof (union value) * (x->nvar - 2)))
1594 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1595 result. WIDTH_ points to an int which is either 0 for a
1596 numeric value or a string width for a string value. */
1598 compare_value (const void *a_, const void *b_, void *width_)
1600 const union value *a = a_;
1601 const union value *b = b_;
1602 const int *pwidth = width_;
1603 const int width = *pwidth;
1606 return (a->f < b->f) ? -1 : (a->f > b->f);
1608 return strncmp (a->s, b->s, width);
1611 /* Given an array of ENTRY_CNT table_entry structures starting at
1612 ENTRIES, creates a sorted list of the values that the variable
1613 with index VAR_IDX takes on. The values are returned as a
1614 malloc()'darray stored in *VALUES, with the number of values
1615 stored in *VALUE_CNT.
1618 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1619 union value **values, int *value_cnt)
1621 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1623 if (mode == GENERAL)
1625 int width = v->width;
1628 *values = xmalloc (sizeof **values * entry_cnt);
1629 for (i = 0; i < entry_cnt; i++)
1630 (*values)[i] = entries[i]->values[var_idx];
1631 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1632 compare_value, &width);
1636 struct var_range *vr = get_var_range (v);
1639 assert (mode == INTEGER);
1640 *values = xmalloc (sizeof **values * vr->count);
1641 for (i = 0; i < vr->count; i++)
1642 (*values)[i].f = i + vr->min;
1643 *value_cnt = vr->count;
1647 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1648 from V, displayed with print format spec from variable VAR. When
1649 in REPORT missing-value mode, missing values have an M appended. */
1651 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1652 const union value *v, const struct variable *var)
1654 struct fixed_string s;
1656 const char *label = val_labs_find (var->val_labs, *v);
1659 tab_text (table, c, r, TAB_LEFT, label);
1663 s.string = tab_alloc (table, var->print.w);
1664 format_short (s.string, &var->print, v);
1665 s.length = strlen (s.string);
1666 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1667 s.string[s.length++] = 'M';
1668 while (s.length && *s.string == ' ')
1673 tab_raw (table, c, r, opt, &s);
1676 /* Draws a line across TABLE at the current row to indicate the most
1677 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1678 that changed, and puts the values that changed into the table. TB
1679 and X must be the corresponding table_entry and crosstab,
1682 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1684 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1686 for (; first_difference >= 2; first_difference--)
1687 table_value_missing (table, nvar - first_difference - 1, 0,
1688 TAB_RIGHT, &tb->values[first_difference],
1689 x->vars[first_difference]);
1692 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1693 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1694 additionally suffixed with a letter `M'. */
1696 format_cell_entry (struct tab_table *table, int c, int r, double value,
1697 char suffix, int mark_missing)
1699 const struct fmt_spec f = {FMT_F, 10, 1};
1701 struct fixed_string s;
1704 s.string = tab_alloc (table, 16);
1706 data_out (s.string, &f, &v);
1707 while (*s.string == ' ')
1713 s.string[s.length++] = suffix;
1715 s.string[s.length++] = 'M';
1717 tab_raw (table, c, r, TAB_RIGHT, &s);
1720 /* Displays the crosstabulation table. */
1722 display_crosstabulation (void)
1727 for (r = 0; r < n_rows; r++)
1728 table_value_missing (table, nvar - 2, r * num_cells,
1729 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1731 tab_text (table, nvar - 2, n_rows * num_cells,
1732 TAB_LEFT, _("Total"));
1734 /* Put in the actual cells. */
1739 tab_offset (table, nvar - 1, -1);
1740 for (r = 0; r < n_rows; r++)
1743 tab_hline (table, TAL_1, -1, n_cols, 0);
1744 for (c = 0; c < n_cols; c++)
1746 int mark_missing = 0;
1747 double expected_value = row_tot[r] * col_tot[c] / W;
1748 if (cmd.miss == CRS_REPORT
1749 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1750 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1753 for (i = 0; i < num_cells; i++)
1764 v = *mp / row_tot[r] * 100.;
1768 v = *mp / col_tot[c] * 100.;
1775 case CRS_CL_EXPECTED:
1778 case CRS_CL_RESIDUAL:
1779 v = *mp - expected_value;
1781 case CRS_CL_SRESIDUAL:
1782 v = (*mp - expected_value) / sqrt (expected_value);
1784 case CRS_CL_ASRESIDUAL:
1785 v = ((*mp - expected_value)
1786 / sqrt (expected_value
1787 * (1. - row_tot[r] / W)
1788 * (1. - col_tot[c] / W)));
1795 format_cell_entry (table, c, i, v, suffix, mark_missing);
1801 tab_offset (table, -1, tab_row (table) + num_cells);
1809 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1810 for (r = 0; r < n_rows; r++)
1813 int mark_missing = 0;
1815 if (cmd.miss == CRS_REPORT
1816 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1819 for (i = 0; i < num_cells; i++)
1833 v = row_tot[r] / W * 100.;
1837 v = row_tot[r] / W * 100.;
1840 case CRS_CL_EXPECTED:
1841 case CRS_CL_RESIDUAL:
1842 case CRS_CL_SRESIDUAL:
1843 case CRS_CL_ASRESIDUAL:
1851 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1852 tab_next_row (table);
1857 /* Column totals, grand total. */
1863 tab_hline (table, TAL_1, -1, n_cols, 0);
1864 for (c = 0; c <= n_cols; c++)
1866 double ct = c < n_cols ? col_tot[c] : W;
1867 int mark_missing = 0;
1871 if (cmd.miss == CRS_REPORT && c < n_cols
1872 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1875 for (i = 0; i < num_cells; i++)
1897 case CRS_CL_EXPECTED:
1898 case CRS_CL_RESIDUAL:
1899 case CRS_CL_SRESIDUAL:
1900 case CRS_CL_ASRESIDUAL:
1907 format_cell_entry (table, c, i, v, suffix, mark_missing);
1912 tab_offset (table, -1, tab_row (table) + last_row);
1915 tab_offset (table, 0, -1);
1918 static void calc_r (double *X, double *Y, double *, double *, double *);
1919 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1921 /* Display chi-square statistics. */
1923 display_chisq (void)
1925 static const char *chisq_stats[N_CHISQ] =
1927 N_("Pearson Chi-Square"),
1928 N_("Likelihood Ratio"),
1929 N_("Fisher's Exact Test"),
1930 N_("Continuity Correction"),
1931 N_("Linear-by-Linear Association"),
1933 double chisq_v[N_CHISQ];
1934 double fisher1, fisher2;
1940 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1942 tab_offset (chisq, nvar - 2, -1);
1944 for (i = 0; i < N_CHISQ; i++)
1946 if ((i != 2 && chisq_v[i] == SYSMIS)
1947 || (i == 2 && fisher1 == SYSMIS))
1951 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1954 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1955 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1956 tab_float (chisq, 3, 0, TAB_RIGHT,
1957 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1962 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1963 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1965 tab_next_row (chisq);
1968 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1969 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1970 tab_next_row (chisq);
1972 tab_offset (chisq, 0, -1);
1975 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1976 double[N_SYMMETRIC]);
1978 /* Display symmetric measures. */
1980 display_symmetric (void)
1982 static const char *categories[] =
1984 N_("Nominal by Nominal"),
1985 N_("Ordinal by Ordinal"),
1986 N_("Interval by Interval"),
1987 N_("Measure of Agreement"),
1990 static const char *stats[N_SYMMETRIC] =
1994 N_("Contingency Coefficient"),
1995 N_("Kendall's tau-b"),
1996 N_("Kendall's tau-c"),
1998 N_("Spearman Correlation"),
2003 static const int stats_categories[N_SYMMETRIC] =
2005 0, 0, 0, 1, 1, 1, 1, 2, 3,
2009 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2012 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2015 tab_offset (sym, nvar - 2, -1);
2017 for (i = 0; i < N_SYMMETRIC; i++)
2019 if (sym_v[i] == SYSMIS)
2022 if (stats_categories[i] != last_cat)
2024 last_cat = stats_categories[i];
2025 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2028 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2029 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2030 if (sym_ase[i] != SYSMIS)
2031 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2032 if (sym_t[i] != SYSMIS)
2033 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2034 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2038 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2039 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2042 tab_offset (sym, 0, -1);
2045 static int calc_risk (double[], double[], double[], union value *);
2047 /* Display risk estimate. */
2052 double risk_v[3], lower[3], upper[3];
2056 if (!calc_risk (risk_v, upper, lower, c))
2059 tab_offset (risk, nvar - 2, -1);
2061 for (i = 0; i < 3; i++)
2063 if (risk_v[i] == SYSMIS)
2069 if (x->vars[COL_VAR]->type == NUMERIC)
2070 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2071 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2073 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2074 x->vars[COL_VAR]->name,
2075 x->vars[COL_VAR]->width, c[0].s,
2076 x->vars[COL_VAR]->width, c[1].s);
2080 if (x->vars[ROW_VAR]->type == NUMERIC)
2081 sprintf (buf, _("For cohort %s = %g"),
2082 x->vars[ROW_VAR]->name, rows[i - 1].f);
2084 sprintf (buf, _("For cohort %s = %.*s"),
2085 x->vars[ROW_VAR]->name,
2086 x->vars[ROW_VAR]->width, rows[i - 1].s);
2090 tab_text (risk, 0, 0, TAB_LEFT, buf);
2091 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2092 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2093 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2094 tab_next_row (risk);
2097 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2098 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2099 tab_next_row (risk);
2101 tab_offset (risk, 0, -1);
2104 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2105 double[N_DIRECTIONAL]);
2107 /* Display directional measures. */
2109 display_directional (void)
2111 static const char *categories[] =
2113 N_("Nominal by Nominal"),
2114 N_("Ordinal by Ordinal"),
2115 N_("Nominal by Interval"),
2118 static const char *stats[] =
2121 N_("Goodman and Kruskal tau"),
2122 N_("Uncertainty Coefficient"),
2127 static const char *types[] =
2134 static const int stats_categories[N_DIRECTIONAL] =
2136 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2139 static const int stats_stats[N_DIRECTIONAL] =
2141 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2144 static const int stats_types[N_DIRECTIONAL] =
2146 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2149 static const int *stats_lookup[] =
2156 static const char **stats_names[] =
2168 double direct_v[N_DIRECTIONAL];
2169 double direct_ase[N_DIRECTIONAL];
2170 double direct_t[N_DIRECTIONAL];
2174 if (!calc_directional (direct_v, direct_ase, direct_t))
2177 tab_offset (direct, nvar - 2, -1);
2179 for (i = 0; i < N_DIRECTIONAL; i++)
2181 if (direct_v[i] == SYSMIS)
2187 for (j = 0; j < 3; j++)
2188 if (last[j] != stats_lookup[j][i])
2191 tab_hline (direct, TAL_1, j, 6, 0);
2196 int k = last[j] = stats_lookup[j][i];
2201 string = x->vars[0]->name;
2203 string = x->vars[1]->name;
2205 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2206 gettext (stats_names[j][k]), string);
2211 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2212 if (direct_ase[i] != SYSMIS)
2213 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2214 if (direct_t[i] != SYSMIS)
2215 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2216 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2217 tab_next_row (direct);
2220 tab_offset (direct, 0, -1);
2223 /* Statistical calculations. */
2225 /* Returns the value of the gamma (factorial) function for an integer
2228 gamma_int (double x)
2233 for (i = 2; i < x; i++)
2238 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2240 static inline double
2241 Pr (int a, int b, int c, int d)
2243 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2244 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2245 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2246 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2247 / gamma_int (a + b + c + d + 1.));
2250 /* Swap the contents of A and B. */
2252 swap (int *a, int *b)
2259 /* Calculate significance for Fisher's exact test as specified in
2260 _SPSS Statistical Algorithms_, Appendix 5. */
2262 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2266 if (min (c, d) < min (a, b))
2267 swap (&a, &c), swap (&b, &d);
2268 if (min (b, d) < min (a, c))
2269 swap (&a, &b), swap (&c, &d);
2273 swap (&a, &b), swap (&c, &d);
2275 swap (&a, &c), swap (&b, &d);
2279 for (x = 0; x <= a; x++)
2280 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2282 *fisher2 = *fisher1;
2283 for (x = 1; x <= b; x++)
2284 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2287 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2288 columns with values COLS and N_ROWS rows with values ROWS. Values
2289 in the matrix sum to W. */
2291 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2292 double *fisher1, double *fisher2)
2296 chisq[0] = chisq[1] = 0.;
2297 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2298 *fisher1 = *fisher2 = SYSMIS;
2300 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2302 if (ns_rows <= 1 || ns_cols <= 1)
2304 chisq[0] = chisq[1] = SYSMIS;
2308 for (r = 0; r < n_rows; r++)
2309 for (c = 0; c < n_cols; c++)
2311 const double expected = row_tot[r] * col_tot[c] / W;
2312 const double freq = mat[n_cols * r + c];
2313 const double residual = freq - expected;
2315 chisq[0] += residual * residual / expected;
2317 chisq[1] += freq * log (expected / freq);
2328 /* Calculate Yates and Fisher exact test. */
2329 if (ns_cols == 2 && ns_rows == 2)
2331 double f11, f12, f21, f22;
2337 for (i = j = 0; i < n_cols; i++)
2338 if (col_tot[i] != 0.)
2347 f11 = mat[nz_cols[0]];
2348 f12 = mat[nz_cols[1]];
2349 f21 = mat[nz_cols[0] + n_cols];
2350 f22 = mat[nz_cols[1] + n_cols];
2355 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2358 chisq[3] = (W * x * x
2359 / (f11 + f12) / (f21 + f22)
2360 / (f11 + f21) / (f12 + f22));
2368 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2369 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2372 /* Calculate Mantel-Haenszel. */
2373 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2375 double r, ase_0, ase_1;
2376 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2378 chisq[4] = (W - 1.) * r * r;
2383 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2384 ASE_1, and ase_0 into ASE_0. The row and column values must be
2385 passed in X and Y. */
2387 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2389 double SX, SY, S, T;
2391 double sum_XYf, sum_X2Y2f;
2392 double sum_Xr, sum_X2r;
2393 double sum_Yc, sum_Y2c;
2396 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2397 for (j = 0; j < n_cols; j++)
2399 double fij = mat[j + i * n_cols];
2400 double product = X[i] * Y[j];
2401 double temp = fij * product;
2403 sum_X2Y2f += temp * product;
2406 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2408 sum_Xr += X[i] * row_tot[i];
2409 sum_X2r += X[i] * X[i] * row_tot[i];
2413 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2415 sum_Yc += Y[i] * col_tot[i];
2416 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2420 S = sum_XYf - sum_Xr * sum_Yc / W;
2421 SX = sum_X2r - sum_Xr * sum_Xr / W;
2422 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2425 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2430 for (s = c = 0., i = 0; i < n_rows; i++)
2431 for (j = 0; j < n_cols; j++)
2433 double Xresid, Yresid;
2436 Xresid = X[i] - Xbar;
2437 Yresid = Y[j] - Ybar;
2438 temp = (T * Xresid * Yresid
2440 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2441 y = mat[j + i * n_cols] * temp * temp - c;
2446 *ase_1 = sqrt (s) / (T * T);
2450 static double somers_d_v[3];
2451 static double somers_d_ase[3];
2452 static double somers_d_t[3];
2454 /* Calculate symmetric statistics and their asymptotic standard
2455 errors. Returns 0 if none could be calculated. */
2457 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2458 double t[N_SYMMETRIC])
2460 int q = min (ns_rows, ns_cols);
2469 for (i = 0; i < N_SYMMETRIC; i++)
2470 v[i] = ase[i] = t[i] = SYSMIS;
2473 /* Phi, Cramer's V, contingency coefficient. */
2474 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2476 double Xp = 0.; /* Pearson chi-square. */
2481 for (r = 0; r < n_rows; r++)
2482 for (c = 0; c < n_cols; c++)
2484 const double expected = row_tot[r] * col_tot[c] / W;
2485 const double freq = mat[n_cols * r + c];
2486 const double residual = freq - expected;
2488 Xp += residual * residual / expected;
2492 if (cmd.a_statistics[CRS_ST_PHI])
2494 v[0] = sqrt (Xp / W);
2495 v[1] = sqrt (Xp / (W * (q - 1)));
2497 if (cmd.a_statistics[CRS_ST_CC])
2498 v[2] = sqrt (Xp / (Xp + W));
2501 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2502 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2507 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2514 for (r = 0; r < n_rows; r++)
2515 Dr -= row_tot[r] * row_tot[r];
2516 for (c = 0; c < n_cols; c++)
2517 Dc -= col_tot[c] * col_tot[c];
2523 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2524 for (c = 0; c < n_cols; c++)
2528 for (r = 0; r < n_rows; r++)
2529 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2539 for (i = 0; i < n_rows; i++)
2543 for (j = 1; j < n_cols; j++)
2544 Cij += col_tot[j] - cum[j + i * n_cols];
2547 for (j = 1; j < n_cols; j++)
2548 Dij += cum[j + (i - 1) * n_cols];
2552 double fij = mat[j + i * n_cols];
2558 assert (j < n_cols);
2560 Cij -= col_tot[j] - cum[j + i * n_cols];
2561 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2565 Cij += cum[j - 1 + (i - 1) * n_cols];
2566 Dij -= cum[j + (i - 1) * n_cols];
2572 if (cmd.a_statistics[CRS_ST_BTAU])
2573 v[3] = (P - Q) / sqrt (Dr * Dc);
2574 if (cmd.a_statistics[CRS_ST_CTAU])
2575 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2576 if (cmd.a_statistics[CRS_ST_GAMMA])
2577 v[5] = (P - Q) / (P + Q);
2579 /* ASE for tau-b, tau-c, gamma. Calculations could be
2580 eliminated here, at expense of memory. */
2585 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2586 for (i = 0; i < n_rows; i++)
2590 for (j = 1; j < n_cols; j++)
2591 Cij += col_tot[j] - cum[j + i * n_cols];
2594 for (j = 1; j < n_cols; j++)
2595 Dij += cum[j + (i - 1) * n_cols];
2599 double fij = mat[j + i * n_cols];
2601 if (cmd.a_statistics[CRS_ST_BTAU])
2603 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2604 + v[3] * (row_tot[i] * Dc
2605 + col_tot[j] * Dr));
2606 btau_cum += fij * temp * temp;
2610 const double temp = Cij - Dij;
2611 ctau_cum += fij * temp * temp;
2614 if (cmd.a_statistics[CRS_ST_GAMMA])
2616 const double temp = Q * Cij - P * Dij;
2617 gamma_cum += fij * temp * temp;
2620 if (cmd.a_statistics[CRS_ST_D])
2622 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2623 - (P - Q) * (W - row_tot[i]));
2624 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2625 - (Q - P) * (W - col_tot[j]));
2630 assert (j < n_cols);
2632 Cij -= col_tot[j] - cum[j + i * n_cols];
2633 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2637 Cij += cum[j - 1 + (i - 1) * n_cols];
2638 Dij -= cum[j + (i - 1) * n_cols];
2644 btau_var = ((btau_cum
2645 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2647 if (cmd.a_statistics[CRS_ST_BTAU])
2649 ase[3] = sqrt (btau_var);
2650 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2653 if (cmd.a_statistics[CRS_ST_CTAU])
2655 ase[4] = ((2 * q / ((q - 1) * W * W))
2656 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2657 t[4] = v[4] / ase[4];
2659 if (cmd.a_statistics[CRS_ST_GAMMA])
2661 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2662 t[5] = v[5] / (2. / (P + Q)
2663 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2665 if (cmd.a_statistics[CRS_ST_D])
2667 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2668 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2669 somers_d_t[0] = (somers_d_v[0]
2671 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2672 somers_d_v[1] = (P - Q) / Dc;
2673 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2674 somers_d_t[1] = (somers_d_v[1]
2676 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2677 somers_d_v[2] = (P - Q) / Dr;
2678 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2679 somers_d_t[2] = (somers_d_v[2]
2681 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2687 /* Spearman correlation, Pearson's r. */
2688 if (cmd.a_statistics[CRS_ST_CORR])
2690 double *R = local_alloc (sizeof *R * n_rows);
2691 double *C = local_alloc (sizeof *C * n_cols);
2694 double y, t, c = 0., s = 0.;
2699 R[i] = s + (row_tot[i] + 1.) / 2.;
2706 assert (i < n_rows);
2711 double y, t, c = 0., s = 0.;
2716 C[j] = s + (col_tot[j] + 1.) / 2;
2723 assert (j < n_cols);
2727 calc_r (R, C, &v[6], &t[6], &ase[6]);
2733 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2737 /* Cohen's kappa. */
2738 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2740 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2743 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2744 i < ns_rows; i++, j++)
2748 while (col_tot[j] == 0.)
2751 prod = row_tot[i] * col_tot[j];
2752 sum = row_tot[i] + col_tot[j];
2754 sum_fii += mat[j + i * n_cols];
2756 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2757 sum_riciri_ci += prod * sum;
2759 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2760 for (j = 0; j < ns_cols; j++)
2762 double sum = row_tot[i] + col_tot[j];
2763 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2766 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2768 ase[8] = sqrt ((W * W * sum_rici
2769 + sum_rici * sum_rici
2770 - W * sum_riciri_ci)
2771 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2773 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2774 / pow2 (W * W - sum_rici))
2775 + ((2. * (W - sum_fii)
2776 * (2. * sum_fii * sum_rici
2777 - W * sum_fiiri_ci))
2778 / cube (W * W - sum_rici))
2779 + (pow2 (W - sum_fii)
2780 * (W * sum_fijri_ci2 - 4.
2781 * sum_rici * sum_rici)
2782 / pow4 (W * W - sum_rici))));
2784 t[8] = v[8] / ase[8];
2791 /* Calculate risk estimate. */
2793 calc_risk (double *value, double *upper, double *lower, union value *c)
2795 double f11, f12, f21, f22;
2801 for (i = 0; i < 3; i++)
2802 value[i] = upper[i] = lower[i] = SYSMIS;
2805 if (ns_rows != 2 || ns_cols != 2)
2812 for (i = j = 0; i < n_cols; i++)
2813 if (col_tot[i] != 0.)
2822 f11 = mat[nz_cols[0]];
2823 f12 = mat[nz_cols[1]];
2824 f21 = mat[nz_cols[0] + n_cols];
2825 f22 = mat[nz_cols[1] + n_cols];
2827 c[0] = cols[nz_cols[0]];
2828 c[1] = cols[nz_cols[1]];
2831 value[0] = (f11 * f22) / (f12 * f21);
2832 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2833 lower[0] = value[0] * exp (-1.960 * v);
2834 upper[0] = value[0] * exp (1.960 * v);
2836 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2837 v = sqrt ((f12 / (f11 * (f11 + f12)))
2838 + (f22 / (f21 * (f21 + f22))));
2839 lower[1] = value[1] * exp (-1.960 * v);
2840 upper[1] = value[1] * exp (1.960 * v);
2842 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2843 v = sqrt ((f11 / (f12 * (f11 + f12)))
2844 + (f21 / (f22 * (f21 + f22))));
2845 lower[2] = value[2] * exp (-1.960 * v);
2846 upper[2] = value[2] * exp (1.960 * v);
2851 /* Calculate directional measures. */
2853 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2854 double t[N_DIRECTIONAL])
2859 for (i = 0; i < N_DIRECTIONAL; i++)
2860 v[i] = ase[i] = t[i] = SYSMIS;
2864 if (cmd.a_statistics[CRS_ST_LAMBDA])
2866 double *fim = xmalloc (sizeof *fim * n_rows);
2867 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2868 double *fmj = xmalloc (sizeof *fmj * n_cols);
2869 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2870 double sum_fim, sum_fmj;
2872 int rm_index, cm_index;
2875 /* Find maximum for each row and their sum. */
2876 for (sum_fim = 0., i = 0; i < n_rows; i++)
2878 double max = mat[i * n_cols];
2881 for (j = 1; j < n_cols; j++)
2882 if (mat[j + i * n_cols] > max)
2884 max = mat[j + i * n_cols];
2888 sum_fim += fim[i] = max;
2889 fim_index[i] = index;
2892 /* Find maximum for each column. */
2893 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2895 double max = mat[j];
2898 for (i = 1; i < n_rows; i++)
2899 if (mat[j + i * n_cols] > max)
2901 max = mat[j + i * n_cols];
2905 sum_fmj += fmj[j] = max;
2906 fmj_index[j] = index;
2909 /* Find maximum row total. */
2912 for (i = 1; i < n_rows; i++)
2913 if (row_tot[i] > rm)
2919 /* Find maximum column total. */
2922 for (j = 1; j < n_cols; j++)
2923 if (col_tot[j] > cm)
2929 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2930 v[1] = (sum_fmj - rm) / (W - rm);
2931 v[2] = (sum_fim - cm) / (W - cm);
2933 /* ASE1 for Y given X. */
2937 for (accum = 0., i = 0; i < n_rows; i++)
2938 for (j = 0; j < n_cols; j++)
2940 const int deltaj = j == cm_index;
2941 accum += (mat[j + i * n_cols]
2942 * pow2 ((j == fim_index[i])
2947 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2950 /* ASE0 for Y given X. */
2954 for (accum = 0., i = 0; i < n_rows; i++)
2955 if (cm_index != fim_index[i])
2956 accum += (mat[i * n_cols + fim_index[i]]
2957 + mat[i * n_cols + cm_index]);
2958 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2961 /* ASE1 for X given Y. */
2965 for (accum = 0., i = 0; i < n_rows; i++)
2966 for (j = 0; j < n_cols; j++)
2968 const int deltaj = i == rm_index;
2969 accum += (mat[j + i * n_cols]
2970 * pow2 ((i == fmj_index[j])
2975 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2978 /* ASE0 for X given Y. */
2982 for (accum = 0., j = 0; j < n_cols; j++)
2983 if (rm_index != fmj_index[j])
2984 accum += (mat[j + n_cols * fmj_index[j]]
2985 + mat[j + n_cols * rm_index]);
2986 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2989 /* Symmetric ASE0 and ASE1. */
2994 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
2995 for (j = 0; j < n_cols; j++)
2997 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
2998 int temp1 = (i == rm_index) + (j == cm_index);
2999 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3000 accum1 += (mat[j + i * n_cols]
3001 * pow2 (temp0 + (v[0] - 1.) * temp1));
3003 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3004 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3005 / (2. * W - rm - cm));
3014 double sum_fij2_ri, sum_fij2_ci;
3015 double sum_ri2, sum_cj2;
3017 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3018 for (j = 0; j < n_cols; j++)
3020 double temp = pow2 (mat[j + i * n_cols]);
3021 sum_fij2_ri += temp / row_tot[i];
3022 sum_fij2_ci += temp / col_tot[j];
3025 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3026 sum_ri2 += row_tot[i] * row_tot[i];
3028 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3029 sum_cj2 += col_tot[j] * col_tot[j];
3031 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3032 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3036 if (cmd.a_statistics[CRS_ST_UC])
3038 double UX, UY, UXY, P;
3039 double ase1_yx, ase1_xy, ase1_sym;
3042 for (UX = 0., i = 0; i < n_rows; i++)
3043 if (row_tot[i] > 0.)
3044 UX -= row_tot[i] / W * log (row_tot[i] / W);
3046 for (UY = 0., j = 0; j < n_cols; j++)
3047 if (col_tot[j] > 0.)
3048 UY -= col_tot[j] / W * log (col_tot[j] / W);
3050 for (UXY = P = 0., i = 0; i < n_rows; i++)
3051 for (j = 0; j < n_cols; j++)
3053 double entry = mat[j + i * n_cols];
3058 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3059 UXY -= entry / W * log (entry / W);
3062 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3063 for (j = 0; j < n_cols; j++)
3065 double entry = mat[j + i * n_cols];
3070 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3071 + (UX - UXY) * log (col_tot[j] / W));
3072 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3073 + (UY - UXY) * log (row_tot[i] / W));
3074 ase1_sym += entry * pow2 ((UXY
3075 * log (row_tot[i] * col_tot[j] / (W * W)))
3076 - (UX + UY) * log (entry / W));
3079 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3080 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3081 t[5] = v[5] / ((2. / (W * (UX + UY)))
3082 * sqrt (P - pow2 (UX + UY - UXY) / W));
3084 v[6] = (UX + UY - UXY) / UX;
3085 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3086 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3088 v[7] = (UX + UY - UXY) / UY;
3089 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3090 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3094 if (cmd.a_statistics[CRS_ST_D])
3099 calc_symmetric (NULL, NULL, NULL);
3100 for (i = 0; i < 3; i++)
3102 v[8 + i] = somers_d_v[i];
3103 ase[8 + i] = somers_d_ase[i];
3104 t[8 + i] = somers_d_t[i];
3109 if (cmd.a_statistics[CRS_ST_ETA])
3112 double sum_Xr, sum_X2r;
3116 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3118 sum_Xr += rows[i].f * row_tot[i];
3119 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3121 SX = sum_X2r - sum_Xr * sum_Xr / W;
3123 for (SXW = 0., j = 0; j < n_cols; j++)
3127 for (cum = 0., i = 0; i < n_rows; i++)
3129 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3130 cum += rows[i].f * mat[j + i * n_cols];
3133 SXW -= cum * cum / col_tot[j];
3135 v[11] = sqrt (1. - SXW / SX);
3139 double sum_Yc, sum_Y2c;
3143 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3145 sum_Yc += cols[i].f * col_tot[i];
3146 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3148 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3150 for (SYW = 0., i = 0; i < n_rows; i++)
3154 for (cum = 0., j = 0; j < n_cols; j++)
3156 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3157 cum += cols[j].f * mat[j + i * n_cols];
3160 SYW -= cum * cum / row_tot[i];
3162 v[12] = sqrt (1. - SYW / SY);
3169 /* A wrapper around data_out() that limits string output to short
3170 string width and null terminates the result. */
3172 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3174 struct fmt_spec fmt_subst;
3176 /* Limit to short string width. */
3177 if (formats[fp->type].cat & FCAT_STRING)
3181 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3182 if (fmt_subst.type == FMT_A)
3183 fmt_subst.w = min (8, fmt_subst.w);
3185 fmt_subst.w = min (16, fmt_subst.w);
3191 data_out (s, fp, v);
3193 /* Null terminate. */