1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 - Pearson's R (but not Spearman!) is off a little.
22 - T values for Spearman's R and Pearson's R are wrong.
23 - How to calculate significance of symmetric and directional measures?
24 - Asymmetric ASEs and T values for lambda are wrong.
25 - ASE of Goodman and Kruskal's tau is not calculated.
26 - ASE of symmetric somers' d is wrong.
27 - Approx. T of uncertainty coefficient is wrong.
34 #include <gsl/gsl_cdf.h>
38 #include <data/case.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
75 missing=miss:!table/include/report;
76 +write[wr_]=none,cells,all;
77 +format=fmt:!labels/nolabels/novallabs,
80 tabl:!tables/notables,
83 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
85 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
91 /* Number of chi-square statistics. */
94 /* Number of symmetric statistics. */
97 /* Number of directional statistics. */
98 #define N_DIRECTIONAL 13
100 /* A single table entry for general mode. */
103 int table; /* Flattened table number. */
106 double freq; /* Frequency count. */
107 double *data; /* Crosstabulation table for integer mode. */
110 union value values[1]; /* Values. */
113 /* A crosstabulation. */
116 int nvar; /* Number of variables. */
117 double missing; /* Missing cases count. */
118 int ofs; /* Integer mode: Offset into sorted_tab[]. */
119 struct variable *vars[2]; /* At least two variables; sorted by
120 larger indices first. */
123 /* Integer mode variable info. */
126 int min; /* Minimum value. */
127 int max; /* Maximum value + 1. */
128 int count; /* max - min. */
131 static inline struct var_range *
132 get_var_range (struct variable *v)
134 return var_get_aux (v);
137 /* Indexes into crosstab.v. */
144 /* General mode crosstabulation table. */
145 static struct hsh_table *gen_tab; /* Hash table. */
146 static int n_sorted_tab; /* Number of entries in sorted_tab. */
147 static struct table_entry **sorted_tab; /* Sorted table. */
149 /* Variables specifies on VARIABLES. */
150 static struct variable **variables;
151 static size_t variables_cnt;
154 static struct crosstab **xtab;
157 /* Integer or general mode? */
166 static int num_cells; /* Number of cells requested. */
167 static int cells[8]; /* Cells requested. */
170 static int write; /* One of WR_* that specifies the WRITE style. */
172 /* Command parsing info. */
173 static struct cmd_crosstabs cmd;
176 static struct pool *pl_tc; /* For table cells. */
177 static struct pool *pl_col; /* For column data. */
179 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
180 static void precalc (const struct ccase *, void *, const struct dataset *);
181 static bool calc_general (const struct ccase *, void *, const struct dataset *);
182 static bool calc_integer (const struct ccase *, void *, const struct dataset *);
183 static bool postcalc (void *, const struct dataset *);
184 static void submit (struct tab_table *);
186 static void format_short (char *s, const struct fmt_spec *fp,
187 const union value *v);
189 /* Parse and execute CROSSTABS, then clean up. */
191 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
193 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
202 /* Parses and executes the CROSSTABS procedure. */
204 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
213 pl_tc = pool_create ();
214 pl_col = pool_create ();
216 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
219 mode = variables ? INTEGER : GENERAL;
224 cmd.a_cells[CRS_CL_COUNT] = 1;
230 for (i = 0; i < CRS_CL_count; i++)
235 cmd.a_cells[CRS_CL_COUNT] = 1;
236 cmd.a_cells[CRS_CL_ROW] = 1;
237 cmd.a_cells[CRS_CL_COLUMN] = 1;
238 cmd.a_cells[CRS_CL_TOTAL] = 1;
240 if (cmd.a_cells[CRS_CL_ALL])
242 for (i = 0; i < CRS_CL_count; i++)
244 cmd.a_cells[CRS_CL_ALL] = 0;
246 cmd.a_cells[CRS_CL_NONE] = 0;
248 for (num_cells = i = 0; i < CRS_CL_count; i++)
250 cells[num_cells++] = i;
253 if (cmd.sbc_statistics)
258 for (i = 0; i < CRS_ST_count; i++)
259 if (cmd.a_statistics[i])
262 cmd.a_statistics[CRS_ST_CHISQ] = 1;
263 if (cmd.a_statistics[CRS_ST_ALL])
264 for (i = 0; i < CRS_ST_count; i++)
265 cmd.a_statistics[i] = 1;
269 if (cmd.miss == CRS_REPORT && mode == GENERAL)
271 msg (SE, _("Missing mode REPORT not allowed in general mode. "
272 "Assuming MISSING=TABLE."));
273 cmd.miss = CRS_TABLE;
277 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
278 cmd.a_write[CRS_WR_ALL] = 0;
279 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
281 msg (SE, _("Write mode ALL not allowed in general mode. "
282 "Assuming WRITE=CELLS."));
283 cmd.a_write[CRS_WR_CELLS] = 1;
286 && (cmd.a_write[CRS_WR_NONE]
287 + cmd.a_write[CRS_WR_ALL]
288 + cmd.a_write[CRS_WR_CELLS] == 0))
289 cmd.a_write[CRS_WR_CELLS] = 1;
290 if (cmd.a_write[CRS_WR_CELLS])
291 write = CRS_WR_CELLS;
292 else if (cmd.a_write[CRS_WR_ALL])
297 ok = procedure_with_splits (ds, precalc,
298 mode == GENERAL ? calc_general : calc_integer,
301 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
304 /* Parses the TABLES subcommand. */
306 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
308 struct var_set *var_set;
310 struct variable ***by = NULL;
311 size_t *by_nvar = NULL;
315 /* Ensure that this is a TABLES subcommand. */
316 if (!lex_match_id (lexer, "TABLES")
317 && (lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
318 && lex_token (lexer) != T_ALL)
320 lex_match (lexer, '=');
322 if (variables != NULL)
323 var_set = var_set_create_from_array (variables, variables_cnt);
325 var_set = var_set_create_from_dict (dataset_dict (ds));
326 assert (var_set != NULL);
330 by = xnrealloc (by, n_by + 1, sizeof *by);
331 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
332 if (!parse_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
333 PV_NO_DUPLICATE | PV_NO_SCRATCH))
335 if (xalloc_oversized (nx, by_nvar[n_by]))
337 msg (SE, _("Too many crosstabulation variables or dimensions."));
343 if (!lex_match (lexer, T_BY))
347 lex_error (lexer, _("expecting BY"));
356 int *by_iter = xcalloc (n_by, sizeof *by_iter);
359 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
360 for (i = 0; i < nx; i++)
364 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
371 for (i = 0; i < n_by; i++)
372 x->vars[i] = by[i][by_iter[i]];
378 for (i = n_by - 1; i >= 0; i--)
380 if (++by_iter[i] < by_nvar[i])
393 /* All return paths lead here. */
397 for (i = 0; i < n_by; i++)
403 var_set_destroy (var_set);
408 /* Parses the VARIABLES subcommand. */
410 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
414 msg (SE, _("VARIABLES must be specified before TABLES."));
418 lex_match (lexer, '=');
422 size_t orig_nv = variables_cnt;
427 if (!parse_variables (lexer, dataset_dict (ds),
428 &variables, &variables_cnt,
429 (PV_APPEND | PV_NUMERIC
430 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
433 if (lex_token (lexer) != '(')
435 lex_error (lexer, "expecting `('");
440 if (!lex_force_int (lexer))
442 min = lex_integer (lexer);
445 lex_match (lexer, ',');
447 if (!lex_force_int (lexer))
449 max = lex_integer (lexer);
452 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
458 if (lex_token (lexer) != ')')
460 lex_error (lexer, "expecting `)'");
465 for (i = orig_nv; i < variables_cnt; i++)
467 struct var_range *vr = xmalloc (sizeof *vr);
470 vr->count = max - min + 1;
471 var_attach_aux (variables[i], vr, var_dtor_free);
474 if (lex_token (lexer) == '/')
486 /* Data file processing. */
488 static int compare_table_entry (const void *, const void *, const void *);
489 static unsigned hash_table_entry (const void *, const void *);
491 /* Set up the crosstabulation tables for processing. */
493 precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
495 output_split_file_values (ds, first);
498 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
508 for (i = 0; i < nxtab; i++)
510 struct crosstab *x = xtab[i];
515 x->ofs = n_sorted_tab;
517 for (j = 2; j < x->nvar; j++)
518 count *= get_var_range (x->vars[j - 2])->count;
520 sorted_tab = xnrealloc (sorted_tab,
521 n_sorted_tab + count, sizeof *sorted_tab);
522 v = local_alloc (sizeof *v * x->nvar);
523 for (j = 2; j < x->nvar; j++)
524 v[j] = get_var_range (x->vars[j])->min;
525 for (j = 0; j < count; j++)
527 struct table_entry *te;
530 te = sorted_tab[n_sorted_tab++]
531 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
535 int row_cnt = get_var_range (x->vars[0])->count;
536 int col_cnt = get_var_range (x->vars[1])->count;
537 const int mat_size = row_cnt * col_cnt;
540 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
541 for (m = 0; m < mat_size; m++)
545 for (k = 2; k < x->nvar; k++)
546 te->values[k].f = v[k];
547 for (k = 2; k < x->nvar; k++)
549 struct var_range *vr = get_var_range (x->vars[k]);
550 if (++v[k] >= vr->max)
559 sorted_tab = xnrealloc (sorted_tab,
560 n_sorted_tab + 1, sizeof *sorted_tab);
561 sorted_tab[n_sorted_tab] = NULL;
566 /* Form crosstabulations for general mode. */
568 calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
570 bool bad_warn = true;
572 /* Missing values to exclude. */
573 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
574 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
578 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
580 /* Flattened current table index. */
583 for (t = 0; t < nxtab; t++)
585 struct crosstab *x = xtab[t];
586 const size_t entry_size = (sizeof (struct table_entry)
587 + sizeof (union value) * (x->nvar - 1));
588 struct table_entry *te = local_alloc (entry_size);
590 /* Construct table entry for the current record and table. */
596 for (j = 0; j < x->nvar; j++)
598 const union value *v = case_data (c, x->vars[j]);
599 if (var_is_value_missing (x->vars[j], v, exclude))
601 x->missing += weight;
605 if (var_is_numeric (x->vars[j]))
606 te->values[j].f = case_num (c, x->vars[j]);
609 memcpy (te->values[j].s, case_str (c, x->vars[j]),
610 var_get_width (x->vars[j]));
612 /* Necessary in order to simplify comparisons. */
613 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
614 sizeof (union value) - var_get_width (x->vars[j]));
619 /* Add record to hash table. */
621 struct table_entry **tepp
622 = (struct table_entry **) hsh_probe (gen_tab, te);
625 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
628 memcpy (tep, te, entry_size);
633 (*tepp)->u.freq += weight;
644 calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
646 bool bad_warn = true;
649 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
651 /* Flattened current table index. */
654 for (t = 0; t < nxtab; t++)
656 struct crosstab *x = xtab[t];
661 for (i = 0; i < x->nvar; i++)
663 struct variable *const v = x->vars[i];
664 struct var_range *vr = get_var_range (v);
665 double value = case_num (c, v);
667 /* Note that the first test also rules out SYSMIS. */
668 if ((value < vr->min || value >= vr->max)
669 || (cmd.miss == CRS_TABLE
670 && var_is_num_missing (v, value, MV_USER)))
672 x->missing += weight;
678 ofs += fact * ((int) value - vr->min);
684 struct variable *row_var = x->vars[ROW_VAR];
685 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
687 struct variable *col_var = x->vars[COL_VAR];
688 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
690 const int col_dim = get_var_range (col_var)->count;
692 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
701 /* Compare the table_entry's at A and B and return a strcmp()-type
704 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
706 const struct table_entry *a = a_;
707 const struct table_entry *b = b_;
709 if (a->table > b->table)
711 else if (a->table < b->table)
715 const struct crosstab *x = xtab[a->table];
718 for (i = x->nvar - 1; i >= 0; i--)
719 if (var_is_numeric (x->vars[i]))
721 const double diffnum = a->values[i].f - b->values[i].f;
724 else if (diffnum > 0)
729 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
730 var_get_width (x->vars[i]));
739 /* Calculate a hash value from table_entry A. */
741 hash_table_entry (const void *a_, const void *aux UNUSED)
743 const struct table_entry *a = a_;
748 for (i = 0; i < xtab[a->table]->nvar; i++)
749 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
754 /* Post-data reading calculations. */
756 static struct table_entry **find_pivot_extent (struct table_entry **,
757 int *cnt, int pivot);
758 static void enum_var_values (struct table_entry **entries, int entry_cnt,
760 union value **values, int *value_cnt);
761 static void output_pivot_table (struct table_entry **, struct table_entry **,
762 double **, double **, double **,
763 int *, int *, int *);
764 static void make_summary_table (void);
767 postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
771 n_sorted_tab = hsh_count (gen_tab);
772 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
775 make_summary_table ();
777 /* Identify all the individual crosstabulation tables, and deal with
780 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
781 int pc = n_sorted_tab; /* Pivot count. */
783 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
784 int maxrows = 0, maxcols = 0, maxcells = 0;
788 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
792 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
793 &maxrows, &maxcols, &maxcells);
802 hsh_destroy (gen_tab);
807 static void insert_summary (struct tab_table *, int tab_index, double valid);
809 /* Output a table summarizing the cases processed. */
811 make_summary_table (void)
813 struct tab_table *summary;
815 struct table_entry **pb = sorted_tab, **pe;
816 int pc = n_sorted_tab;
819 summary = tab_create (7, 3 + nxtab, 1);
820 tab_title (summary, _("Summary."));
821 tab_headers (summary, 1, 0, 3, 0);
822 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
823 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
824 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
825 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
826 tab_hline (summary, TAL_1, 1, 6, 1);
827 tab_hline (summary, TAL_1, 1, 6, 2);
828 tab_vline (summary, TAL_1, 3, 1, 1);
829 tab_vline (summary, TAL_1, 5, 1, 1);
833 for (i = 0; i < 3; i++)
835 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
836 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
839 tab_offset (summary, 0, 3);
845 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
849 while (cur_tab < (*pb)->table)
850 insert_summary (summary, cur_tab++, 0.);
853 for (valid = 0.; pb < pe; pb++)
854 valid += (*pb)->u.freq;
857 const struct crosstab *const x = xtab[(*pb)->table];
858 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
859 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
860 const int count = n_cols * n_rows;
862 for (valid = 0.; pb < pe; pb++)
864 const double *data = (*pb)->u.data;
867 for (i = 0; i < count; i++)
871 insert_summary (summary, cur_tab++, valid);
876 while (cur_tab < nxtab)
877 insert_summary (summary, cur_tab++, 0.);
882 /* Inserts a line into T describing the crosstabulation at index
883 TAB_INDEX, which has VALID valid observations. */
885 insert_summary (struct tab_table *t, int tab_index, double valid)
887 struct crosstab *x = xtab[tab_index];
889 tab_hline (t, TAL_1, 0, 6, 0);
891 /* Crosstabulation name. */
893 char *buf = local_alloc (128 * x->nvar);
897 for (i = 0; i < x->nvar; i++)
900 cp = stpcpy (cp, " * ");
902 cp = stpcpy (cp, var_to_string (x->vars[i]));
904 tab_text (t, 0, 0, TAB_LEFT, buf);
909 /* Counts and percentages. */
919 for (i = 0; i < 3; i++)
921 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
922 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
933 static struct tab_table *table; /* Crosstabulation table. */
934 static struct tab_table *chisq; /* Chi-square table. */
935 static struct tab_table *sym; /* Symmetric measures table. */
936 static struct tab_table *risk; /* Risk estimate table. */
937 static struct tab_table *direct; /* Directional measures table. */
940 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
942 /* Column values, number of columns. */
943 static union value *cols;
946 /* Row values, number of rows. */
947 static union value *rows;
950 /* Number of statistically interesting columns/rows (columns/rows with
952 static int ns_cols, ns_rows;
954 /* Crosstabulation. */
955 static const struct crosstab *x;
957 /* Number of variables from the crosstabulation to consider. This is
958 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
961 /* Matrix contents. */
962 static double *mat; /* Matrix proper. */
963 static double *row_tot; /* Row totals. */
964 static double *col_tot; /* Column totals. */
965 static double W; /* Grand total. */
967 static void display_dimensions (struct tab_table *, int first_difference,
968 struct table_entry *);
969 static void display_crosstabulation (void);
970 static void display_chisq (void);
971 static void display_symmetric (void);
972 static void display_risk (void);
973 static void display_directional (void);
974 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
975 static void table_value_missing (struct tab_table *table, int c, int r,
976 unsigned char opt, const union value *v,
977 const struct variable *var);
978 static void delete_missing (void);
980 /* Output pivot table beginning at PB and continuing until PE,
981 exclusive. For efficiency, *MATP is a pointer to a matrix that can
982 hold *MAXROWS entries. */
984 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
985 double **matp, double **row_totp, double **col_totp,
986 int *maxrows, int *maxcols, int *maxcells)
989 struct table_entry **tb = pb, **te; /* Table begin, table end. */
990 int tc = pe - pb; /* Table count. */
992 /* Table entry for header comparison. */
993 struct table_entry *cmp = NULL;
995 x = xtab[(*pb)->table];
996 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
998 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1000 /* Crosstabulation table initialization. */
1003 table = tab_create (nvar + n_cols,
1004 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1005 tab_headers (table, nvar - 1, 0, 2, 0);
1007 /* First header line. */
1008 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1009 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1011 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1013 /* Second header line. */
1017 for (i = 2; i < nvar; i++)
1018 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1019 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1020 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1021 var_get_name (x->vars[ROW_VAR]));
1022 for (i = 0; i < n_cols; i++)
1023 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1025 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1028 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1029 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1033 char *title = local_alloc (x->nvar * 64 + 128);
1037 if (cmd.pivot == CRS_PIVOT)
1038 for (i = 0; i < nvar; i++)
1041 cp = stpcpy (cp, " by ");
1042 cp = stpcpy (cp, var_get_name (x->vars[i]));
1046 cp = spprintf (cp, "%s by %s for",
1047 var_get_name (x->vars[0]),
1048 var_get_name (x->vars[1]));
1049 for (i = 2; i < nvar; i++)
1051 char buf[64], *bufp;
1056 cp = stpcpy (cp, var_get_name (x->vars[i]));
1058 format_short (buf, var_get_print_format (x->vars[i]),
1060 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1062 cp = stpcpy (cp, bufp);
1066 cp = stpcpy (cp, " [");
1067 for (i = 0; i < num_cells; i++)
1075 static const struct tuple cell_names[] =
1077 {CRS_CL_COUNT, N_("count")},
1078 {CRS_CL_ROW, N_("row %")},
1079 {CRS_CL_COLUMN, N_("column %")},
1080 {CRS_CL_TOTAL, N_("total %")},
1081 {CRS_CL_EXPECTED, N_("expected")},
1082 {CRS_CL_RESIDUAL, N_("residual")},
1083 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1084 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1088 const struct tuple *t;
1090 for (t = cell_names; t->value != cells[i]; t++)
1091 assert (t->value != -1);
1093 cp = stpcpy (cp, ", ");
1094 cp = stpcpy (cp, gettext (t->name));
1098 tab_title (table, "%s", title);
1102 tab_offset (table, 0, 2);
1107 /* Chi-square table initialization. */
1108 if (cmd.a_statistics[CRS_ST_CHISQ])
1110 chisq = tab_create (6 + (nvar - 2),
1111 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1112 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1114 tab_title (chisq, _("Chi-square tests."));
1116 tab_offset (chisq, nvar - 2, 0);
1117 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1118 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1119 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1120 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1121 _("Asymp. Sig. (2-sided)"));
1122 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1123 _("Exact. Sig. (2-sided)"));
1124 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1125 _("Exact. Sig. (1-sided)"));
1127 tab_offset (chisq, 0, 1);
1132 /* Symmetric measures. */
1133 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1134 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1135 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1136 || cmd.a_statistics[CRS_ST_KAPPA])
1138 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1139 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1140 tab_title (sym, _("Symmetric measures."));
1142 tab_offset (sym, nvar - 2, 0);
1143 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1144 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1145 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1146 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1147 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1148 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1149 tab_offset (sym, 0, 1);
1154 /* Risk estimate. */
1155 if (cmd.a_statistics[CRS_ST_RISK])
1157 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1158 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1159 tab_title (risk, _("Risk estimate."));
1161 tab_offset (risk, nvar - 2, 0);
1162 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1163 _("95%% Confidence Interval"));
1164 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1165 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1166 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1167 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1168 tab_hline (risk, TAL_1, 2, 3, 1);
1169 tab_vline (risk, TAL_1, 2, 0, 1);
1170 tab_offset (risk, 0, 2);
1175 /* Directional measures. */
1176 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1177 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1179 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1180 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1181 tab_title (direct, _("Directional measures."));
1183 tab_offset (direct, nvar - 2, 0);
1184 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1185 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1186 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1187 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1188 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1189 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1190 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1191 tab_offset (direct, 0, 1);
1198 /* Find pivot subtable if applicable. */
1199 te = find_pivot_extent (tb, &tc, 0);
1203 /* Find all the row variable values. */
1204 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1206 /* Allocate memory space for the column and row totals. */
1207 if (n_rows > *maxrows)
1209 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1210 row_tot = *row_totp;
1213 if (n_cols > *maxcols)
1215 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1216 col_tot = *col_totp;
1220 /* Allocate table space for the matrix. */
1221 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1222 tab_realloc (table, -1,
1223 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1224 tab_nr (table) * (pe - pb) / (te - tb)));
1226 if (mode == GENERAL)
1228 /* Allocate memory space for the matrix. */
1229 if (n_cols * n_rows > *maxcells)
1231 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1232 *maxcells = n_cols * n_rows;
1237 /* Build the matrix and calculate column totals. */
1239 union value *cur_col = cols;
1240 union value *cur_row = rows;
1242 double *cp = col_tot;
1243 struct table_entry **p;
1246 for (p = &tb[0]; p < te; p++)
1248 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1252 for (; cur_row < &rows[n_rows]; cur_row++)
1258 mp = &mat[cur_col - cols];
1261 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1268 *cp += *mp = (*p)->u.freq;
1273 /* Zero out the rest of the matrix. */
1274 for (; cur_row < &rows[n_rows]; cur_row++)
1280 if (cur_col < &cols[n_cols])
1282 const int rem_cols = n_cols - (cur_col - cols);
1285 for (c = 0; c < rem_cols; c++)
1287 mp = &mat[cur_col - cols];
1288 for (r = 0; r < n_rows; r++)
1290 for (c = 0; c < rem_cols; c++)
1292 mp += n_cols - rem_cols;
1300 double *tp = col_tot;
1302 assert (mode == INTEGER);
1303 mat = (*tb)->u.data;
1306 /* Calculate column totals. */
1307 for (c = 0; c < n_cols; c++)
1310 double *cp = &mat[c];
1312 for (r = 0; r < n_rows; r++)
1313 cum += cp[r * n_cols];
1321 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1322 ns_cols += *cp != 0.;
1325 /* Calculate row totals. */
1328 double *rp = row_tot;
1331 for (ns_rows = 0, r = n_rows; r--; )
1334 for (c = n_cols; c--; )
1342 /* Calculate grand total. */
1348 if (n_rows < n_cols)
1349 tp = row_tot, n = n_rows;
1351 tp = col_tot, n = n_cols;
1357 /* Find the first variable that differs from the last subtable,
1358 then display the values of the dimensioning variables for
1359 each table that needs it. */
1361 int first_difference = nvar - 1;
1364 for (; ; first_difference--)
1366 assert (first_difference >= 2);
1367 if (memcmp (&cmp->values[first_difference],
1368 &(*tb)->values[first_difference],
1369 sizeof *cmp->values))
1375 display_dimensions (table, first_difference, *tb);
1377 display_dimensions (chisq, first_difference, *tb);
1379 display_dimensions (sym, first_difference, *tb);
1381 display_dimensions (risk, first_difference, *tb);
1383 display_dimensions (direct, first_difference, *tb);
1387 display_crosstabulation ();
1388 if (cmd.miss == CRS_REPORT)
1393 display_symmetric ();
1397 display_directional ();
1408 tab_resize (chisq, 4 + (nvar - 2), -1);
1419 /* Delete missing rows and columns for statistical analysis when
1422 delete_missing (void)
1427 for (r = 0; r < n_rows; r++)
1428 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1432 for (c = 0; c < n_cols; c++)
1433 mat[c + r * n_cols] = 0.;
1441 for (c = 0; c < n_cols; c++)
1442 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1446 for (r = 0; r < n_rows; r++)
1447 mat[c + r * n_cols] = 0.;
1453 /* Prepare table T for submission, and submit it. */
1455 submit (struct tab_table *t)
1462 tab_resize (t, -1, 0);
1463 if (tab_nr (t) == tab_t (t))
1468 tab_offset (t, 0, 0);
1470 for (i = 2; i < nvar; i++)
1471 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1472 var_to_string (x->vars[i]));
1473 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1474 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1476 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1478 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1479 tab_dim (t, crosstabs_dim);
1483 /* Sets the widths of all the columns and heights of all the rows in
1484 table T for driver D. */
1486 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1490 /* Width of a numerical column. */
1491 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1492 if (cmd.miss == CRS_REPORT)
1493 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1495 /* Set width for header columns. */
1501 w = d->width - c * (t->nc - t->l);
1502 for (i = 0; i <= t->nc; i++)
1506 if (w < d->prop_em_width * 8)
1507 w = d->prop_em_width * 8;
1509 if (w > d->prop_em_width * 15)
1510 w = d->prop_em_width * 15;
1512 for (i = 0; i < t->l; i++)
1516 for (i = t->l; i < t->nc; i++)
1519 for (i = 0; i < t->nr; i++)
1520 t->h[i] = tab_natural_height (t, d, i);
1523 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1524 int *cnt, int pivot);
1525 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1526 int *cnt, int pivot);
1528 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1530 static struct table_entry **
1531 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1533 return (mode == GENERAL
1534 ? find_pivot_extent_general (tp, cnt, pivot)
1535 : find_pivot_extent_integer (tp, cnt, pivot));
1538 /* Find the extent of a region in TP that contains one table. If
1539 PIVOT != 0 that means a set of table entries with identical table
1540 number; otherwise they also have to have the same values for every
1541 dimension after the row and column dimensions. The table that is
1542 searched starts at TP and has length CNT. Returns the first entry
1543 after the last one in the table; sets *CNT to the number of
1544 remaining values. If there are no entries in TP at all, returns
1545 NULL. A yucky interface, admittedly, but it works. */
1546 static struct table_entry **
1547 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1549 struct table_entry *fp = *tp;
1554 x = xtab[(*tp)->table];
1562 if ((*tp)->table != fp->table)
1567 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1574 /* Integer mode correspondent to find_pivot_extent_general(). This
1575 could be optimized somewhat, but I just don't give a crap about
1576 CROSSTABS performance in integer mode, which is just a
1577 CROSSTABS wart as far as I'm concerned.
1579 That said, feel free to send optimization patches to me. */
1580 static struct table_entry **
1581 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1583 struct table_entry *fp = *tp;
1588 x = xtab[(*tp)->table];
1596 if ((*tp)->table != fp->table)
1601 if (memcmp (&(*tp)->values[2], &fp->values[2],
1602 sizeof (union value) * (x->nvar - 2)))
1609 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1610 result. WIDTH_ points to an int which is either 0 for a
1611 numeric value or a string width for a string value. */
1613 compare_value (const void *a_, const void *b_, const void *width_)
1615 const union value *a = a_;
1616 const union value *b = b_;
1617 const int *pwidth = width_;
1618 const int width = *pwidth;
1621 return (a->f < b->f) ? -1 : (a->f > b->f);
1623 return strncmp (a->s, b->s, width);
1626 /* Given an array of ENTRY_CNT table_entry structures starting at
1627 ENTRIES, creates a sorted list of the values that the variable
1628 with index VAR_IDX takes on. The values are returned as a
1629 malloc()'darray stored in *VALUES, with the number of values
1630 stored in *VALUE_CNT.
1633 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1634 union value **values, int *value_cnt)
1636 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1638 if (mode == GENERAL)
1640 int width = var_get_width (v);
1643 *values = xnmalloc (entry_cnt, sizeof **values);
1644 for (i = 0; i < entry_cnt; i++)
1645 (*values)[i] = entries[i]->values[var_idx];
1646 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1647 compare_value, &width);
1651 struct var_range *vr = get_var_range (v);
1654 assert (mode == INTEGER);
1655 *values = xnmalloc (vr->count, sizeof **values);
1656 for (i = 0; i < vr->count; i++)
1657 (*values)[i].f = i + vr->min;
1658 *value_cnt = vr->count;
1662 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1663 from V, displayed with print format spec from variable VAR. When
1664 in REPORT missing-value mode, missing values have an M appended. */
1666 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1667 const union value *v, const struct variable *var)
1670 const struct fmt_spec *print = var_get_print_format (var);
1672 const char *label = var_lookup_value_label (var, v);
1675 tab_text (table, c, r, TAB_LEFT, label);
1679 s.string = tab_alloc (table, print->w);
1680 format_short (s.string, print, v);
1681 s.length = strlen (s.string);
1682 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1683 s.string[s.length++] = 'M';
1684 while (s.length && *s.string == ' ')
1689 tab_raw (table, c, r, opt, &s);
1692 /* Draws a line across TABLE at the current row to indicate the most
1693 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1694 that changed, and puts the values that changed into the table. TB
1695 and X must be the corresponding table_entry and crosstab,
1698 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1700 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1702 for (; first_difference >= 2; first_difference--)
1703 table_value_missing (table, nvar - first_difference - 1, 0,
1704 TAB_RIGHT, &tb->values[first_difference],
1705 x->vars[first_difference]);
1708 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1709 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1710 additionally suffixed with a letter `M'. */
1712 format_cell_entry (struct tab_table *table, int c, int r, double value,
1713 char suffix, bool mark_missing)
1715 const struct fmt_spec f = {FMT_F, 10, 1};
1720 s.string = tab_alloc (table, 16);
1722 data_out (&v, &f, s.string);
1723 while (*s.string == ' ')
1729 s.string[s.length++] = suffix;
1731 s.string[s.length++] = 'M';
1733 tab_raw (table, c, r, TAB_RIGHT, &s);
1736 /* Displays the crosstabulation table. */
1738 display_crosstabulation (void)
1743 for (r = 0; r < n_rows; r++)
1744 table_value_missing (table, nvar - 2, r * num_cells,
1745 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1747 tab_text (table, nvar - 2, n_rows * num_cells,
1748 TAB_LEFT, _("Total"));
1750 /* Put in the actual cells. */
1755 tab_offset (table, nvar - 1, -1);
1756 for (r = 0; r < n_rows; r++)
1759 tab_hline (table, TAL_1, -1, n_cols, 0);
1760 for (c = 0; c < n_cols; c++)
1762 bool mark_missing = false;
1763 double expected_value = row_tot[r] * col_tot[c] / W;
1764 if (cmd.miss == CRS_REPORT
1765 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1766 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1768 mark_missing = true;
1769 for (i = 0; i < num_cells; i++)
1780 v = *mp / row_tot[r] * 100.;
1784 v = *mp / col_tot[c] * 100.;
1791 case CRS_CL_EXPECTED:
1794 case CRS_CL_RESIDUAL:
1795 v = *mp - expected_value;
1797 case CRS_CL_SRESIDUAL:
1798 v = (*mp - expected_value) / sqrt (expected_value);
1800 case CRS_CL_ASRESIDUAL:
1801 v = ((*mp - expected_value)
1802 / sqrt (expected_value
1803 * (1. - row_tot[r] / W)
1804 * (1. - col_tot[c] / W)));
1810 format_cell_entry (table, c, i, v, suffix, mark_missing);
1816 tab_offset (table, -1, tab_row (table) + num_cells);
1824 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1825 for (r = 0; r < n_rows; r++)
1828 bool mark_missing = false;
1830 if (cmd.miss == CRS_REPORT
1831 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1832 mark_missing = true;
1834 for (i = 0; i < num_cells; i++)
1848 v = row_tot[r] / W * 100.;
1852 v = row_tot[r] / W * 100.;
1855 case CRS_CL_EXPECTED:
1856 case CRS_CL_RESIDUAL:
1857 case CRS_CL_SRESIDUAL:
1858 case CRS_CL_ASRESIDUAL:
1865 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1866 tab_next_row (table);
1871 /* Column totals, grand total. */
1877 tab_hline (table, TAL_1, -1, n_cols, 0);
1878 for (c = 0; c <= n_cols; c++)
1880 double ct = c < n_cols ? col_tot[c] : W;
1881 bool mark_missing = false;
1885 if (cmd.miss == CRS_REPORT && c < n_cols
1886 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1887 mark_missing = true;
1889 for (i = 0; i < num_cells; i++)
1911 case CRS_CL_EXPECTED:
1912 case CRS_CL_RESIDUAL:
1913 case CRS_CL_SRESIDUAL:
1914 case CRS_CL_ASRESIDUAL:
1920 format_cell_entry (table, c, i, v, suffix, mark_missing);
1925 tab_offset (table, -1, tab_row (table) + last_row);
1928 tab_offset (table, 0, -1);
1931 static void calc_r (double *X, double *Y, double *, double *, double *);
1932 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1934 /* Display chi-square statistics. */
1936 display_chisq (void)
1938 static const char *chisq_stats[N_CHISQ] =
1940 N_("Pearson Chi-Square"),
1941 N_("Likelihood Ratio"),
1942 N_("Fisher's Exact Test"),
1943 N_("Continuity Correction"),
1944 N_("Linear-by-Linear Association"),
1946 double chisq_v[N_CHISQ];
1947 double fisher1, fisher2;
1953 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1955 tab_offset (chisq, nvar - 2, -1);
1957 for (i = 0; i < N_CHISQ; i++)
1959 if ((i != 2 && chisq_v[i] == SYSMIS)
1960 || (i == 2 && fisher1 == SYSMIS))
1964 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1967 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1968 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1969 tab_float (chisq, 3, 0, TAB_RIGHT,
1970 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1975 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1976 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1978 tab_next_row (chisq);
1981 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1982 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1983 tab_next_row (chisq);
1985 tab_offset (chisq, 0, -1);
1988 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1989 double[N_SYMMETRIC]);
1991 /* Display symmetric measures. */
1993 display_symmetric (void)
1995 static const char *categories[] =
1997 N_("Nominal by Nominal"),
1998 N_("Ordinal by Ordinal"),
1999 N_("Interval by Interval"),
2000 N_("Measure of Agreement"),
2003 static const char *stats[N_SYMMETRIC] =
2007 N_("Contingency Coefficient"),
2008 N_("Kendall's tau-b"),
2009 N_("Kendall's tau-c"),
2011 N_("Spearman Correlation"),
2016 static const int stats_categories[N_SYMMETRIC] =
2018 0, 0, 0, 1, 1, 1, 1, 2, 3,
2022 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2025 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2028 tab_offset (sym, nvar - 2, -1);
2030 for (i = 0; i < N_SYMMETRIC; i++)
2032 if (sym_v[i] == SYSMIS)
2035 if (stats_categories[i] != last_cat)
2037 last_cat = stats_categories[i];
2038 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2041 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2042 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2043 if (sym_ase[i] != SYSMIS)
2044 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2045 if (sym_t[i] != SYSMIS)
2046 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2047 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2051 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2052 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2055 tab_offset (sym, 0, -1);
2058 static int calc_risk (double[], double[], double[], union value *);
2060 /* Display risk estimate. */
2065 double risk_v[3], lower[3], upper[3];
2069 if (!calc_risk (risk_v, upper, lower, c))
2072 tab_offset (risk, nvar - 2, -1);
2074 for (i = 0; i < 3; i++)
2076 if (risk_v[i] == SYSMIS)
2082 if (var_is_numeric (x->vars[COL_VAR]))
2083 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2084 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2086 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2087 var_get_name (x->vars[COL_VAR]),
2088 var_get_width (x->vars[COL_VAR]), c[0].s,
2089 var_get_width (x->vars[COL_VAR]), c[1].s);
2093 if (var_is_numeric (x->vars[ROW_VAR]))
2094 sprintf (buf, _("For cohort %s = %g"),
2095 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2097 sprintf (buf, _("For cohort %s = %.*s"),
2098 var_get_name (x->vars[ROW_VAR]),
2099 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2103 tab_text (risk, 0, 0, TAB_LEFT, buf);
2104 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2105 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2106 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2107 tab_next_row (risk);
2110 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2111 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2112 tab_next_row (risk);
2114 tab_offset (risk, 0, -1);
2117 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2118 double[N_DIRECTIONAL]);
2120 /* Display directional measures. */
2122 display_directional (void)
2124 static const char *categories[] =
2126 N_("Nominal by Nominal"),
2127 N_("Ordinal by Ordinal"),
2128 N_("Nominal by Interval"),
2131 static const char *stats[] =
2134 N_("Goodman and Kruskal tau"),
2135 N_("Uncertainty Coefficient"),
2140 static const char *types[] =
2147 static const int stats_categories[N_DIRECTIONAL] =
2149 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2152 static const int stats_stats[N_DIRECTIONAL] =
2154 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2157 static const int stats_types[N_DIRECTIONAL] =
2159 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2162 static const int *stats_lookup[] =
2169 static const char **stats_names[] =
2181 double direct_v[N_DIRECTIONAL];
2182 double direct_ase[N_DIRECTIONAL];
2183 double direct_t[N_DIRECTIONAL];
2187 if (!calc_directional (direct_v, direct_ase, direct_t))
2190 tab_offset (direct, nvar - 2, -1);
2192 for (i = 0; i < N_DIRECTIONAL; i++)
2194 if (direct_v[i] == SYSMIS)
2200 for (j = 0; j < 3; j++)
2201 if (last[j] != stats_lookup[j][i])
2204 tab_hline (direct, TAL_1, j, 6, 0);
2209 int k = last[j] = stats_lookup[j][i];
2214 string = var_get_name (x->vars[0]);
2216 string = var_get_name (x->vars[1]);
2218 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2219 gettext (stats_names[j][k]), string);
2224 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2225 if (direct_ase[i] != SYSMIS)
2226 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2227 if (direct_t[i] != SYSMIS)
2228 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2229 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2230 tab_next_row (direct);
2233 tab_offset (direct, 0, -1);
2236 /* Statistical calculations. */
2238 /* Returns the value of the gamma (factorial) function for an integer
2241 gamma_int (double x)
2246 for (i = 2; i < x; i++)
2251 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2253 static inline double
2254 Pr (int a, int b, int c, int d)
2256 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2257 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2258 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2259 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2260 / gamma_int (a + b + c + d + 1.));
2263 /* Swap the contents of A and B. */
2265 swap (int *a, int *b)
2272 /* Calculate significance for Fisher's exact test as specified in
2273 _SPSS Statistical Algorithms_, Appendix 5. */
2275 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2279 if (MIN (c, d) < MIN (a, b))
2280 swap (&a, &c), swap (&b, &d);
2281 if (MIN (b, d) < MIN (a, c))
2282 swap (&a, &b), swap (&c, &d);
2286 swap (&a, &b), swap (&c, &d);
2288 swap (&a, &c), swap (&b, &d);
2292 for (x = 0; x <= a; x++)
2293 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2295 *fisher2 = *fisher1;
2296 for (x = 1; x <= b; x++)
2297 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2300 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2301 columns with values COLS and N_ROWS rows with values ROWS. Values
2302 in the matrix sum to W. */
2304 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2305 double *fisher1, double *fisher2)
2309 chisq[0] = chisq[1] = 0.;
2310 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2311 *fisher1 = *fisher2 = SYSMIS;
2313 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2315 if (ns_rows <= 1 || ns_cols <= 1)
2317 chisq[0] = chisq[1] = SYSMIS;
2321 for (r = 0; r < n_rows; r++)
2322 for (c = 0; c < n_cols; c++)
2324 const double expected = row_tot[r] * col_tot[c] / W;
2325 const double freq = mat[n_cols * r + c];
2326 const double residual = freq - expected;
2328 chisq[0] += residual * residual / expected;
2330 chisq[1] += freq * log (expected / freq);
2341 /* Calculate Yates and Fisher exact test. */
2342 if (ns_cols == 2 && ns_rows == 2)
2344 double f11, f12, f21, f22;
2350 for (i = j = 0; i < n_cols; i++)
2351 if (col_tot[i] != 0.)
2360 f11 = mat[nz_cols[0]];
2361 f12 = mat[nz_cols[1]];
2362 f21 = mat[nz_cols[0] + n_cols];
2363 f22 = mat[nz_cols[1] + n_cols];
2368 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2371 chisq[3] = (W * x * x
2372 / (f11 + f12) / (f21 + f22)
2373 / (f11 + f21) / (f12 + f22));
2381 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2382 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2385 /* Calculate Mantel-Haenszel. */
2386 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2388 double r, ase_0, ase_1;
2389 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2391 chisq[4] = (W - 1.) * r * r;
2396 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2397 ASE_1, and ase_0 into ASE_0. The row and column values must be
2398 passed in X and Y. */
2400 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2402 double SX, SY, S, T;
2404 double sum_XYf, sum_X2Y2f;
2405 double sum_Xr, sum_X2r;
2406 double sum_Yc, sum_Y2c;
2409 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2410 for (j = 0; j < n_cols; j++)
2412 double fij = mat[j + i * n_cols];
2413 double product = X[i] * Y[j];
2414 double temp = fij * product;
2416 sum_X2Y2f += temp * product;
2419 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2421 sum_Xr += X[i] * row_tot[i];
2422 sum_X2r += X[i] * X[i] * row_tot[i];
2426 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2428 sum_Yc += Y[i] * col_tot[i];
2429 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2433 S = sum_XYf - sum_Xr * sum_Yc / W;
2434 SX = sum_X2r - sum_Xr * sum_Xr / W;
2435 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2438 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2443 for (s = c = 0., i = 0; i < n_rows; i++)
2444 for (j = 0; j < n_cols; j++)
2446 double Xresid, Yresid;
2449 Xresid = X[i] - Xbar;
2450 Yresid = Y[j] - Ybar;
2451 temp = (T * Xresid * Yresid
2453 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2454 y = mat[j + i * n_cols] * temp * temp - c;
2459 *ase_1 = sqrt (s) / (T * T);
2463 static double somers_d_v[3];
2464 static double somers_d_ase[3];
2465 static double somers_d_t[3];
2467 /* Calculate symmetric statistics and their asymptotic standard
2468 errors. Returns 0 if none could be calculated. */
2470 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2471 double t[N_SYMMETRIC])
2473 int q = MIN (ns_rows, ns_cols);
2482 for (i = 0; i < N_SYMMETRIC; i++)
2483 v[i] = ase[i] = t[i] = SYSMIS;
2486 /* Phi, Cramer's V, contingency coefficient. */
2487 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2489 double Xp = 0.; /* Pearson chi-square. */
2494 for (r = 0; r < n_rows; r++)
2495 for (c = 0; c < n_cols; c++)
2497 const double expected = row_tot[r] * col_tot[c] / W;
2498 const double freq = mat[n_cols * r + c];
2499 const double residual = freq - expected;
2501 Xp += residual * residual / expected;
2505 if (cmd.a_statistics[CRS_ST_PHI])
2507 v[0] = sqrt (Xp / W);
2508 v[1] = sqrt (Xp / (W * (q - 1)));
2510 if (cmd.a_statistics[CRS_ST_CC])
2511 v[2] = sqrt (Xp / (Xp + W));
2514 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2515 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2520 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2527 for (r = 0; r < n_rows; r++)
2528 Dr -= row_tot[r] * row_tot[r];
2529 for (c = 0; c < n_cols; c++)
2530 Dc -= col_tot[c] * col_tot[c];
2536 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2537 for (c = 0; c < n_cols; c++)
2541 for (r = 0; r < n_rows; r++)
2542 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2552 for (i = 0; i < n_rows; i++)
2556 for (j = 1; j < n_cols; j++)
2557 Cij += col_tot[j] - cum[j + i * n_cols];
2560 for (j = 1; j < n_cols; j++)
2561 Dij += cum[j + (i - 1) * n_cols];
2565 double fij = mat[j + i * n_cols];
2571 assert (j < n_cols);
2573 Cij -= col_tot[j] - cum[j + i * n_cols];
2574 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2578 Cij += cum[j - 1 + (i - 1) * n_cols];
2579 Dij -= cum[j + (i - 1) * n_cols];
2585 if (cmd.a_statistics[CRS_ST_BTAU])
2586 v[3] = (P - Q) / sqrt (Dr * Dc);
2587 if (cmd.a_statistics[CRS_ST_CTAU])
2588 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2589 if (cmd.a_statistics[CRS_ST_GAMMA])
2590 v[5] = (P - Q) / (P + Q);
2592 /* ASE for tau-b, tau-c, gamma. Calculations could be
2593 eliminated here, at expense of memory. */
2598 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2599 for (i = 0; i < n_rows; i++)
2603 for (j = 1; j < n_cols; j++)
2604 Cij += col_tot[j] - cum[j + i * n_cols];
2607 for (j = 1; j < n_cols; j++)
2608 Dij += cum[j + (i - 1) * n_cols];
2612 double fij = mat[j + i * n_cols];
2614 if (cmd.a_statistics[CRS_ST_BTAU])
2616 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2617 + v[3] * (row_tot[i] * Dc
2618 + col_tot[j] * Dr));
2619 btau_cum += fij * temp * temp;
2623 const double temp = Cij - Dij;
2624 ctau_cum += fij * temp * temp;
2627 if (cmd.a_statistics[CRS_ST_GAMMA])
2629 const double temp = Q * Cij - P * Dij;
2630 gamma_cum += fij * temp * temp;
2633 if (cmd.a_statistics[CRS_ST_D])
2635 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2636 - (P - Q) * (W - row_tot[i]));
2637 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2638 - (Q - P) * (W - col_tot[j]));
2643 assert (j < n_cols);
2645 Cij -= col_tot[j] - cum[j + i * n_cols];
2646 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2650 Cij += cum[j - 1 + (i - 1) * n_cols];
2651 Dij -= cum[j + (i - 1) * n_cols];
2657 btau_var = ((btau_cum
2658 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2660 if (cmd.a_statistics[CRS_ST_BTAU])
2662 ase[3] = sqrt (btau_var);
2663 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2666 if (cmd.a_statistics[CRS_ST_CTAU])
2668 ase[4] = ((2 * q / ((q - 1) * W * W))
2669 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2670 t[4] = v[4] / ase[4];
2672 if (cmd.a_statistics[CRS_ST_GAMMA])
2674 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2675 t[5] = v[5] / (2. / (P + Q)
2676 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2678 if (cmd.a_statistics[CRS_ST_D])
2680 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2681 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2682 somers_d_t[0] = (somers_d_v[0]
2684 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2685 somers_d_v[1] = (P - Q) / Dc;
2686 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2687 somers_d_t[1] = (somers_d_v[1]
2689 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2690 somers_d_v[2] = (P - Q) / Dr;
2691 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2692 somers_d_t[2] = (somers_d_v[2]
2694 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2700 /* Spearman correlation, Pearson's r. */
2701 if (cmd.a_statistics[CRS_ST_CORR])
2703 double *R = local_alloc (sizeof *R * n_rows);
2704 double *C = local_alloc (sizeof *C * n_cols);
2707 double y, t, c = 0., s = 0.;
2712 R[i] = s + (row_tot[i] + 1.) / 2.;
2719 assert (i < n_rows);
2724 double y, t, c = 0., s = 0.;
2729 C[j] = s + (col_tot[j] + 1.) / 2;
2736 assert (j < n_cols);
2740 calc_r (R, C, &v[6], &t[6], &ase[6]);
2746 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2750 /* Cohen's kappa. */
2751 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2753 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2756 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2757 i < ns_rows; i++, j++)
2761 while (col_tot[j] == 0.)
2764 prod = row_tot[i] * col_tot[j];
2765 sum = row_tot[i] + col_tot[j];
2767 sum_fii += mat[j + i * n_cols];
2769 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2770 sum_riciri_ci += prod * sum;
2772 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2773 for (j = 0; j < ns_cols; j++)
2775 double sum = row_tot[i] + col_tot[j];
2776 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2779 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2781 ase[8] = sqrt ((W * W * sum_rici
2782 + sum_rici * sum_rici
2783 - W * sum_riciri_ci)
2784 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2786 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2787 / pow2 (W * W - sum_rici))
2788 + ((2. * (W - sum_fii)
2789 * (2. * sum_fii * sum_rici
2790 - W * sum_fiiri_ci))
2791 / cube (W * W - sum_rici))
2792 + (pow2 (W - sum_fii)
2793 * (W * sum_fijri_ci2 - 4.
2794 * sum_rici * sum_rici)
2795 / pow4 (W * W - sum_rici))));
2797 t[8] = v[8] / ase[8];
2804 /* Calculate risk estimate. */
2806 calc_risk (double *value, double *upper, double *lower, union value *c)
2808 double f11, f12, f21, f22;
2814 for (i = 0; i < 3; i++)
2815 value[i] = upper[i] = lower[i] = SYSMIS;
2818 if (ns_rows != 2 || ns_cols != 2)
2825 for (i = j = 0; i < n_cols; i++)
2826 if (col_tot[i] != 0.)
2835 f11 = mat[nz_cols[0]];
2836 f12 = mat[nz_cols[1]];
2837 f21 = mat[nz_cols[0] + n_cols];
2838 f22 = mat[nz_cols[1] + n_cols];
2840 c[0] = cols[nz_cols[0]];
2841 c[1] = cols[nz_cols[1]];
2844 value[0] = (f11 * f22) / (f12 * f21);
2845 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2846 lower[0] = value[0] * exp (-1.960 * v);
2847 upper[0] = value[0] * exp (1.960 * v);
2849 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2850 v = sqrt ((f12 / (f11 * (f11 + f12)))
2851 + (f22 / (f21 * (f21 + f22))));
2852 lower[1] = value[1] * exp (-1.960 * v);
2853 upper[1] = value[1] * exp (1.960 * v);
2855 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2856 v = sqrt ((f11 / (f12 * (f11 + f12)))
2857 + (f21 / (f22 * (f21 + f22))));
2858 lower[2] = value[2] * exp (-1.960 * v);
2859 upper[2] = value[2] * exp (1.960 * v);
2864 /* Calculate directional measures. */
2866 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2867 double t[N_DIRECTIONAL])
2872 for (i = 0; i < N_DIRECTIONAL; i++)
2873 v[i] = ase[i] = t[i] = SYSMIS;
2877 if (cmd.a_statistics[CRS_ST_LAMBDA])
2879 double *fim = xnmalloc (n_rows, sizeof *fim);
2880 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2881 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2882 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2883 double sum_fim, sum_fmj;
2885 int rm_index, cm_index;
2888 /* Find maximum for each row and their sum. */
2889 for (sum_fim = 0., i = 0; i < n_rows; i++)
2891 double max = mat[i * n_cols];
2894 for (j = 1; j < n_cols; j++)
2895 if (mat[j + i * n_cols] > max)
2897 max = mat[j + i * n_cols];
2901 sum_fim += fim[i] = max;
2902 fim_index[i] = index;
2905 /* Find maximum for each column. */
2906 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2908 double max = mat[j];
2911 for (i = 1; i < n_rows; i++)
2912 if (mat[j + i * n_cols] > max)
2914 max = mat[j + i * n_cols];
2918 sum_fmj += fmj[j] = max;
2919 fmj_index[j] = index;
2922 /* Find maximum row total. */
2925 for (i = 1; i < n_rows; i++)
2926 if (row_tot[i] > rm)
2932 /* Find maximum column total. */
2935 for (j = 1; j < n_cols; j++)
2936 if (col_tot[j] > cm)
2942 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2943 v[1] = (sum_fmj - rm) / (W - rm);
2944 v[2] = (sum_fim - cm) / (W - cm);
2946 /* ASE1 for Y given X. */
2950 for (accum = 0., i = 0; i < n_rows; i++)
2951 for (j = 0; j < n_cols; j++)
2953 const int deltaj = j == cm_index;
2954 accum += (mat[j + i * n_cols]
2955 * pow2 ((j == fim_index[i])
2960 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2963 /* ASE0 for Y given X. */
2967 for (accum = 0., i = 0; i < n_rows; i++)
2968 if (cm_index != fim_index[i])
2969 accum += (mat[i * n_cols + fim_index[i]]
2970 + mat[i * n_cols + cm_index]);
2971 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2974 /* ASE1 for X given Y. */
2978 for (accum = 0., i = 0; i < n_rows; i++)
2979 for (j = 0; j < n_cols; j++)
2981 const int deltaj = i == rm_index;
2982 accum += (mat[j + i * n_cols]
2983 * pow2 ((i == fmj_index[j])
2988 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2991 /* ASE0 for X given Y. */
2995 for (accum = 0., j = 0; j < n_cols; j++)
2996 if (rm_index != fmj_index[j])
2997 accum += (mat[j + n_cols * fmj_index[j]]
2998 + mat[j + n_cols * rm_index]);
2999 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3002 /* Symmetric ASE0 and ASE1. */
3007 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3008 for (j = 0; j < n_cols; j++)
3010 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3011 int temp1 = (i == rm_index) + (j == cm_index);
3012 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3013 accum1 += (mat[j + i * n_cols]
3014 * pow2 (temp0 + (v[0] - 1.) * temp1));
3016 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3017 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3018 / (2. * W - rm - cm));
3027 double sum_fij2_ri, sum_fij2_ci;
3028 double sum_ri2, sum_cj2;
3030 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3031 for (j = 0; j < n_cols; j++)
3033 double temp = pow2 (mat[j + i * n_cols]);
3034 sum_fij2_ri += temp / row_tot[i];
3035 sum_fij2_ci += temp / col_tot[j];
3038 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3039 sum_ri2 += row_tot[i] * row_tot[i];
3041 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3042 sum_cj2 += col_tot[j] * col_tot[j];
3044 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3045 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3049 if (cmd.a_statistics[CRS_ST_UC])
3051 double UX, UY, UXY, P;
3052 double ase1_yx, ase1_xy, ase1_sym;
3055 for (UX = 0., i = 0; i < n_rows; i++)
3056 if (row_tot[i] > 0.)
3057 UX -= row_tot[i] / W * log (row_tot[i] / W);
3059 for (UY = 0., j = 0; j < n_cols; j++)
3060 if (col_tot[j] > 0.)
3061 UY -= col_tot[j] / W * log (col_tot[j] / W);
3063 for (UXY = P = 0., i = 0; i < n_rows; i++)
3064 for (j = 0; j < n_cols; j++)
3066 double entry = mat[j + i * n_cols];
3071 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3072 UXY -= entry / W * log (entry / W);
3075 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3076 for (j = 0; j < n_cols; j++)
3078 double entry = mat[j + i * n_cols];
3083 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3084 + (UX - UXY) * log (col_tot[j] / W));
3085 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3086 + (UY - UXY) * log (row_tot[i] / W));
3087 ase1_sym += entry * pow2 ((UXY
3088 * log (row_tot[i] * col_tot[j] / (W * W)))
3089 - (UX + UY) * log (entry / W));
3092 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3093 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3094 t[5] = v[5] / ((2. / (W * (UX + UY)))
3095 * sqrt (P - pow2 (UX + UY - UXY) / W));
3097 v[6] = (UX + UY - UXY) / UX;
3098 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3099 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3101 v[7] = (UX + UY - UXY) / UY;
3102 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3103 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3107 if (cmd.a_statistics[CRS_ST_D])
3112 calc_symmetric (NULL, NULL, NULL);
3113 for (i = 0; i < 3; i++)
3115 v[8 + i] = somers_d_v[i];
3116 ase[8 + i] = somers_d_ase[i];
3117 t[8 + i] = somers_d_t[i];
3122 if (cmd.a_statistics[CRS_ST_ETA])
3125 double sum_Xr, sum_X2r;
3129 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3131 sum_Xr += rows[i].f * row_tot[i];
3132 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3134 SX = sum_X2r - sum_Xr * sum_Xr / W;
3136 for (SXW = 0., j = 0; j < n_cols; j++)
3140 for (cum = 0., i = 0; i < n_rows; i++)
3142 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3143 cum += rows[i].f * mat[j + i * n_cols];
3146 SXW -= cum * cum / col_tot[j];
3148 v[11] = sqrt (1. - SXW / SX);
3152 double sum_Yc, sum_Y2c;
3156 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3158 sum_Yc += cols[i].f * col_tot[i];
3159 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3161 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3163 for (SYW = 0., i = 0; i < n_rows; i++)
3167 for (cum = 0., j = 0; j < n_cols; j++)
3169 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3170 cum += cols[j].f * mat[j + i * n_cols];
3173 SYW -= cum * cum / row_tot[i];
3175 v[12] = sqrt (1. - SYW / SY);
3182 /* A wrapper around data_out() that limits string output to short
3183 string width and null terminates the result. */
3185 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3187 struct fmt_spec fmt_subst;
3189 /* Limit to short string width. */
3190 if (fmt_is_string (fp->type))
3194 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3195 if (fmt_subst.type == FMT_A)
3196 fmt_subst.w = MIN (8, fmt_subst.w);
3198 fmt_subst.w = MIN (16, fmt_subst.w);
3204 data_out (v, fp, s);
3206 /* Null terminate. */