1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/data-out.h>
41 #include <data/dictionary.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
64 #define _(msgid) gettext (msgid)
65 #define N_(msgid) msgid
73 missing=miss:!table/include/report;
74 +write[wr_]=none,cells,all;
75 +format=fmt:!labels/nolabels/novallabs,
78 tabl:!tables/notables,
81 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
83 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
89 /* Number of chi-square statistics. */
92 /* Number of symmetric statistics. */
95 /* Number of directional statistics. */
96 #define N_DIRECTIONAL 13
98 /* A single table entry for general mode. */
101 int table; /* Flattened table number. */
104 double freq; /* Frequency count. */
105 double *data; /* Crosstabulation table for integer mode. */
108 union value values[1]; /* Values. */
111 /* A crosstabulation. */
114 int nvar; /* Number of variables. */
115 double missing; /* Missing cases count. */
116 int ofs; /* Integer mode: Offset into sorted_tab[]. */
117 struct variable *vars[2]; /* At least two variables; sorted by
118 larger indices first. */
121 /* Integer mode variable info. */
124 int min; /* Minimum value. */
125 int max; /* Maximum value + 1. */
126 int count; /* max - min. */
129 static inline struct var_range *
130 get_var_range (struct variable *v)
133 assert (v->aux != NULL);
137 /* Indexes into crosstab.v. */
144 /* General mode crosstabulation table. */
145 static struct hsh_table *gen_tab; /* Hash table. */
146 static int n_sorted_tab; /* Number of entries in sorted_tab. */
147 static struct table_entry **sorted_tab; /* Sorted table. */
149 /* Variables specifies on VARIABLES. */
150 static struct variable **variables;
151 static size_t variables_cnt;
154 static struct crosstab **xtab;
157 /* Integer or general mode? */
166 static int num_cells; /* Number of cells requested. */
167 static int cells[8]; /* Cells requested. */
170 static int write; /* One of WR_* that specifies the WRITE style. */
172 /* Command parsing info. */
173 static struct cmd_crosstabs cmd;
176 static struct pool *pl_tc; /* For table cells. */
177 static struct pool *pl_col; /* For column data. */
179 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
180 static void precalc (const struct ccase *, void *, const struct dataset *);
181 static bool calc_general (const struct ccase *, void *, const struct dataset *);
182 static bool calc_integer (const struct ccase *, void *, const struct dataset *);
183 static bool postcalc (void *, const struct dataset *);
184 static void submit (struct tab_table *);
186 static void format_short (char *s, const struct fmt_spec *fp,
187 const union value *v);
189 /* Parse and execute CROSSTABS, then clean up. */
191 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
193 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
202 /* Parses and executes the CROSSTABS procedure. */
204 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
213 pl_tc = pool_create ();
214 pl_col = pool_create ();
216 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
219 mode = variables ? INTEGER : GENERAL;
224 cmd.a_cells[CRS_CL_COUNT] = 1;
230 for (i = 0; i < CRS_CL_count; i++)
235 cmd.a_cells[CRS_CL_COUNT] = 1;
236 cmd.a_cells[CRS_CL_ROW] = 1;
237 cmd.a_cells[CRS_CL_COLUMN] = 1;
238 cmd.a_cells[CRS_CL_TOTAL] = 1;
240 if (cmd.a_cells[CRS_CL_ALL])
242 for (i = 0; i < CRS_CL_count; i++)
244 cmd.a_cells[CRS_CL_ALL] = 0;
246 cmd.a_cells[CRS_CL_NONE] = 0;
248 for (num_cells = i = 0; i < CRS_CL_count; i++)
250 cells[num_cells++] = i;
253 if (cmd.sbc_statistics)
258 for (i = 0; i < CRS_ST_count; i++)
259 if (cmd.a_statistics[i])
262 cmd.a_statistics[CRS_ST_CHISQ] = 1;
263 if (cmd.a_statistics[CRS_ST_ALL])
264 for (i = 0; i < CRS_ST_count; i++)
265 cmd.a_statistics[i] = 1;
269 if (cmd.miss == CRS_REPORT && mode == GENERAL)
271 msg (SE, _("Missing mode REPORT not allowed in general mode. "
272 "Assuming MISSING=TABLE."));
273 cmd.miss = CRS_TABLE;
277 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
278 cmd.a_write[CRS_WR_ALL] = 0;
279 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
281 msg (SE, _("Write mode ALL not allowed in general mode. "
282 "Assuming WRITE=CELLS."));
283 cmd.a_write[CRS_WR_CELLS] = 1;
286 && (cmd.a_write[CRS_WR_NONE]
287 + cmd.a_write[CRS_WR_ALL]
288 + cmd.a_write[CRS_WR_CELLS] == 0))
289 cmd.a_write[CRS_WR_CELLS] = 1;
290 if (cmd.a_write[CRS_WR_CELLS])
291 write = CRS_WR_CELLS;
292 else if (cmd.a_write[CRS_WR_ALL])
297 ok = procedure_with_splits (ds, precalc,
298 mode == GENERAL ? calc_general : calc_integer,
301 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
304 /* Parses the TABLES subcommand. */
306 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
308 struct var_set *var_set;
310 struct variable ***by = NULL;
311 size_t *by_nvar = NULL;
315 /* Ensure that this is a TABLES subcommand. */
316 if (!lex_match_id (lexer, "TABLES")
317 && (lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
318 && lex_token (lexer) != T_ALL)
320 lex_match (lexer, '=');
322 if (variables != NULL)
323 var_set = var_set_create_from_array (variables, variables_cnt);
325 var_set = var_set_create_from_dict (dataset_dict (ds));
326 assert (var_set != NULL);
330 by = xnrealloc (by, n_by + 1, sizeof *by);
331 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
332 if (!parse_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
333 PV_NO_DUPLICATE | PV_NO_SCRATCH))
335 if (xalloc_oversized (nx, by_nvar[n_by]))
337 msg (SE, _("Too many crosstabulation variables or dimensions."));
343 if (!lex_match (lexer, T_BY))
347 lex_error (lexer, _("expecting BY"));
356 int *by_iter = xcalloc (n_by, sizeof *by_iter);
359 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
360 for (i = 0; i < nx; i++)
364 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
371 for (i = 0; i < n_by; i++)
372 x->vars[i] = by[i][by_iter[i]];
378 for (i = n_by - 1; i >= 0; i--)
380 if (++by_iter[i] < by_nvar[i])
393 /* All return paths lead here. */
397 for (i = 0; i < n_by; i++)
403 var_set_destroy (var_set);
408 /* Parses the VARIABLES subcommand. */
410 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
414 msg (SE, _("VARIABLES must be specified before TABLES."));
418 lex_match (lexer, '=');
422 size_t orig_nv = variables_cnt;
427 if (!parse_variables (lexer, dataset_dict (ds),
428 &variables, &variables_cnt,
429 (PV_APPEND | PV_NUMERIC
430 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
433 if (lex_token (lexer) != '(')
435 lex_error (lexer, "expecting `('");
440 if (!lex_force_int (lexer))
442 min = lex_integer (lexer);
445 lex_match (lexer, ',');
447 if (!lex_force_int (lexer))
449 max = lex_integer (lexer);
452 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
458 if (lex_token (lexer) != ')')
460 lex_error (lexer, "expecting `)'");
465 for (i = orig_nv; i < variables_cnt; i++)
467 struct var_range *vr = xmalloc (sizeof *vr);
470 vr->count = max - min + 1;
471 var_attach_aux (variables[i], vr, var_dtor_free);
474 if (lex_token (lexer) == '/')
486 /* Data file processing. */
488 static int compare_table_entry (const void *, const void *, const void *);
489 static unsigned hash_table_entry (const void *, const void *);
491 /* Set up the crosstabulation tables for processing. */
493 precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
495 output_split_file_values (ds, first);
498 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
508 for (i = 0; i < nxtab; i++)
510 struct crosstab *x = xtab[i];
515 x->ofs = n_sorted_tab;
517 for (j = 2; j < x->nvar; j++)
518 count *= get_var_range (x->vars[j - 2])->count;
520 sorted_tab = xnrealloc (sorted_tab,
521 n_sorted_tab + count, sizeof *sorted_tab);
522 v = local_alloc (sizeof *v * x->nvar);
523 for (j = 2; j < x->nvar; j++)
524 v[j] = get_var_range (x->vars[j])->min;
525 for (j = 0; j < count; j++)
527 struct table_entry *te;
530 te = sorted_tab[n_sorted_tab++]
531 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
535 int row_cnt = get_var_range (x->vars[0])->count;
536 int col_cnt = get_var_range (x->vars[1])->count;
537 const int mat_size = row_cnt * col_cnt;
540 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
541 for (m = 0; m < mat_size; m++)
545 for (k = 2; k < x->nvar; k++)
546 te->values[k].f = v[k];
547 for (k = 2; k < x->nvar; k++)
549 struct var_range *vr = get_var_range (x->vars[k]);
550 if (++v[k] >= vr->max)
559 sorted_tab = xnrealloc (sorted_tab,
560 n_sorted_tab + 1, sizeof *sorted_tab);
561 sorted_tab[n_sorted_tab] = NULL;
566 /* Form crosstabulations for general mode. */
568 calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
570 bool bad_warn = true;
573 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
575 /* Flattened current table index. */
578 for (t = 0; t < nxtab; t++)
580 struct crosstab *x = xtab[t];
581 const size_t entry_size = (sizeof (struct table_entry)
582 + sizeof (union value) * (x->nvar - 1));
583 struct table_entry *te = local_alloc (entry_size);
585 /* Construct table entry for the current record and table. */
591 for (j = 0; j < x->nvar; j++)
593 const union value *v = case_data (c, x->vars[j]->fv);
594 const struct missing_values *mv = &x->vars[j]->miss;
595 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
596 || (cmd.miss == CRS_INCLUDE
597 && mv_is_value_system_missing (mv, v)))
599 x->missing += weight;
603 if (x->vars[j]->type == NUMERIC)
604 te->values[j].f = case_num (c, x->vars[j]->fv);
607 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
610 /* Necessary in order to simplify comparisons. */
611 memset (&te->values[j].s[x->vars[j]->width], 0,
612 sizeof (union value) - x->vars[j]->width);
617 /* Add record to hash table. */
619 struct table_entry **tepp
620 = (struct table_entry **) hsh_probe (gen_tab, te);
623 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
626 memcpy (tep, te, entry_size);
631 (*tepp)->u.freq += weight;
642 calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
644 bool bad_warn = true;
647 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
649 /* Flattened current table index. */
652 for (t = 0; t < nxtab; t++)
654 struct crosstab *x = xtab[t];
659 for (i = 0; i < x->nvar; i++)
661 struct variable *const v = x->vars[i];
662 struct var_range *vr = get_var_range (v);
663 double value = case_num (c, v->fv);
665 /* Note that the first test also rules out SYSMIS. */
666 if ((value < vr->min || value >= vr->max)
667 || (cmd.miss == CRS_TABLE
668 && mv_is_num_user_missing (&v->miss, value)))
670 x->missing += weight;
676 ofs += fact * ((int) value - vr->min);
682 struct variable *row_var = x->vars[ROW_VAR];
683 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
685 struct variable *col_var = x->vars[COL_VAR];
686 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
688 const int col_dim = get_var_range (col_var)->count;
690 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
699 /* Compare the table_entry's at A and B and return a strcmp()-type
702 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
704 const struct table_entry *a = a_;
705 const struct table_entry *b = b_;
707 if (a->table > b->table)
709 else if (a->table < b->table)
713 const struct crosstab *x = xtab[a->table];
716 for (i = x->nvar - 1; i >= 0; i--)
717 if (x->vars[i]->type == NUMERIC)
719 const double diffnum = a->values[i].f - b->values[i].f;
722 else if (diffnum > 0)
727 assert (x->vars[i]->type == ALPHA);
729 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
740 /* Calculate a hash value from table_entry A. */
742 hash_table_entry (const void *a_, const void *aux UNUSED)
744 const struct table_entry *a = a_;
749 for (i = 0; i < xtab[a->table]->nvar; i++)
750 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
755 /* Post-data reading calculations. */
757 static struct table_entry **find_pivot_extent (struct table_entry **,
758 int *cnt, int pivot);
759 static void enum_var_values (struct table_entry **entries, int entry_cnt,
761 union value **values, int *value_cnt);
762 static void output_pivot_table (struct table_entry **, struct table_entry **,
763 double **, double **, double **,
764 int *, int *, int *);
765 static void make_summary_table (void);
768 postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
772 n_sorted_tab = hsh_count (gen_tab);
773 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
776 make_summary_table ();
778 /* Identify all the individual crosstabulation tables, and deal with
781 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
782 int pc = n_sorted_tab; /* Pivot count. */
784 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
785 int maxrows = 0, maxcols = 0, maxcells = 0;
789 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
793 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
794 &maxrows, &maxcols, &maxcells);
803 hsh_destroy (gen_tab);
808 static void insert_summary (struct tab_table *, int tab_index, double valid);
810 /* Output a table summarizing the cases processed. */
812 make_summary_table (void)
814 struct tab_table *summary;
816 struct table_entry **pb = sorted_tab, **pe;
817 int pc = n_sorted_tab;
820 summary = tab_create (7, 3 + nxtab, 1);
821 tab_title (summary, _("Summary."));
822 tab_headers (summary, 1, 0, 3, 0);
823 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
824 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
825 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
826 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
827 tab_hline (summary, TAL_1, 1, 6, 1);
828 tab_hline (summary, TAL_1, 1, 6, 2);
829 tab_vline (summary, TAL_1, 3, 1, 1);
830 tab_vline (summary, TAL_1, 5, 1, 1);
834 for (i = 0; i < 3; i++)
836 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
837 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
840 tab_offset (summary, 0, 3);
846 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
850 while (cur_tab < (*pb)->table)
851 insert_summary (summary, cur_tab++, 0.);
854 for (valid = 0.; pb < pe; pb++)
855 valid += (*pb)->u.freq;
858 const struct crosstab *const x = xtab[(*pb)->table];
859 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
860 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
861 const int count = n_cols * n_rows;
863 for (valid = 0.; pb < pe; pb++)
865 const double *data = (*pb)->u.data;
868 for (i = 0; i < count; i++)
872 insert_summary (summary, cur_tab++, valid);
877 while (cur_tab < nxtab)
878 insert_summary (summary, cur_tab++, 0.);
883 /* Inserts a line into T describing the crosstabulation at index
884 TAB_INDEX, which has VALID valid observations. */
886 insert_summary (struct tab_table *t, int tab_index, double valid)
888 struct crosstab *x = xtab[tab_index];
890 tab_hline (t, TAL_1, 0, 6, 0);
892 /* Crosstabulation name. */
894 char *buf = local_alloc (128 * x->nvar);
898 for (i = 0; i < x->nvar; i++)
901 cp = stpcpy (cp, " * ");
904 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
906 tab_text (t, 0, 0, TAB_LEFT, buf);
911 /* Counts and percentages. */
921 for (i = 0; i < 3; i++)
923 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
924 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
935 static struct tab_table *table; /* Crosstabulation table. */
936 static struct tab_table *chisq; /* Chi-square table. */
937 static struct tab_table *sym; /* Symmetric measures table. */
938 static struct tab_table *risk; /* Risk estimate table. */
939 static struct tab_table *direct; /* Directional measures table. */
942 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
944 /* Column values, number of columns. */
945 static union value *cols;
948 /* Row values, number of rows. */
949 static union value *rows;
952 /* Number of statistically interesting columns/rows (columns/rows with
954 static int ns_cols, ns_rows;
956 /* Crosstabulation. */
957 static const struct crosstab *x;
959 /* Number of variables from the crosstabulation to consider. This is
960 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
963 /* Matrix contents. */
964 static double *mat; /* Matrix proper. */
965 static double *row_tot; /* Row totals. */
966 static double *col_tot; /* Column totals. */
967 static double W; /* Grand total. */
969 static void display_dimensions (struct tab_table *, int first_difference,
970 struct table_entry *);
971 static void display_crosstabulation (void);
972 static void display_chisq (void);
973 static void display_symmetric (void);
974 static void display_risk (void);
975 static void display_directional (void);
976 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
977 static void table_value_missing (struct tab_table *table, int c, int r,
978 unsigned char opt, const union value *v,
979 const struct variable *var);
980 static void delete_missing (void);
982 /* Output pivot table beginning at PB and continuing until PE,
983 exclusive. For efficiency, *MATP is a pointer to a matrix that can
984 hold *MAXROWS entries. */
986 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
987 double **matp, double **row_totp, double **col_totp,
988 int *maxrows, int *maxcols, int *maxcells)
991 struct table_entry **tb = pb, **te; /* Table begin, table end. */
992 int tc = pe - pb; /* Table count. */
994 /* Table entry for header comparison. */
995 struct table_entry *cmp = NULL;
997 x = xtab[(*pb)->table];
998 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1000 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1002 /* Crosstabulation table initialization. */
1005 table = tab_create (nvar + n_cols,
1006 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1007 tab_headers (table, nvar - 1, 0, 2, 0);
1009 /* First header line. */
1010 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1011 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1013 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1015 /* Second header line. */
1019 for (i = 2; i < nvar; i++)
1020 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1021 TAB_RIGHT | TAT_TITLE,
1023 ? x->vars[i]->label : x->vars[i]->name));
1024 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1025 x->vars[ROW_VAR]->name);
1026 for (i = 0; i < n_cols; i++)
1027 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1029 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1032 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1033 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1037 char *title = local_alloc (x->nvar * 64 + 128);
1041 if (cmd.pivot == CRS_PIVOT)
1042 for (i = 0; i < nvar; i++)
1045 cp = stpcpy (cp, " by ");
1046 cp = stpcpy (cp, x->vars[i]->name);
1050 cp = spprintf (cp, "%s by %s for",
1051 x->vars[0]->name, x->vars[1]->name);
1052 for (i = 2; i < nvar; i++)
1054 char buf[64], *bufp;
1059 cp = stpcpy (cp, x->vars[i]->name);
1061 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1062 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1064 cp = stpcpy (cp, bufp);
1068 cp = stpcpy (cp, " [");
1069 for (i = 0; i < num_cells; i++)
1077 static const struct tuple cell_names[] =
1079 {CRS_CL_COUNT, N_("count")},
1080 {CRS_CL_ROW, N_("row %")},
1081 {CRS_CL_COLUMN, N_("column %")},
1082 {CRS_CL_TOTAL, N_("total %")},
1083 {CRS_CL_EXPECTED, N_("expected")},
1084 {CRS_CL_RESIDUAL, N_("residual")},
1085 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1086 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1090 const struct tuple *t;
1092 for (t = cell_names; t->value != cells[i]; t++)
1093 assert (t->value != -1);
1095 cp = stpcpy (cp, ", ");
1096 cp = stpcpy (cp, gettext (t->name));
1100 tab_title (table, "%s", title);
1104 tab_offset (table, 0, 2);
1109 /* Chi-square table initialization. */
1110 if (cmd.a_statistics[CRS_ST_CHISQ])
1112 chisq = tab_create (6 + (nvar - 2),
1113 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1114 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1116 tab_title (chisq, _("Chi-square tests."));
1118 tab_offset (chisq, nvar - 2, 0);
1119 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1120 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1121 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1122 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1123 _("Asymp. Sig. (2-sided)"));
1124 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1125 _("Exact. Sig. (2-sided)"));
1126 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1127 _("Exact. Sig. (1-sided)"));
1129 tab_offset (chisq, 0, 1);
1134 /* Symmetric measures. */
1135 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1136 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1137 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1138 || cmd.a_statistics[CRS_ST_KAPPA])
1140 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1141 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1142 tab_title (sym, _("Symmetric measures."));
1144 tab_offset (sym, nvar - 2, 0);
1145 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1146 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1147 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1148 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1149 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1150 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1151 tab_offset (sym, 0, 1);
1156 /* Risk estimate. */
1157 if (cmd.a_statistics[CRS_ST_RISK])
1159 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1160 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1161 tab_title (risk, _("Risk estimate."));
1163 tab_offset (risk, nvar - 2, 0);
1164 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1165 _("95%% Confidence Interval"));
1166 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1167 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1168 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1169 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1170 tab_hline (risk, TAL_1, 2, 3, 1);
1171 tab_vline (risk, TAL_1, 2, 0, 1);
1172 tab_offset (risk, 0, 2);
1177 /* Directional measures. */
1178 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1179 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1181 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1182 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1183 tab_title (direct, _("Directional measures."));
1185 tab_offset (direct, nvar - 2, 0);
1186 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1187 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1188 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1189 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1190 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1191 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1192 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1193 tab_offset (direct, 0, 1);
1200 /* Find pivot subtable if applicable. */
1201 te = find_pivot_extent (tb, &tc, 0);
1205 /* Find all the row variable values. */
1206 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1208 /* Allocate memory space for the column and row totals. */
1209 if (n_rows > *maxrows)
1211 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1212 row_tot = *row_totp;
1215 if (n_cols > *maxcols)
1217 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1218 col_tot = *col_totp;
1222 /* Allocate table space for the matrix. */
1223 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1224 tab_realloc (table, -1,
1225 max (tab_nr (table) + (n_rows + 1) * num_cells,
1226 tab_nr (table) * (pe - pb) / (te - tb)));
1228 if (mode == GENERAL)
1230 /* Allocate memory space for the matrix. */
1231 if (n_cols * n_rows > *maxcells)
1233 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1234 *maxcells = n_cols * n_rows;
1239 /* Build the matrix and calculate column totals. */
1241 union value *cur_col = cols;
1242 union value *cur_row = rows;
1244 double *cp = col_tot;
1245 struct table_entry **p;
1248 for (p = &tb[0]; p < te; p++)
1250 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1254 for (; cur_row < &rows[n_rows]; cur_row++)
1260 mp = &mat[cur_col - cols];
1263 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1270 *cp += *mp = (*p)->u.freq;
1275 /* Zero out the rest of the matrix. */
1276 for (; cur_row < &rows[n_rows]; cur_row++)
1282 if (cur_col < &cols[n_cols])
1284 const int rem_cols = n_cols - (cur_col - cols);
1287 for (c = 0; c < rem_cols; c++)
1289 mp = &mat[cur_col - cols];
1290 for (r = 0; r < n_rows; r++)
1292 for (c = 0; c < rem_cols; c++)
1294 mp += n_cols - rem_cols;
1302 double *tp = col_tot;
1304 assert (mode == INTEGER);
1305 mat = (*tb)->u.data;
1308 /* Calculate column totals. */
1309 for (c = 0; c < n_cols; c++)
1312 double *cp = &mat[c];
1314 for (r = 0; r < n_rows; r++)
1315 cum += cp[r * n_cols];
1323 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1324 ns_cols += *cp != 0.;
1327 /* Calculate row totals. */
1330 double *rp = row_tot;
1333 for (ns_rows = 0, r = n_rows; r--; )
1336 for (c = n_cols; c--; )
1344 /* Calculate grand total. */
1350 if (n_rows < n_cols)
1351 tp = row_tot, n = n_rows;
1353 tp = col_tot, n = n_cols;
1359 /* Find the first variable that differs from the last subtable,
1360 then display the values of the dimensioning variables for
1361 each table that needs it. */
1363 int first_difference = nvar - 1;
1366 for (; ; first_difference--)
1368 assert (first_difference >= 2);
1369 if (memcmp (&cmp->values[first_difference],
1370 &(*tb)->values[first_difference],
1371 sizeof *cmp->values))
1377 display_dimensions (table, first_difference, *tb);
1379 display_dimensions (chisq, first_difference, *tb);
1381 display_dimensions (sym, first_difference, *tb);
1383 display_dimensions (risk, first_difference, *tb);
1385 display_dimensions (direct, first_difference, *tb);
1389 display_crosstabulation ();
1390 if (cmd.miss == CRS_REPORT)
1395 display_symmetric ();
1399 display_directional ();
1410 tab_resize (chisq, 4 + (nvar - 2), -1);
1421 /* Delete missing rows and columns for statistical analysis when
1424 delete_missing (void)
1429 for (r = 0; r < n_rows; r++)
1430 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1434 for (c = 0; c < n_cols; c++)
1435 mat[c + r * n_cols] = 0.;
1443 for (c = 0; c < n_cols; c++)
1444 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1448 for (r = 0; r < n_rows; r++)
1449 mat[c + r * n_cols] = 0.;
1455 /* Prepare table T for submission, and submit it. */
1457 submit (struct tab_table *t)
1464 tab_resize (t, -1, 0);
1465 if (tab_nr (t) == tab_t (t))
1470 tab_offset (t, 0, 0);
1472 for (i = 2; i < nvar; i++)
1473 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1474 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1475 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1476 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1478 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1480 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1481 tab_dim (t, crosstabs_dim);
1485 /* Sets the widths of all the columns and heights of all the rows in
1486 table T for driver D. */
1488 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1492 /* Width of a numerical column. */
1493 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1494 if (cmd.miss == CRS_REPORT)
1495 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1497 /* Set width for header columns. */
1503 w = d->width - c * (t->nc - t->l);
1504 for (i = 0; i <= t->nc; i++)
1508 if (w < d->prop_em_width * 8)
1509 w = d->prop_em_width * 8;
1511 if (w > d->prop_em_width * 15)
1512 w = d->prop_em_width * 15;
1514 for (i = 0; i < t->l; i++)
1518 for (i = t->l; i < t->nc; i++)
1521 for (i = 0; i < t->nr; i++)
1522 t->h[i] = tab_natural_height (t, d, i);
1525 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1526 int *cnt, int pivot);
1527 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1528 int *cnt, int pivot);
1530 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1532 static struct table_entry **
1533 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1535 return (mode == GENERAL
1536 ? find_pivot_extent_general (tp, cnt, pivot)
1537 : find_pivot_extent_integer (tp, cnt, pivot));
1540 /* Find the extent of a region in TP that contains one table. If
1541 PIVOT != 0 that means a set of table entries with identical table
1542 number; otherwise they also have to have the same values for every
1543 dimension after the row and column dimensions. The table that is
1544 searched starts at TP and has length CNT. Returns the first entry
1545 after the last one in the table; sets *CNT to the number of
1546 remaining values. If there are no entries in TP at all, returns
1547 NULL. A yucky interface, admittedly, but it works. */
1548 static struct table_entry **
1549 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1551 struct table_entry *fp = *tp;
1556 x = xtab[(*tp)->table];
1564 if ((*tp)->table != fp->table)
1569 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1576 /* Integer mode correspondent to find_pivot_extent_general(). This
1577 could be optimized somewhat, but I just don't give a crap about
1578 CROSSTABS performance in integer mode, which is just a
1579 CROSSTABS wart as far as I'm concerned.
1581 That said, feel free to send optimization patches to me. */
1582 static struct table_entry **
1583 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1585 struct table_entry *fp = *tp;
1590 x = xtab[(*tp)->table];
1598 if ((*tp)->table != fp->table)
1603 if (memcmp (&(*tp)->values[2], &fp->values[2],
1604 sizeof (union value) * (x->nvar - 2)))
1611 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1612 result. WIDTH_ points to an int which is either 0 for a
1613 numeric value or a string width for a string value. */
1615 compare_value (const void *a_, const void *b_, const void *width_)
1617 const union value *a = a_;
1618 const union value *b = b_;
1619 const int *pwidth = width_;
1620 const int width = *pwidth;
1623 return (a->f < b->f) ? -1 : (a->f > b->f);
1625 return strncmp (a->s, b->s, width);
1628 /* Given an array of ENTRY_CNT table_entry structures starting at
1629 ENTRIES, creates a sorted list of the values that the variable
1630 with index VAR_IDX takes on. The values are returned as a
1631 malloc()'darray stored in *VALUES, with the number of values
1632 stored in *VALUE_CNT.
1635 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1636 union value **values, int *value_cnt)
1638 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1640 if (mode == GENERAL)
1642 int width = v->width;
1645 *values = xnmalloc (entry_cnt, sizeof **values);
1646 for (i = 0; i < entry_cnt; i++)
1647 (*values)[i] = entries[i]->values[var_idx];
1648 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1649 compare_value, &width);
1653 struct var_range *vr = get_var_range (v);
1656 assert (mode == INTEGER);
1657 *values = xnmalloc (vr->count, sizeof **values);
1658 for (i = 0; i < vr->count; i++)
1659 (*values)[i].f = i + vr->min;
1660 *value_cnt = vr->count;
1664 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1665 from V, displayed with print format spec from variable VAR. When
1666 in REPORT missing-value mode, missing values have an M appended. */
1668 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1669 const union value *v, const struct variable *var)
1673 const char *label = val_labs_find (var->val_labs, *v);
1676 tab_text (table, c, r, TAB_LEFT, label);
1680 s.string = tab_alloc (table, var->print.w);
1681 format_short (s.string, &var->print, v);
1682 s.length = strlen (s.string);
1683 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1684 s.string[s.length++] = 'M';
1685 while (s.length && *s.string == ' ')
1690 tab_raw (table, c, r, opt, &s);
1693 /* Draws a line across TABLE at the current row to indicate the most
1694 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1695 that changed, and puts the values that changed into the table. TB
1696 and X must be the corresponding table_entry and crosstab,
1699 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1701 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1703 for (; first_difference >= 2; first_difference--)
1704 table_value_missing (table, nvar - first_difference - 1, 0,
1705 TAB_RIGHT, &tb->values[first_difference],
1706 x->vars[first_difference]);
1709 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1710 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1711 additionally suffixed with a letter `M'. */
1713 format_cell_entry (struct tab_table *table, int c, int r, double value,
1714 char suffix, bool mark_missing)
1716 const struct fmt_spec f = {FMT_F, 10, 1};
1721 s.string = tab_alloc (table, 16);
1723 data_out (&v, &f, s.string);
1724 while (*s.string == ' ')
1730 s.string[s.length++] = suffix;
1732 s.string[s.length++] = 'M';
1734 tab_raw (table, c, r, TAB_RIGHT, &s);
1737 /* Displays the crosstabulation table. */
1739 display_crosstabulation (void)
1744 for (r = 0; r < n_rows; r++)
1745 table_value_missing (table, nvar - 2, r * num_cells,
1746 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1748 tab_text (table, nvar - 2, n_rows * num_cells,
1749 TAB_LEFT, _("Total"));
1751 /* Put in the actual cells. */
1756 tab_offset (table, nvar - 1, -1);
1757 for (r = 0; r < n_rows; r++)
1760 tab_hline (table, TAL_1, -1, n_cols, 0);
1761 for (c = 0; c < n_cols; c++)
1763 bool mark_missing = false;
1764 double expected_value = row_tot[r] * col_tot[c] / W;
1765 if (cmd.miss == CRS_REPORT
1766 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1767 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1769 mark_missing = true;
1770 for (i = 0; i < num_cells; i++)
1781 v = *mp / row_tot[r] * 100.;
1785 v = *mp / col_tot[c] * 100.;
1792 case CRS_CL_EXPECTED:
1795 case CRS_CL_RESIDUAL:
1796 v = *mp - expected_value;
1798 case CRS_CL_SRESIDUAL:
1799 v = (*mp - expected_value) / sqrt (expected_value);
1801 case CRS_CL_ASRESIDUAL:
1802 v = ((*mp - expected_value)
1803 / sqrt (expected_value
1804 * (1. - row_tot[r] / W)
1805 * (1. - col_tot[c] / W)));
1811 format_cell_entry (table, c, i, v, suffix, mark_missing);
1817 tab_offset (table, -1, tab_row (table) + num_cells);
1825 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1826 for (r = 0; r < n_rows; r++)
1829 bool mark_missing = false;
1831 if (cmd.miss == CRS_REPORT
1832 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1833 mark_missing = true;
1835 for (i = 0; i < num_cells; i++)
1849 v = row_tot[r] / W * 100.;
1853 v = row_tot[r] / W * 100.;
1856 case CRS_CL_EXPECTED:
1857 case CRS_CL_RESIDUAL:
1858 case CRS_CL_SRESIDUAL:
1859 case CRS_CL_ASRESIDUAL:
1866 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1867 tab_next_row (table);
1872 /* Column totals, grand total. */
1878 tab_hline (table, TAL_1, -1, n_cols, 0);
1879 for (c = 0; c <= n_cols; c++)
1881 double ct = c < n_cols ? col_tot[c] : W;
1882 bool mark_missing = false;
1886 if (cmd.miss == CRS_REPORT && c < n_cols
1887 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1888 mark_missing = true;
1890 for (i = 0; i < num_cells; i++)
1912 case CRS_CL_EXPECTED:
1913 case CRS_CL_RESIDUAL:
1914 case CRS_CL_SRESIDUAL:
1915 case CRS_CL_ASRESIDUAL:
1921 format_cell_entry (table, c, i, v, suffix, mark_missing);
1926 tab_offset (table, -1, tab_row (table) + last_row);
1929 tab_offset (table, 0, -1);
1932 static void calc_r (double *X, double *Y, double *, double *, double *);
1933 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1935 /* Display chi-square statistics. */
1937 display_chisq (void)
1939 static const char *chisq_stats[N_CHISQ] =
1941 N_("Pearson Chi-Square"),
1942 N_("Likelihood Ratio"),
1943 N_("Fisher's Exact Test"),
1944 N_("Continuity Correction"),
1945 N_("Linear-by-Linear Association"),
1947 double chisq_v[N_CHISQ];
1948 double fisher1, fisher2;
1954 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1956 tab_offset (chisq, nvar - 2, -1);
1958 for (i = 0; i < N_CHISQ; i++)
1960 if ((i != 2 && chisq_v[i] == SYSMIS)
1961 || (i == 2 && fisher1 == SYSMIS))
1965 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1968 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1969 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1970 tab_float (chisq, 3, 0, TAB_RIGHT,
1971 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1976 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1977 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1979 tab_next_row (chisq);
1982 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1983 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1984 tab_next_row (chisq);
1986 tab_offset (chisq, 0, -1);
1989 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1990 double[N_SYMMETRIC]);
1992 /* Display symmetric measures. */
1994 display_symmetric (void)
1996 static const char *categories[] =
1998 N_("Nominal by Nominal"),
1999 N_("Ordinal by Ordinal"),
2000 N_("Interval by Interval"),
2001 N_("Measure of Agreement"),
2004 static const char *stats[N_SYMMETRIC] =
2008 N_("Contingency Coefficient"),
2009 N_("Kendall's tau-b"),
2010 N_("Kendall's tau-c"),
2012 N_("Spearman Correlation"),
2017 static const int stats_categories[N_SYMMETRIC] =
2019 0, 0, 0, 1, 1, 1, 1, 2, 3,
2023 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2026 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2029 tab_offset (sym, nvar - 2, -1);
2031 for (i = 0; i < N_SYMMETRIC; i++)
2033 if (sym_v[i] == SYSMIS)
2036 if (stats_categories[i] != last_cat)
2038 last_cat = stats_categories[i];
2039 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2042 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2043 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2044 if (sym_ase[i] != SYSMIS)
2045 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2046 if (sym_t[i] != SYSMIS)
2047 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2048 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2052 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2053 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2056 tab_offset (sym, 0, -1);
2059 static int calc_risk (double[], double[], double[], union value *);
2061 /* Display risk estimate. */
2066 double risk_v[3], lower[3], upper[3];
2070 if (!calc_risk (risk_v, upper, lower, c))
2073 tab_offset (risk, nvar - 2, -1);
2075 for (i = 0; i < 3; i++)
2077 if (risk_v[i] == SYSMIS)
2083 if (x->vars[COL_VAR]->type == NUMERIC)
2084 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2085 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2087 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2088 x->vars[COL_VAR]->name,
2089 x->vars[COL_VAR]->width, c[0].s,
2090 x->vars[COL_VAR]->width, c[1].s);
2094 if (x->vars[ROW_VAR]->type == NUMERIC)
2095 sprintf (buf, _("For cohort %s = %g"),
2096 x->vars[ROW_VAR]->name, rows[i - 1].f);
2098 sprintf (buf, _("For cohort %s = %.*s"),
2099 x->vars[ROW_VAR]->name,
2100 x->vars[ROW_VAR]->width, rows[i - 1].s);
2104 tab_text (risk, 0, 0, TAB_LEFT, buf);
2105 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2106 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2107 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2108 tab_next_row (risk);
2111 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2112 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2113 tab_next_row (risk);
2115 tab_offset (risk, 0, -1);
2118 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2119 double[N_DIRECTIONAL]);
2121 /* Display directional measures. */
2123 display_directional (void)
2125 static const char *categories[] =
2127 N_("Nominal by Nominal"),
2128 N_("Ordinal by Ordinal"),
2129 N_("Nominal by Interval"),
2132 static const char *stats[] =
2135 N_("Goodman and Kruskal tau"),
2136 N_("Uncertainty Coefficient"),
2141 static const char *types[] =
2148 static const int stats_categories[N_DIRECTIONAL] =
2150 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2153 static const int stats_stats[N_DIRECTIONAL] =
2155 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2158 static const int stats_types[N_DIRECTIONAL] =
2160 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2163 static const int *stats_lookup[] =
2170 static const char **stats_names[] =
2182 double direct_v[N_DIRECTIONAL];
2183 double direct_ase[N_DIRECTIONAL];
2184 double direct_t[N_DIRECTIONAL];
2188 if (!calc_directional (direct_v, direct_ase, direct_t))
2191 tab_offset (direct, nvar - 2, -1);
2193 for (i = 0; i < N_DIRECTIONAL; i++)
2195 if (direct_v[i] == SYSMIS)
2201 for (j = 0; j < 3; j++)
2202 if (last[j] != stats_lookup[j][i])
2205 tab_hline (direct, TAL_1, j, 6, 0);
2210 int k = last[j] = stats_lookup[j][i];
2215 string = x->vars[0]->name;
2217 string = x->vars[1]->name;
2219 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2220 gettext (stats_names[j][k]), string);
2225 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2226 if (direct_ase[i] != SYSMIS)
2227 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2228 if (direct_t[i] != SYSMIS)
2229 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2230 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2231 tab_next_row (direct);
2234 tab_offset (direct, 0, -1);
2237 /* Statistical calculations. */
2239 /* Returns the value of the gamma (factorial) function for an integer
2242 gamma_int (double x)
2247 for (i = 2; i < x; i++)
2252 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2254 static inline double
2255 Pr (int a, int b, int c, int d)
2257 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2258 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2259 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2260 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2261 / gamma_int (a + b + c + d + 1.));
2264 /* Swap the contents of A and B. */
2266 swap (int *a, int *b)
2273 /* Calculate significance for Fisher's exact test as specified in
2274 _SPSS Statistical Algorithms_, Appendix 5. */
2276 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2280 if (min (c, d) < min (a, b))
2281 swap (&a, &c), swap (&b, &d);
2282 if (min (b, d) < min (a, c))
2283 swap (&a, &b), swap (&c, &d);
2287 swap (&a, &b), swap (&c, &d);
2289 swap (&a, &c), swap (&b, &d);
2293 for (x = 0; x <= a; x++)
2294 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2296 *fisher2 = *fisher1;
2297 for (x = 1; x <= b; x++)
2298 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2301 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2302 columns with values COLS and N_ROWS rows with values ROWS. Values
2303 in the matrix sum to W. */
2305 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2306 double *fisher1, double *fisher2)
2310 chisq[0] = chisq[1] = 0.;
2311 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2312 *fisher1 = *fisher2 = SYSMIS;
2314 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2316 if (ns_rows <= 1 || ns_cols <= 1)
2318 chisq[0] = chisq[1] = SYSMIS;
2322 for (r = 0; r < n_rows; r++)
2323 for (c = 0; c < n_cols; c++)
2325 const double expected = row_tot[r] * col_tot[c] / W;
2326 const double freq = mat[n_cols * r + c];
2327 const double residual = freq - expected;
2329 chisq[0] += residual * residual / expected;
2331 chisq[1] += freq * log (expected / freq);
2342 /* Calculate Yates and Fisher exact test. */
2343 if (ns_cols == 2 && ns_rows == 2)
2345 double f11, f12, f21, f22;
2351 for (i = j = 0; i < n_cols; i++)
2352 if (col_tot[i] != 0.)
2361 f11 = mat[nz_cols[0]];
2362 f12 = mat[nz_cols[1]];
2363 f21 = mat[nz_cols[0] + n_cols];
2364 f22 = mat[nz_cols[1] + n_cols];
2369 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2372 chisq[3] = (W * x * x
2373 / (f11 + f12) / (f21 + f22)
2374 / (f11 + f21) / (f12 + f22));
2382 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2383 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2386 /* Calculate Mantel-Haenszel. */
2387 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2389 double r, ase_0, ase_1;
2390 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2392 chisq[4] = (W - 1.) * r * r;
2397 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2398 ASE_1, and ase_0 into ASE_0. The row and column values must be
2399 passed in X and Y. */
2401 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2403 double SX, SY, S, T;
2405 double sum_XYf, sum_X2Y2f;
2406 double sum_Xr, sum_X2r;
2407 double sum_Yc, sum_Y2c;
2410 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2411 for (j = 0; j < n_cols; j++)
2413 double fij = mat[j + i * n_cols];
2414 double product = X[i] * Y[j];
2415 double temp = fij * product;
2417 sum_X2Y2f += temp * product;
2420 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2422 sum_Xr += X[i] * row_tot[i];
2423 sum_X2r += X[i] * X[i] * row_tot[i];
2427 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2429 sum_Yc += Y[i] * col_tot[i];
2430 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2434 S = sum_XYf - sum_Xr * sum_Yc / W;
2435 SX = sum_X2r - sum_Xr * sum_Xr / W;
2436 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2439 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2444 for (s = c = 0., i = 0; i < n_rows; i++)
2445 for (j = 0; j < n_cols; j++)
2447 double Xresid, Yresid;
2450 Xresid = X[i] - Xbar;
2451 Yresid = Y[j] - Ybar;
2452 temp = (T * Xresid * Yresid
2454 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2455 y = mat[j + i * n_cols] * temp * temp - c;
2460 *ase_1 = sqrt (s) / (T * T);
2464 static double somers_d_v[3];
2465 static double somers_d_ase[3];
2466 static double somers_d_t[3];
2468 /* Calculate symmetric statistics and their asymptotic standard
2469 errors. Returns 0 if none could be calculated. */
2471 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2472 double t[N_SYMMETRIC])
2474 int q = min (ns_rows, ns_cols);
2483 for (i = 0; i < N_SYMMETRIC; i++)
2484 v[i] = ase[i] = t[i] = SYSMIS;
2487 /* Phi, Cramer's V, contingency coefficient. */
2488 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2490 double Xp = 0.; /* Pearson chi-square. */
2495 for (r = 0; r < n_rows; r++)
2496 for (c = 0; c < n_cols; c++)
2498 const double expected = row_tot[r] * col_tot[c] / W;
2499 const double freq = mat[n_cols * r + c];
2500 const double residual = freq - expected;
2502 Xp += residual * residual / expected;
2506 if (cmd.a_statistics[CRS_ST_PHI])
2508 v[0] = sqrt (Xp / W);
2509 v[1] = sqrt (Xp / (W * (q - 1)));
2511 if (cmd.a_statistics[CRS_ST_CC])
2512 v[2] = sqrt (Xp / (Xp + W));
2515 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2516 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2521 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2528 for (r = 0; r < n_rows; r++)
2529 Dr -= row_tot[r] * row_tot[r];
2530 for (c = 0; c < n_cols; c++)
2531 Dc -= col_tot[c] * col_tot[c];
2537 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2538 for (c = 0; c < n_cols; c++)
2542 for (r = 0; r < n_rows; r++)
2543 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2553 for (i = 0; i < n_rows; i++)
2557 for (j = 1; j < n_cols; j++)
2558 Cij += col_tot[j] - cum[j + i * n_cols];
2561 for (j = 1; j < n_cols; j++)
2562 Dij += cum[j + (i - 1) * n_cols];
2566 double fij = mat[j + i * n_cols];
2572 assert (j < n_cols);
2574 Cij -= col_tot[j] - cum[j + i * n_cols];
2575 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2579 Cij += cum[j - 1 + (i - 1) * n_cols];
2580 Dij -= cum[j + (i - 1) * n_cols];
2586 if (cmd.a_statistics[CRS_ST_BTAU])
2587 v[3] = (P - Q) / sqrt (Dr * Dc);
2588 if (cmd.a_statistics[CRS_ST_CTAU])
2589 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2590 if (cmd.a_statistics[CRS_ST_GAMMA])
2591 v[5] = (P - Q) / (P + Q);
2593 /* ASE for tau-b, tau-c, gamma. Calculations could be
2594 eliminated here, at expense of memory. */
2599 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2600 for (i = 0; i < n_rows; i++)
2604 for (j = 1; j < n_cols; j++)
2605 Cij += col_tot[j] - cum[j + i * n_cols];
2608 for (j = 1; j < n_cols; j++)
2609 Dij += cum[j + (i - 1) * n_cols];
2613 double fij = mat[j + i * n_cols];
2615 if (cmd.a_statistics[CRS_ST_BTAU])
2617 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2618 + v[3] * (row_tot[i] * Dc
2619 + col_tot[j] * Dr));
2620 btau_cum += fij * temp * temp;
2624 const double temp = Cij - Dij;
2625 ctau_cum += fij * temp * temp;
2628 if (cmd.a_statistics[CRS_ST_GAMMA])
2630 const double temp = Q * Cij - P * Dij;
2631 gamma_cum += fij * temp * temp;
2634 if (cmd.a_statistics[CRS_ST_D])
2636 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2637 - (P - Q) * (W - row_tot[i]));
2638 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2639 - (Q - P) * (W - col_tot[j]));
2644 assert (j < n_cols);
2646 Cij -= col_tot[j] - cum[j + i * n_cols];
2647 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2651 Cij += cum[j - 1 + (i - 1) * n_cols];
2652 Dij -= cum[j + (i - 1) * n_cols];
2658 btau_var = ((btau_cum
2659 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2661 if (cmd.a_statistics[CRS_ST_BTAU])
2663 ase[3] = sqrt (btau_var);
2664 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2667 if (cmd.a_statistics[CRS_ST_CTAU])
2669 ase[4] = ((2 * q / ((q - 1) * W * W))
2670 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2671 t[4] = v[4] / ase[4];
2673 if (cmd.a_statistics[CRS_ST_GAMMA])
2675 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2676 t[5] = v[5] / (2. / (P + Q)
2677 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2679 if (cmd.a_statistics[CRS_ST_D])
2681 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2682 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2683 somers_d_t[0] = (somers_d_v[0]
2685 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2686 somers_d_v[1] = (P - Q) / Dc;
2687 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2688 somers_d_t[1] = (somers_d_v[1]
2690 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2691 somers_d_v[2] = (P - Q) / Dr;
2692 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2693 somers_d_t[2] = (somers_d_v[2]
2695 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2701 /* Spearman correlation, Pearson's r. */
2702 if (cmd.a_statistics[CRS_ST_CORR])
2704 double *R = local_alloc (sizeof *R * n_rows);
2705 double *C = local_alloc (sizeof *C * n_cols);
2708 double y, t, c = 0., s = 0.;
2713 R[i] = s + (row_tot[i] + 1.) / 2.;
2720 assert (i < n_rows);
2725 double y, t, c = 0., s = 0.;
2730 C[j] = s + (col_tot[j] + 1.) / 2;
2737 assert (j < n_cols);
2741 calc_r (R, C, &v[6], &t[6], &ase[6]);
2747 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2751 /* Cohen's kappa. */
2752 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2754 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2757 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2758 i < ns_rows; i++, j++)
2762 while (col_tot[j] == 0.)
2765 prod = row_tot[i] * col_tot[j];
2766 sum = row_tot[i] + col_tot[j];
2768 sum_fii += mat[j + i * n_cols];
2770 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2771 sum_riciri_ci += prod * sum;
2773 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2774 for (j = 0; j < ns_cols; j++)
2776 double sum = row_tot[i] + col_tot[j];
2777 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2780 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2782 ase[8] = sqrt ((W * W * sum_rici
2783 + sum_rici * sum_rici
2784 - W * sum_riciri_ci)
2785 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2787 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2788 / pow2 (W * W - sum_rici))
2789 + ((2. * (W - sum_fii)
2790 * (2. * sum_fii * sum_rici
2791 - W * sum_fiiri_ci))
2792 / cube (W * W - sum_rici))
2793 + (pow2 (W - sum_fii)
2794 * (W * sum_fijri_ci2 - 4.
2795 * sum_rici * sum_rici)
2796 / pow4 (W * W - sum_rici))));
2798 t[8] = v[8] / ase[8];
2805 /* Calculate risk estimate. */
2807 calc_risk (double *value, double *upper, double *lower, union value *c)
2809 double f11, f12, f21, f22;
2815 for (i = 0; i < 3; i++)
2816 value[i] = upper[i] = lower[i] = SYSMIS;
2819 if (ns_rows != 2 || ns_cols != 2)
2826 for (i = j = 0; i < n_cols; i++)
2827 if (col_tot[i] != 0.)
2836 f11 = mat[nz_cols[0]];
2837 f12 = mat[nz_cols[1]];
2838 f21 = mat[nz_cols[0] + n_cols];
2839 f22 = mat[nz_cols[1] + n_cols];
2841 c[0] = cols[nz_cols[0]];
2842 c[1] = cols[nz_cols[1]];
2845 value[0] = (f11 * f22) / (f12 * f21);
2846 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2847 lower[0] = value[0] * exp (-1.960 * v);
2848 upper[0] = value[0] * exp (1.960 * v);
2850 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2851 v = sqrt ((f12 / (f11 * (f11 + f12)))
2852 + (f22 / (f21 * (f21 + f22))));
2853 lower[1] = value[1] * exp (-1.960 * v);
2854 upper[1] = value[1] * exp (1.960 * v);
2856 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2857 v = sqrt ((f11 / (f12 * (f11 + f12)))
2858 + (f21 / (f22 * (f21 + f22))));
2859 lower[2] = value[2] * exp (-1.960 * v);
2860 upper[2] = value[2] * exp (1.960 * v);
2865 /* Calculate directional measures. */
2867 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2868 double t[N_DIRECTIONAL])
2873 for (i = 0; i < N_DIRECTIONAL; i++)
2874 v[i] = ase[i] = t[i] = SYSMIS;
2878 if (cmd.a_statistics[CRS_ST_LAMBDA])
2880 double *fim = xnmalloc (n_rows, sizeof *fim);
2881 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2882 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2883 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2884 double sum_fim, sum_fmj;
2886 int rm_index, cm_index;
2889 /* Find maximum for each row and their sum. */
2890 for (sum_fim = 0., i = 0; i < n_rows; i++)
2892 double max = mat[i * n_cols];
2895 for (j = 1; j < n_cols; j++)
2896 if (mat[j + i * n_cols] > max)
2898 max = mat[j + i * n_cols];
2902 sum_fim += fim[i] = max;
2903 fim_index[i] = index;
2906 /* Find maximum for each column. */
2907 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2909 double max = mat[j];
2912 for (i = 1; i < n_rows; i++)
2913 if (mat[j + i * n_cols] > max)
2915 max = mat[j + i * n_cols];
2919 sum_fmj += fmj[j] = max;
2920 fmj_index[j] = index;
2923 /* Find maximum row total. */
2926 for (i = 1; i < n_rows; i++)
2927 if (row_tot[i] > rm)
2933 /* Find maximum column total. */
2936 for (j = 1; j < n_cols; j++)
2937 if (col_tot[j] > cm)
2943 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2944 v[1] = (sum_fmj - rm) / (W - rm);
2945 v[2] = (sum_fim - cm) / (W - cm);
2947 /* ASE1 for Y given X. */
2951 for (accum = 0., i = 0; i < n_rows; i++)
2952 for (j = 0; j < n_cols; j++)
2954 const int deltaj = j == cm_index;
2955 accum += (mat[j + i * n_cols]
2956 * pow2 ((j == fim_index[i])
2961 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2964 /* ASE0 for Y given X. */
2968 for (accum = 0., i = 0; i < n_rows; i++)
2969 if (cm_index != fim_index[i])
2970 accum += (mat[i * n_cols + fim_index[i]]
2971 + mat[i * n_cols + cm_index]);
2972 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2975 /* ASE1 for X given Y. */
2979 for (accum = 0., i = 0; i < n_rows; i++)
2980 for (j = 0; j < n_cols; j++)
2982 const int deltaj = i == rm_index;
2983 accum += (mat[j + i * n_cols]
2984 * pow2 ((i == fmj_index[j])
2989 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2992 /* ASE0 for X given Y. */
2996 for (accum = 0., j = 0; j < n_cols; j++)
2997 if (rm_index != fmj_index[j])
2998 accum += (mat[j + n_cols * fmj_index[j]]
2999 + mat[j + n_cols * rm_index]);
3000 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3003 /* Symmetric ASE0 and ASE1. */
3008 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3009 for (j = 0; j < n_cols; j++)
3011 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3012 int temp1 = (i == rm_index) + (j == cm_index);
3013 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3014 accum1 += (mat[j + i * n_cols]
3015 * pow2 (temp0 + (v[0] - 1.) * temp1));
3017 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3018 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3019 / (2. * W - rm - cm));
3028 double sum_fij2_ri, sum_fij2_ci;
3029 double sum_ri2, sum_cj2;
3031 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3032 for (j = 0; j < n_cols; j++)
3034 double temp = pow2 (mat[j + i * n_cols]);
3035 sum_fij2_ri += temp / row_tot[i];
3036 sum_fij2_ci += temp / col_tot[j];
3039 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3040 sum_ri2 += row_tot[i] * row_tot[i];
3042 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3043 sum_cj2 += col_tot[j] * col_tot[j];
3045 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3046 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3050 if (cmd.a_statistics[CRS_ST_UC])
3052 double UX, UY, UXY, P;
3053 double ase1_yx, ase1_xy, ase1_sym;
3056 for (UX = 0., i = 0; i < n_rows; i++)
3057 if (row_tot[i] > 0.)
3058 UX -= row_tot[i] / W * log (row_tot[i] / W);
3060 for (UY = 0., j = 0; j < n_cols; j++)
3061 if (col_tot[j] > 0.)
3062 UY -= col_tot[j] / W * log (col_tot[j] / W);
3064 for (UXY = P = 0., i = 0; i < n_rows; i++)
3065 for (j = 0; j < n_cols; j++)
3067 double entry = mat[j + i * n_cols];
3072 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3073 UXY -= entry / W * log (entry / W);
3076 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3077 for (j = 0; j < n_cols; j++)
3079 double entry = mat[j + i * n_cols];
3084 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3085 + (UX - UXY) * log (col_tot[j] / W));
3086 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3087 + (UY - UXY) * log (row_tot[i] / W));
3088 ase1_sym += entry * pow2 ((UXY
3089 * log (row_tot[i] * col_tot[j] / (W * W)))
3090 - (UX + UY) * log (entry / W));
3093 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3094 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3095 t[5] = v[5] / ((2. / (W * (UX + UY)))
3096 * sqrt (P - pow2 (UX + UY - UXY) / W));
3098 v[6] = (UX + UY - UXY) / UX;
3099 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3100 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3102 v[7] = (UX + UY - UXY) / UY;
3103 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3104 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3108 if (cmd.a_statistics[CRS_ST_D])
3113 calc_symmetric (NULL, NULL, NULL);
3114 for (i = 0; i < 3; i++)
3116 v[8 + i] = somers_d_v[i];
3117 ase[8 + i] = somers_d_ase[i];
3118 t[8 + i] = somers_d_t[i];
3123 if (cmd.a_statistics[CRS_ST_ETA])
3126 double sum_Xr, sum_X2r;
3130 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3132 sum_Xr += rows[i].f * row_tot[i];
3133 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3135 SX = sum_X2r - sum_Xr * sum_Xr / W;
3137 for (SXW = 0., j = 0; j < n_cols; j++)
3141 for (cum = 0., i = 0; i < n_rows; i++)
3143 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3144 cum += rows[i].f * mat[j + i * n_cols];
3147 SXW -= cum * cum / col_tot[j];
3149 v[11] = sqrt (1. - SXW / SX);
3153 double sum_Yc, sum_Y2c;
3157 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3159 sum_Yc += cols[i].f * col_tot[i];
3160 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3162 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3164 for (SYW = 0., i = 0; i < n_rows; i++)
3168 for (cum = 0., j = 0; j < n_cols; j++)
3170 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3171 cum += cols[j].f * mat[j + i * n_cols];
3174 SYW -= cum * cum / row_tot[i];
3176 v[12] = sqrt (1. - SYW / SY);
3183 /* A wrapper around data_out() that limits string output to short
3184 string width and null terminates the result. */
3186 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3188 struct fmt_spec fmt_subst;
3190 /* Limit to short string width. */
3191 if (fmt_is_string (fp->type))
3195 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3196 if (fmt_subst.type == FMT_A)
3197 fmt_subst.w = min (8, fmt_subst.w);
3199 fmt_subst.w = min (16, fmt_subst.w);
3205 data_out (v, fp, s);
3207 /* Null terminate. */