1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 - Pearson's R (but not Spearman!) is off a little.
22 - T values for Spearman's R and Pearson's R are wrong.
23 - How to calculate significance of symmetric and directional measures?
24 - Asymmetric ASEs and T values for lambda are wrong.
25 - ASE of Goodman and Kruskal's tau is not calculated.
26 - ASE of symmetric somers' d is wrong.
27 - Approx. T of uncertainty coefficient is wrong.
34 #include <gsl/gsl_cdf.h>
38 #include <data/case.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
75 missing=miss:!table/include/report;
76 +write[wr_]=none,cells,all;
77 +format=fmt:!labels/nolabels/novallabs,
80 tabl:!tables/notables,
83 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
85 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
91 /* Number of chi-square statistics. */
94 /* Number of symmetric statistics. */
97 /* Number of directional statistics. */
98 #define N_DIRECTIONAL 13
100 /* A single table entry for general mode. */
103 int table; /* Flattened table number. */
106 double freq; /* Frequency count. */
107 double *data; /* Crosstabulation table for integer mode. */
110 union value values[1]; /* Values. */
113 /* A crosstabulation. */
116 int nvar; /* Number of variables. */
117 double missing; /* Missing cases count. */
118 int ofs; /* Integer mode: Offset into sorted_tab[]. */
119 struct variable *vars[2]; /* At least two variables; sorted by
120 larger indices first. */
123 /* Integer mode variable info. */
126 int min; /* Minimum value. */
127 int max; /* Maximum value + 1. */
128 int count; /* max - min. */
131 static inline struct var_range *
132 get_var_range (struct variable *v)
134 return var_get_aux (v);
137 /* Indexes into crosstab.v. */
144 /* General mode crosstabulation table. */
145 static struct hsh_table *gen_tab; /* Hash table. */
146 static int n_sorted_tab; /* Number of entries in sorted_tab. */
147 static struct table_entry **sorted_tab; /* Sorted table. */
149 /* Variables specifies on VARIABLES. */
150 static struct variable **variables;
151 static size_t variables_cnt;
154 static struct crosstab **xtab;
157 /* Integer or general mode? */
166 static int num_cells; /* Number of cells requested. */
167 static int cells[8]; /* Cells requested. */
170 static int write; /* One of WR_* that specifies the WRITE style. */
172 /* Command parsing info. */
173 static struct cmd_crosstabs cmd;
176 static struct pool *pl_tc; /* For table cells. */
177 static struct pool *pl_col; /* For column data. */
179 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
180 static void precalc (const struct ccase *, void *, const struct dataset *);
181 static bool calc_general (const struct ccase *, void *, const struct dataset *);
182 static bool calc_integer (const struct ccase *, void *, const struct dataset *);
183 static bool postcalc (void *, const struct dataset *);
184 static void submit (struct tab_table *);
186 static void format_short (char *s, const struct fmt_spec *fp,
187 const union value *v);
189 /* Parse and execute CROSSTABS, then clean up. */
191 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
193 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
202 /* Parses and executes the CROSSTABS procedure. */
204 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
213 pl_tc = pool_create ();
214 pl_col = pool_create ();
216 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
219 mode = variables ? INTEGER : GENERAL;
224 cmd.a_cells[CRS_CL_COUNT] = 1;
230 for (i = 0; i < CRS_CL_count; i++)
235 cmd.a_cells[CRS_CL_COUNT] = 1;
236 cmd.a_cells[CRS_CL_ROW] = 1;
237 cmd.a_cells[CRS_CL_COLUMN] = 1;
238 cmd.a_cells[CRS_CL_TOTAL] = 1;
240 if (cmd.a_cells[CRS_CL_ALL])
242 for (i = 0; i < CRS_CL_count; i++)
244 cmd.a_cells[CRS_CL_ALL] = 0;
246 cmd.a_cells[CRS_CL_NONE] = 0;
248 for (num_cells = i = 0; i < CRS_CL_count; i++)
250 cells[num_cells++] = i;
253 if (cmd.sbc_statistics)
258 for (i = 0; i < CRS_ST_count; i++)
259 if (cmd.a_statistics[i])
262 cmd.a_statistics[CRS_ST_CHISQ] = 1;
263 if (cmd.a_statistics[CRS_ST_ALL])
264 for (i = 0; i < CRS_ST_count; i++)
265 cmd.a_statistics[i] = 1;
269 if (cmd.miss == CRS_REPORT && mode == GENERAL)
271 msg (SE, _("Missing mode REPORT not allowed in general mode. "
272 "Assuming MISSING=TABLE."));
273 cmd.miss = CRS_TABLE;
277 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
278 cmd.a_write[CRS_WR_ALL] = 0;
279 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
281 msg (SE, _("Write mode ALL not allowed in general mode. "
282 "Assuming WRITE=CELLS."));
283 cmd.a_write[CRS_WR_CELLS] = 1;
286 && (cmd.a_write[CRS_WR_NONE]
287 + cmd.a_write[CRS_WR_ALL]
288 + cmd.a_write[CRS_WR_CELLS] == 0))
289 cmd.a_write[CRS_WR_CELLS] = 1;
290 if (cmd.a_write[CRS_WR_CELLS])
291 write = CRS_WR_CELLS;
292 else if (cmd.a_write[CRS_WR_ALL])
297 ok = procedure_with_splits (ds, precalc,
298 mode == GENERAL ? calc_general : calc_integer,
301 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
304 /* Parses the TABLES subcommand. */
306 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
308 struct var_set *var_set;
310 struct variable ***by = NULL;
311 size_t *by_nvar = NULL;
315 /* Ensure that this is a TABLES subcommand. */
316 if (!lex_match_id (lexer, "TABLES")
317 && (lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
318 && lex_token (lexer) != T_ALL)
320 lex_match (lexer, '=');
322 if (variables != NULL)
323 var_set = var_set_create_from_array (variables, variables_cnt);
325 var_set = var_set_create_from_dict (dataset_dict (ds));
326 assert (var_set != NULL);
330 by = xnrealloc (by, n_by + 1, sizeof *by);
331 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
332 if (!parse_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
333 PV_NO_DUPLICATE | PV_NO_SCRATCH))
335 if (xalloc_oversized (nx, by_nvar[n_by]))
337 msg (SE, _("Too many crosstabulation variables or dimensions."));
343 if (!lex_match (lexer, T_BY))
347 lex_error (lexer, _("expecting BY"));
356 int *by_iter = xcalloc (n_by, sizeof *by_iter);
359 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
360 for (i = 0; i < nx; i++)
364 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
371 for (i = 0; i < n_by; i++)
372 x->vars[i] = by[i][by_iter[i]];
378 for (i = n_by - 1; i >= 0; i--)
380 if (++by_iter[i] < by_nvar[i])
393 /* All return paths lead here. */
397 for (i = 0; i < n_by; i++)
403 var_set_destroy (var_set);
408 /* Parses the VARIABLES subcommand. */
410 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
414 msg (SE, _("VARIABLES must be specified before TABLES."));
418 lex_match (lexer, '=');
422 size_t orig_nv = variables_cnt;
427 if (!parse_variables (lexer, dataset_dict (ds),
428 &variables, &variables_cnt,
429 (PV_APPEND | PV_NUMERIC
430 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
433 if (lex_token (lexer) != '(')
435 lex_error (lexer, "expecting `('");
440 if (!lex_force_int (lexer))
442 min = lex_integer (lexer);
445 lex_match (lexer, ',');
447 if (!lex_force_int (lexer))
449 max = lex_integer (lexer);
452 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
458 if (lex_token (lexer) != ')')
460 lex_error (lexer, "expecting `)'");
465 for (i = orig_nv; i < variables_cnt; i++)
467 struct var_range *vr = xmalloc (sizeof *vr);
470 vr->count = max - min + 1;
471 var_attach_aux (variables[i], vr, var_dtor_free);
474 if (lex_token (lexer) == '/')
486 /* Data file processing. */
488 static int compare_table_entry (const void *, const void *, const void *);
489 static unsigned hash_table_entry (const void *, const void *);
491 /* Set up the crosstabulation tables for processing. */
493 precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
495 output_split_file_values (ds, first);
498 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
508 for (i = 0; i < nxtab; i++)
510 struct crosstab *x = xtab[i];
515 x->ofs = n_sorted_tab;
517 for (j = 2; j < x->nvar; j++)
518 count *= get_var_range (x->vars[j - 2])->count;
520 sorted_tab = xnrealloc (sorted_tab,
521 n_sorted_tab + count, sizeof *sorted_tab);
522 v = local_alloc (sizeof *v * x->nvar);
523 for (j = 2; j < x->nvar; j++)
524 v[j] = get_var_range (x->vars[j])->min;
525 for (j = 0; j < count; j++)
527 struct table_entry *te;
530 te = sorted_tab[n_sorted_tab++]
531 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
535 int row_cnt = get_var_range (x->vars[0])->count;
536 int col_cnt = get_var_range (x->vars[1])->count;
537 const int mat_size = row_cnt * col_cnt;
540 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
541 for (m = 0; m < mat_size; m++)
545 for (k = 2; k < x->nvar; k++)
546 te->values[k].f = v[k];
547 for (k = 2; k < x->nvar; k++)
549 struct var_range *vr = get_var_range (x->vars[k]);
550 if (++v[k] >= vr->max)
559 sorted_tab = xnrealloc (sorted_tab,
560 n_sorted_tab + 1, sizeof *sorted_tab);
561 sorted_tab[n_sorted_tab] = NULL;
566 /* Form crosstabulations for general mode. */
568 calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
570 bool bad_warn = true;
573 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
575 /* Flattened current table index. */
578 for (t = 0; t < nxtab; t++)
580 struct crosstab *x = xtab[t];
581 const size_t entry_size = (sizeof (struct table_entry)
582 + sizeof (union value) * (x->nvar - 1));
583 struct table_entry *te = local_alloc (entry_size);
585 /* Construct table entry for the current record and table. */
591 for (j = 0; j < x->nvar; j++)
593 const union value *v = case_data (c, x->vars[j]);
594 if ((cmd.miss == CRS_TABLE && var_is_value_missing (x->vars[j], v))
595 || (cmd.miss == CRS_INCLUDE
596 && var_is_value_system_missing (x->vars[j], v)))
598 x->missing += weight;
602 if (var_is_numeric (x->vars[j]))
603 te->values[j].f = case_num (c, x->vars[j]);
606 memcpy (te->values[j].s, case_str (c, x->vars[j]),
607 var_get_width (x->vars[j]));
609 /* Necessary in order to simplify comparisons. */
610 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
611 sizeof (union value) - var_get_width (x->vars[j]));
616 /* Add record to hash table. */
618 struct table_entry **tepp
619 = (struct table_entry **) hsh_probe (gen_tab, te);
622 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
625 memcpy (tep, te, entry_size);
630 (*tepp)->u.freq += weight;
641 calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
643 bool bad_warn = true;
646 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
648 /* Flattened current table index. */
651 for (t = 0; t < nxtab; t++)
653 struct crosstab *x = xtab[t];
658 for (i = 0; i < x->nvar; i++)
660 struct variable *const v = x->vars[i];
661 struct var_range *vr = get_var_range (v);
662 double value = case_num (c, v);
664 /* Note that the first test also rules out SYSMIS. */
665 if ((value < vr->min || value >= vr->max)
666 || (cmd.miss == CRS_TABLE && var_is_num_user_missing (v, value)))
668 x->missing += weight;
674 ofs += fact * ((int) value - vr->min);
680 struct variable *row_var = x->vars[ROW_VAR];
681 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
683 struct variable *col_var = x->vars[COL_VAR];
684 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
686 const int col_dim = get_var_range (col_var)->count;
688 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
697 /* Compare the table_entry's at A and B and return a strcmp()-type
700 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
702 const struct table_entry *a = a_;
703 const struct table_entry *b = b_;
705 if (a->table > b->table)
707 else if (a->table < b->table)
711 const struct crosstab *x = xtab[a->table];
714 for (i = x->nvar - 1; i >= 0; i--)
715 if (var_is_numeric (x->vars[i]))
717 const double diffnum = a->values[i].f - b->values[i].f;
720 else if (diffnum > 0)
725 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
726 var_get_width (x->vars[i]));
735 /* Calculate a hash value from table_entry A. */
737 hash_table_entry (const void *a_, const void *aux UNUSED)
739 const struct table_entry *a = a_;
744 for (i = 0; i < xtab[a->table]->nvar; i++)
745 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
750 /* Post-data reading calculations. */
752 static struct table_entry **find_pivot_extent (struct table_entry **,
753 int *cnt, int pivot);
754 static void enum_var_values (struct table_entry **entries, int entry_cnt,
756 union value **values, int *value_cnt);
757 static void output_pivot_table (struct table_entry **, struct table_entry **,
758 double **, double **, double **,
759 int *, int *, int *);
760 static void make_summary_table (void);
763 postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
767 n_sorted_tab = hsh_count (gen_tab);
768 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
771 make_summary_table ();
773 /* Identify all the individual crosstabulation tables, and deal with
776 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
777 int pc = n_sorted_tab; /* Pivot count. */
779 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
780 int maxrows = 0, maxcols = 0, maxcells = 0;
784 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
788 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
789 &maxrows, &maxcols, &maxcells);
798 hsh_destroy (gen_tab);
803 static void insert_summary (struct tab_table *, int tab_index, double valid);
805 /* Output a table summarizing the cases processed. */
807 make_summary_table (void)
809 struct tab_table *summary;
811 struct table_entry **pb = sorted_tab, **pe;
812 int pc = n_sorted_tab;
815 summary = tab_create (7, 3 + nxtab, 1);
816 tab_title (summary, _("Summary."));
817 tab_headers (summary, 1, 0, 3, 0);
818 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
819 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
820 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
821 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
822 tab_hline (summary, TAL_1, 1, 6, 1);
823 tab_hline (summary, TAL_1, 1, 6, 2);
824 tab_vline (summary, TAL_1, 3, 1, 1);
825 tab_vline (summary, TAL_1, 5, 1, 1);
829 for (i = 0; i < 3; i++)
831 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
832 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
835 tab_offset (summary, 0, 3);
841 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
845 while (cur_tab < (*pb)->table)
846 insert_summary (summary, cur_tab++, 0.);
849 for (valid = 0.; pb < pe; pb++)
850 valid += (*pb)->u.freq;
853 const struct crosstab *const x = xtab[(*pb)->table];
854 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
855 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
856 const int count = n_cols * n_rows;
858 for (valid = 0.; pb < pe; pb++)
860 const double *data = (*pb)->u.data;
863 for (i = 0; i < count; i++)
867 insert_summary (summary, cur_tab++, valid);
872 while (cur_tab < nxtab)
873 insert_summary (summary, cur_tab++, 0.);
878 /* Inserts a line into T describing the crosstabulation at index
879 TAB_INDEX, which has VALID valid observations. */
881 insert_summary (struct tab_table *t, int tab_index, double valid)
883 struct crosstab *x = xtab[tab_index];
885 tab_hline (t, TAL_1, 0, 6, 0);
887 /* Crosstabulation name. */
889 char *buf = local_alloc (128 * x->nvar);
893 for (i = 0; i < x->nvar; i++)
896 cp = stpcpy (cp, " * ");
898 cp = stpcpy (cp, var_to_string (x->vars[i]));
900 tab_text (t, 0, 0, TAB_LEFT, buf);
905 /* Counts and percentages. */
915 for (i = 0; i < 3; i++)
917 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
918 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
929 static struct tab_table *table; /* Crosstabulation table. */
930 static struct tab_table *chisq; /* Chi-square table. */
931 static struct tab_table *sym; /* Symmetric measures table. */
932 static struct tab_table *risk; /* Risk estimate table. */
933 static struct tab_table *direct; /* Directional measures table. */
936 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
938 /* Column values, number of columns. */
939 static union value *cols;
942 /* Row values, number of rows. */
943 static union value *rows;
946 /* Number of statistically interesting columns/rows (columns/rows with
948 static int ns_cols, ns_rows;
950 /* Crosstabulation. */
951 static const struct crosstab *x;
953 /* Number of variables from the crosstabulation to consider. This is
954 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
957 /* Matrix contents. */
958 static double *mat; /* Matrix proper. */
959 static double *row_tot; /* Row totals. */
960 static double *col_tot; /* Column totals. */
961 static double W; /* Grand total. */
963 static void display_dimensions (struct tab_table *, int first_difference,
964 struct table_entry *);
965 static void display_crosstabulation (void);
966 static void display_chisq (void);
967 static void display_symmetric (void);
968 static void display_risk (void);
969 static void display_directional (void);
970 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
971 static void table_value_missing (struct tab_table *table, int c, int r,
972 unsigned char opt, const union value *v,
973 const struct variable *var);
974 static void delete_missing (void);
976 /* Output pivot table beginning at PB and continuing until PE,
977 exclusive. For efficiency, *MATP is a pointer to a matrix that can
978 hold *MAXROWS entries. */
980 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
981 double **matp, double **row_totp, double **col_totp,
982 int *maxrows, int *maxcols, int *maxcells)
985 struct table_entry **tb = pb, **te; /* Table begin, table end. */
986 int tc = pe - pb; /* Table count. */
988 /* Table entry for header comparison. */
989 struct table_entry *cmp = NULL;
991 x = xtab[(*pb)->table];
992 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
994 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
996 /* Crosstabulation table initialization. */
999 table = tab_create (nvar + n_cols,
1000 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1001 tab_headers (table, nvar - 1, 0, 2, 0);
1003 /* First header line. */
1004 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1005 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1007 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1009 /* Second header line. */
1013 for (i = 2; i < nvar; i++)
1014 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1015 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1016 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1017 var_get_name (x->vars[ROW_VAR]));
1018 for (i = 0; i < n_cols; i++)
1019 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1021 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1024 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1025 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1029 char *title = local_alloc (x->nvar * 64 + 128);
1033 if (cmd.pivot == CRS_PIVOT)
1034 for (i = 0; i < nvar; i++)
1037 cp = stpcpy (cp, " by ");
1038 cp = stpcpy (cp, var_get_name (x->vars[i]));
1042 cp = spprintf (cp, "%s by %s for",
1043 var_get_name (x->vars[0]),
1044 var_get_name (x->vars[1]));
1045 for (i = 2; i < nvar; i++)
1047 char buf[64], *bufp;
1052 cp = stpcpy (cp, var_get_name (x->vars[i]));
1054 format_short (buf, var_get_print_format (x->vars[i]),
1056 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1058 cp = stpcpy (cp, bufp);
1062 cp = stpcpy (cp, " [");
1063 for (i = 0; i < num_cells; i++)
1071 static const struct tuple cell_names[] =
1073 {CRS_CL_COUNT, N_("count")},
1074 {CRS_CL_ROW, N_("row %")},
1075 {CRS_CL_COLUMN, N_("column %")},
1076 {CRS_CL_TOTAL, N_("total %")},
1077 {CRS_CL_EXPECTED, N_("expected")},
1078 {CRS_CL_RESIDUAL, N_("residual")},
1079 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1080 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1084 const struct tuple *t;
1086 for (t = cell_names; t->value != cells[i]; t++)
1087 assert (t->value != -1);
1089 cp = stpcpy (cp, ", ");
1090 cp = stpcpy (cp, gettext (t->name));
1094 tab_title (table, "%s", title);
1098 tab_offset (table, 0, 2);
1103 /* Chi-square table initialization. */
1104 if (cmd.a_statistics[CRS_ST_CHISQ])
1106 chisq = tab_create (6 + (nvar - 2),
1107 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1108 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1110 tab_title (chisq, _("Chi-square tests."));
1112 tab_offset (chisq, nvar - 2, 0);
1113 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1114 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1115 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1116 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1117 _("Asymp. Sig. (2-sided)"));
1118 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1119 _("Exact. Sig. (2-sided)"));
1120 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1121 _("Exact. Sig. (1-sided)"));
1123 tab_offset (chisq, 0, 1);
1128 /* Symmetric measures. */
1129 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1130 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1131 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1132 || cmd.a_statistics[CRS_ST_KAPPA])
1134 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1135 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1136 tab_title (sym, _("Symmetric measures."));
1138 tab_offset (sym, nvar - 2, 0);
1139 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1140 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1141 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1142 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1143 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1144 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1145 tab_offset (sym, 0, 1);
1150 /* Risk estimate. */
1151 if (cmd.a_statistics[CRS_ST_RISK])
1153 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1154 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1155 tab_title (risk, _("Risk estimate."));
1157 tab_offset (risk, nvar - 2, 0);
1158 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1159 _("95%% Confidence Interval"));
1160 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1161 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1162 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1163 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1164 tab_hline (risk, TAL_1, 2, 3, 1);
1165 tab_vline (risk, TAL_1, 2, 0, 1);
1166 tab_offset (risk, 0, 2);
1171 /* Directional measures. */
1172 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1173 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1175 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1176 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1177 tab_title (direct, _("Directional measures."));
1179 tab_offset (direct, nvar - 2, 0);
1180 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1181 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1182 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1183 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1184 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1185 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1186 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1187 tab_offset (direct, 0, 1);
1194 /* Find pivot subtable if applicable. */
1195 te = find_pivot_extent (tb, &tc, 0);
1199 /* Find all the row variable values. */
1200 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1202 /* Allocate memory space for the column and row totals. */
1203 if (n_rows > *maxrows)
1205 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1206 row_tot = *row_totp;
1209 if (n_cols > *maxcols)
1211 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1212 col_tot = *col_totp;
1216 /* Allocate table space for the matrix. */
1217 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1218 tab_realloc (table, -1,
1219 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1220 tab_nr (table) * (pe - pb) / (te - tb)));
1222 if (mode == GENERAL)
1224 /* Allocate memory space for the matrix. */
1225 if (n_cols * n_rows > *maxcells)
1227 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1228 *maxcells = n_cols * n_rows;
1233 /* Build the matrix and calculate column totals. */
1235 union value *cur_col = cols;
1236 union value *cur_row = rows;
1238 double *cp = col_tot;
1239 struct table_entry **p;
1242 for (p = &tb[0]; p < te; p++)
1244 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1248 for (; cur_row < &rows[n_rows]; cur_row++)
1254 mp = &mat[cur_col - cols];
1257 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1264 *cp += *mp = (*p)->u.freq;
1269 /* Zero out the rest of the matrix. */
1270 for (; cur_row < &rows[n_rows]; cur_row++)
1276 if (cur_col < &cols[n_cols])
1278 const int rem_cols = n_cols - (cur_col - cols);
1281 for (c = 0; c < rem_cols; c++)
1283 mp = &mat[cur_col - cols];
1284 for (r = 0; r < n_rows; r++)
1286 for (c = 0; c < rem_cols; c++)
1288 mp += n_cols - rem_cols;
1296 double *tp = col_tot;
1298 assert (mode == INTEGER);
1299 mat = (*tb)->u.data;
1302 /* Calculate column totals. */
1303 for (c = 0; c < n_cols; c++)
1306 double *cp = &mat[c];
1308 for (r = 0; r < n_rows; r++)
1309 cum += cp[r * n_cols];
1317 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1318 ns_cols += *cp != 0.;
1321 /* Calculate row totals. */
1324 double *rp = row_tot;
1327 for (ns_rows = 0, r = n_rows; r--; )
1330 for (c = n_cols; c--; )
1338 /* Calculate grand total. */
1344 if (n_rows < n_cols)
1345 tp = row_tot, n = n_rows;
1347 tp = col_tot, n = n_cols;
1353 /* Find the first variable that differs from the last subtable,
1354 then display the values of the dimensioning variables for
1355 each table that needs it. */
1357 int first_difference = nvar - 1;
1360 for (; ; first_difference--)
1362 assert (first_difference >= 2);
1363 if (memcmp (&cmp->values[first_difference],
1364 &(*tb)->values[first_difference],
1365 sizeof *cmp->values))
1371 display_dimensions (table, first_difference, *tb);
1373 display_dimensions (chisq, first_difference, *tb);
1375 display_dimensions (sym, first_difference, *tb);
1377 display_dimensions (risk, first_difference, *tb);
1379 display_dimensions (direct, first_difference, *tb);
1383 display_crosstabulation ();
1384 if (cmd.miss == CRS_REPORT)
1389 display_symmetric ();
1393 display_directional ();
1404 tab_resize (chisq, 4 + (nvar - 2), -1);
1415 /* Delete missing rows and columns for statistical analysis when
1418 delete_missing (void)
1423 for (r = 0; r < n_rows; r++)
1424 if (var_is_num_user_missing (x->vars[ROW_VAR], rows[r].f))
1428 for (c = 0; c < n_cols; c++)
1429 mat[c + r * n_cols] = 0.;
1437 for (c = 0; c < n_cols; c++)
1438 if (var_is_num_user_missing (x->vars[COL_VAR], cols[c].f))
1442 for (r = 0; r < n_rows; r++)
1443 mat[c + r * n_cols] = 0.;
1449 /* Prepare table T for submission, and submit it. */
1451 submit (struct tab_table *t)
1458 tab_resize (t, -1, 0);
1459 if (tab_nr (t) == tab_t (t))
1464 tab_offset (t, 0, 0);
1466 for (i = 2; i < nvar; i++)
1467 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1468 var_to_string (x->vars[i]));
1469 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1470 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1472 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1474 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1475 tab_dim (t, crosstabs_dim);
1479 /* Sets the widths of all the columns and heights of all the rows in
1480 table T for driver D. */
1482 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1486 /* Width of a numerical column. */
1487 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1488 if (cmd.miss == CRS_REPORT)
1489 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1491 /* Set width for header columns. */
1497 w = d->width - c * (t->nc - t->l);
1498 for (i = 0; i <= t->nc; i++)
1502 if (w < d->prop_em_width * 8)
1503 w = d->prop_em_width * 8;
1505 if (w > d->prop_em_width * 15)
1506 w = d->prop_em_width * 15;
1508 for (i = 0; i < t->l; i++)
1512 for (i = t->l; i < t->nc; i++)
1515 for (i = 0; i < t->nr; i++)
1516 t->h[i] = tab_natural_height (t, d, i);
1519 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1520 int *cnt, int pivot);
1521 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1522 int *cnt, int pivot);
1524 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1526 static struct table_entry **
1527 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1529 return (mode == GENERAL
1530 ? find_pivot_extent_general (tp, cnt, pivot)
1531 : find_pivot_extent_integer (tp, cnt, pivot));
1534 /* Find the extent of a region in TP that contains one table. If
1535 PIVOT != 0 that means a set of table entries with identical table
1536 number; otherwise they also have to have the same values for every
1537 dimension after the row and column dimensions. The table that is
1538 searched starts at TP and has length CNT. Returns the first entry
1539 after the last one in the table; sets *CNT to the number of
1540 remaining values. If there are no entries in TP at all, returns
1541 NULL. A yucky interface, admittedly, but it works. */
1542 static struct table_entry **
1543 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1545 struct table_entry *fp = *tp;
1550 x = xtab[(*tp)->table];
1558 if ((*tp)->table != fp->table)
1563 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1570 /* Integer mode correspondent to find_pivot_extent_general(). This
1571 could be optimized somewhat, but I just don't give a crap about
1572 CROSSTABS performance in integer mode, which is just a
1573 CROSSTABS wart as far as I'm concerned.
1575 That said, feel free to send optimization patches to me. */
1576 static struct table_entry **
1577 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1579 struct table_entry *fp = *tp;
1584 x = xtab[(*tp)->table];
1592 if ((*tp)->table != fp->table)
1597 if (memcmp (&(*tp)->values[2], &fp->values[2],
1598 sizeof (union value) * (x->nvar - 2)))
1605 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1606 result. WIDTH_ points to an int which is either 0 for a
1607 numeric value or a string width for a string value. */
1609 compare_value (const void *a_, const void *b_, const void *width_)
1611 const union value *a = a_;
1612 const union value *b = b_;
1613 const int *pwidth = width_;
1614 const int width = *pwidth;
1617 return (a->f < b->f) ? -1 : (a->f > b->f);
1619 return strncmp (a->s, b->s, width);
1622 /* Given an array of ENTRY_CNT table_entry structures starting at
1623 ENTRIES, creates a sorted list of the values that the variable
1624 with index VAR_IDX takes on. The values are returned as a
1625 malloc()'darray stored in *VALUES, with the number of values
1626 stored in *VALUE_CNT.
1629 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1630 union value **values, int *value_cnt)
1632 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1634 if (mode == GENERAL)
1636 int width = var_get_width (v);
1639 *values = xnmalloc (entry_cnt, sizeof **values);
1640 for (i = 0; i < entry_cnt; i++)
1641 (*values)[i] = entries[i]->values[var_idx];
1642 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1643 compare_value, &width);
1647 struct var_range *vr = get_var_range (v);
1650 assert (mode == INTEGER);
1651 *values = xnmalloc (vr->count, sizeof **values);
1652 for (i = 0; i < vr->count; i++)
1653 (*values)[i].f = i + vr->min;
1654 *value_cnt = vr->count;
1658 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1659 from V, displayed with print format spec from variable VAR. When
1660 in REPORT missing-value mode, missing values have an M appended. */
1662 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1663 const union value *v, const struct variable *var)
1666 const struct fmt_spec *print = var_get_print_format (var);
1668 const char *label = var_lookup_value_label (var, v);
1671 tab_text (table, c, r, TAB_LEFT, label);
1675 s.string = tab_alloc (table, print->w);
1676 format_short (s.string, print, v);
1677 s.length = strlen (s.string);
1678 if (cmd.miss == CRS_REPORT && var_is_num_user_missing (var, v->f))
1679 s.string[s.length++] = 'M';
1680 while (s.length && *s.string == ' ')
1685 tab_raw (table, c, r, opt, &s);
1688 /* Draws a line across TABLE at the current row to indicate the most
1689 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1690 that changed, and puts the values that changed into the table. TB
1691 and X must be the corresponding table_entry and crosstab,
1694 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1696 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1698 for (; first_difference >= 2; first_difference--)
1699 table_value_missing (table, nvar - first_difference - 1, 0,
1700 TAB_RIGHT, &tb->values[first_difference],
1701 x->vars[first_difference]);
1704 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1705 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1706 additionally suffixed with a letter `M'. */
1708 format_cell_entry (struct tab_table *table, int c, int r, double value,
1709 char suffix, bool mark_missing)
1711 const struct fmt_spec f = {FMT_F, 10, 1};
1716 s.string = tab_alloc (table, 16);
1718 data_out (&v, &f, s.string);
1719 while (*s.string == ' ')
1725 s.string[s.length++] = suffix;
1727 s.string[s.length++] = 'M';
1729 tab_raw (table, c, r, TAB_RIGHT, &s);
1732 /* Displays the crosstabulation table. */
1734 display_crosstabulation (void)
1739 for (r = 0; r < n_rows; r++)
1740 table_value_missing (table, nvar - 2, r * num_cells,
1741 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1743 tab_text (table, nvar - 2, n_rows * num_cells,
1744 TAB_LEFT, _("Total"));
1746 /* Put in the actual cells. */
1751 tab_offset (table, nvar - 1, -1);
1752 for (r = 0; r < n_rows; r++)
1755 tab_hline (table, TAL_1, -1, n_cols, 0);
1756 for (c = 0; c < n_cols; c++)
1758 bool mark_missing = false;
1759 double expected_value = row_tot[r] * col_tot[c] / W;
1760 if (cmd.miss == CRS_REPORT
1761 && (var_is_num_user_missing (x->vars[COL_VAR], cols[c].f)
1762 || var_is_num_user_missing (x->vars[ROW_VAR], rows[r].f)))
1763 mark_missing = true;
1764 for (i = 0; i < num_cells; i++)
1775 v = *mp / row_tot[r] * 100.;
1779 v = *mp / col_tot[c] * 100.;
1786 case CRS_CL_EXPECTED:
1789 case CRS_CL_RESIDUAL:
1790 v = *mp - expected_value;
1792 case CRS_CL_SRESIDUAL:
1793 v = (*mp - expected_value) / sqrt (expected_value);
1795 case CRS_CL_ASRESIDUAL:
1796 v = ((*mp - expected_value)
1797 / sqrt (expected_value
1798 * (1. - row_tot[r] / W)
1799 * (1. - col_tot[c] / W)));
1805 format_cell_entry (table, c, i, v, suffix, mark_missing);
1811 tab_offset (table, -1, tab_row (table) + num_cells);
1819 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1820 for (r = 0; r < n_rows; r++)
1823 bool mark_missing = false;
1825 if (cmd.miss == CRS_REPORT
1826 && var_is_num_user_missing (x->vars[ROW_VAR], rows[r].f))
1827 mark_missing = true;
1829 for (i = 0; i < num_cells; i++)
1843 v = row_tot[r] / W * 100.;
1847 v = row_tot[r] / W * 100.;
1850 case CRS_CL_EXPECTED:
1851 case CRS_CL_RESIDUAL:
1852 case CRS_CL_SRESIDUAL:
1853 case CRS_CL_ASRESIDUAL:
1860 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1861 tab_next_row (table);
1866 /* Column totals, grand total. */
1872 tab_hline (table, TAL_1, -1, n_cols, 0);
1873 for (c = 0; c <= n_cols; c++)
1875 double ct = c < n_cols ? col_tot[c] : W;
1876 bool mark_missing = false;
1880 if (cmd.miss == CRS_REPORT && c < n_cols
1881 && var_is_num_user_missing (x->vars[COL_VAR], cols[c].f))
1882 mark_missing = true;
1884 for (i = 0; i < num_cells; i++)
1906 case CRS_CL_EXPECTED:
1907 case CRS_CL_RESIDUAL:
1908 case CRS_CL_SRESIDUAL:
1909 case CRS_CL_ASRESIDUAL:
1915 format_cell_entry (table, c, i, v, suffix, mark_missing);
1920 tab_offset (table, -1, tab_row (table) + last_row);
1923 tab_offset (table, 0, -1);
1926 static void calc_r (double *X, double *Y, double *, double *, double *);
1927 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1929 /* Display chi-square statistics. */
1931 display_chisq (void)
1933 static const char *chisq_stats[N_CHISQ] =
1935 N_("Pearson Chi-Square"),
1936 N_("Likelihood Ratio"),
1937 N_("Fisher's Exact Test"),
1938 N_("Continuity Correction"),
1939 N_("Linear-by-Linear Association"),
1941 double chisq_v[N_CHISQ];
1942 double fisher1, fisher2;
1948 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1950 tab_offset (chisq, nvar - 2, -1);
1952 for (i = 0; i < N_CHISQ; i++)
1954 if ((i != 2 && chisq_v[i] == SYSMIS)
1955 || (i == 2 && fisher1 == SYSMIS))
1959 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1962 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1963 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1964 tab_float (chisq, 3, 0, TAB_RIGHT,
1965 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1970 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1971 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1973 tab_next_row (chisq);
1976 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1977 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1978 tab_next_row (chisq);
1980 tab_offset (chisq, 0, -1);
1983 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1984 double[N_SYMMETRIC]);
1986 /* Display symmetric measures. */
1988 display_symmetric (void)
1990 static const char *categories[] =
1992 N_("Nominal by Nominal"),
1993 N_("Ordinal by Ordinal"),
1994 N_("Interval by Interval"),
1995 N_("Measure of Agreement"),
1998 static const char *stats[N_SYMMETRIC] =
2002 N_("Contingency Coefficient"),
2003 N_("Kendall's tau-b"),
2004 N_("Kendall's tau-c"),
2006 N_("Spearman Correlation"),
2011 static const int stats_categories[N_SYMMETRIC] =
2013 0, 0, 0, 1, 1, 1, 1, 2, 3,
2017 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2020 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2023 tab_offset (sym, nvar - 2, -1);
2025 for (i = 0; i < N_SYMMETRIC; i++)
2027 if (sym_v[i] == SYSMIS)
2030 if (stats_categories[i] != last_cat)
2032 last_cat = stats_categories[i];
2033 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2036 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2037 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2038 if (sym_ase[i] != SYSMIS)
2039 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2040 if (sym_t[i] != SYSMIS)
2041 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2042 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2046 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2047 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2050 tab_offset (sym, 0, -1);
2053 static int calc_risk (double[], double[], double[], union value *);
2055 /* Display risk estimate. */
2060 double risk_v[3], lower[3], upper[3];
2064 if (!calc_risk (risk_v, upper, lower, c))
2067 tab_offset (risk, nvar - 2, -1);
2069 for (i = 0; i < 3; i++)
2071 if (risk_v[i] == SYSMIS)
2077 if (var_is_numeric (x->vars[COL_VAR]))
2078 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2079 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2081 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2082 var_get_name (x->vars[COL_VAR]),
2083 var_get_width (x->vars[COL_VAR]), c[0].s,
2084 var_get_width (x->vars[COL_VAR]), c[1].s);
2088 if (var_is_numeric (x->vars[ROW_VAR]))
2089 sprintf (buf, _("For cohort %s = %g"),
2090 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2092 sprintf (buf, _("For cohort %s = %.*s"),
2093 var_get_name (x->vars[ROW_VAR]),
2094 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2098 tab_text (risk, 0, 0, TAB_LEFT, buf);
2099 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2100 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2101 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2102 tab_next_row (risk);
2105 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2106 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2107 tab_next_row (risk);
2109 tab_offset (risk, 0, -1);
2112 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2113 double[N_DIRECTIONAL]);
2115 /* Display directional measures. */
2117 display_directional (void)
2119 static const char *categories[] =
2121 N_("Nominal by Nominal"),
2122 N_("Ordinal by Ordinal"),
2123 N_("Nominal by Interval"),
2126 static const char *stats[] =
2129 N_("Goodman and Kruskal tau"),
2130 N_("Uncertainty Coefficient"),
2135 static const char *types[] =
2142 static const int stats_categories[N_DIRECTIONAL] =
2144 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2147 static const int stats_stats[N_DIRECTIONAL] =
2149 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2152 static const int stats_types[N_DIRECTIONAL] =
2154 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2157 static const int *stats_lookup[] =
2164 static const char **stats_names[] =
2176 double direct_v[N_DIRECTIONAL];
2177 double direct_ase[N_DIRECTIONAL];
2178 double direct_t[N_DIRECTIONAL];
2182 if (!calc_directional (direct_v, direct_ase, direct_t))
2185 tab_offset (direct, nvar - 2, -1);
2187 for (i = 0; i < N_DIRECTIONAL; i++)
2189 if (direct_v[i] == SYSMIS)
2195 for (j = 0; j < 3; j++)
2196 if (last[j] != stats_lookup[j][i])
2199 tab_hline (direct, TAL_1, j, 6, 0);
2204 int k = last[j] = stats_lookup[j][i];
2209 string = var_get_name (x->vars[0]);
2211 string = var_get_name (x->vars[1]);
2213 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2214 gettext (stats_names[j][k]), string);
2219 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2220 if (direct_ase[i] != SYSMIS)
2221 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2222 if (direct_t[i] != SYSMIS)
2223 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2224 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2225 tab_next_row (direct);
2228 tab_offset (direct, 0, -1);
2231 /* Statistical calculations. */
2233 /* Returns the value of the gamma (factorial) function for an integer
2236 gamma_int (double x)
2241 for (i = 2; i < x; i++)
2246 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2248 static inline double
2249 Pr (int a, int b, int c, int d)
2251 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2252 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2253 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2254 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2255 / gamma_int (a + b + c + d + 1.));
2258 /* Swap the contents of A and B. */
2260 swap (int *a, int *b)
2267 /* Calculate significance for Fisher's exact test as specified in
2268 _SPSS Statistical Algorithms_, Appendix 5. */
2270 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2274 if (MIN (c, d) < MIN (a, b))
2275 swap (&a, &c), swap (&b, &d);
2276 if (MIN (b, d) < MIN (a, c))
2277 swap (&a, &b), swap (&c, &d);
2281 swap (&a, &b), swap (&c, &d);
2283 swap (&a, &c), swap (&b, &d);
2287 for (x = 0; x <= a; x++)
2288 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2290 *fisher2 = *fisher1;
2291 for (x = 1; x <= b; x++)
2292 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2295 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2296 columns with values COLS and N_ROWS rows with values ROWS. Values
2297 in the matrix sum to W. */
2299 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2300 double *fisher1, double *fisher2)
2304 chisq[0] = chisq[1] = 0.;
2305 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2306 *fisher1 = *fisher2 = SYSMIS;
2308 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2310 if (ns_rows <= 1 || ns_cols <= 1)
2312 chisq[0] = chisq[1] = SYSMIS;
2316 for (r = 0; r < n_rows; r++)
2317 for (c = 0; c < n_cols; c++)
2319 const double expected = row_tot[r] * col_tot[c] / W;
2320 const double freq = mat[n_cols * r + c];
2321 const double residual = freq - expected;
2323 chisq[0] += residual * residual / expected;
2325 chisq[1] += freq * log (expected / freq);
2336 /* Calculate Yates and Fisher exact test. */
2337 if (ns_cols == 2 && ns_rows == 2)
2339 double f11, f12, f21, f22;
2345 for (i = j = 0; i < n_cols; i++)
2346 if (col_tot[i] != 0.)
2355 f11 = mat[nz_cols[0]];
2356 f12 = mat[nz_cols[1]];
2357 f21 = mat[nz_cols[0] + n_cols];
2358 f22 = mat[nz_cols[1] + n_cols];
2363 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2366 chisq[3] = (W * x * x
2367 / (f11 + f12) / (f21 + f22)
2368 / (f11 + f21) / (f12 + f22));
2376 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2377 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2380 /* Calculate Mantel-Haenszel. */
2381 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2383 double r, ase_0, ase_1;
2384 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2386 chisq[4] = (W - 1.) * r * r;
2391 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2392 ASE_1, and ase_0 into ASE_0. The row and column values must be
2393 passed in X and Y. */
2395 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2397 double SX, SY, S, T;
2399 double sum_XYf, sum_X2Y2f;
2400 double sum_Xr, sum_X2r;
2401 double sum_Yc, sum_Y2c;
2404 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2405 for (j = 0; j < n_cols; j++)
2407 double fij = mat[j + i * n_cols];
2408 double product = X[i] * Y[j];
2409 double temp = fij * product;
2411 sum_X2Y2f += temp * product;
2414 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2416 sum_Xr += X[i] * row_tot[i];
2417 sum_X2r += X[i] * X[i] * row_tot[i];
2421 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2423 sum_Yc += Y[i] * col_tot[i];
2424 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2428 S = sum_XYf - sum_Xr * sum_Yc / W;
2429 SX = sum_X2r - sum_Xr * sum_Xr / W;
2430 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2433 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2438 for (s = c = 0., i = 0; i < n_rows; i++)
2439 for (j = 0; j < n_cols; j++)
2441 double Xresid, Yresid;
2444 Xresid = X[i] - Xbar;
2445 Yresid = Y[j] - Ybar;
2446 temp = (T * Xresid * Yresid
2448 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2449 y = mat[j + i * n_cols] * temp * temp - c;
2454 *ase_1 = sqrt (s) / (T * T);
2458 static double somers_d_v[3];
2459 static double somers_d_ase[3];
2460 static double somers_d_t[3];
2462 /* Calculate symmetric statistics and their asymptotic standard
2463 errors. Returns 0 if none could be calculated. */
2465 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2466 double t[N_SYMMETRIC])
2468 int q = MIN (ns_rows, ns_cols);
2477 for (i = 0; i < N_SYMMETRIC; i++)
2478 v[i] = ase[i] = t[i] = SYSMIS;
2481 /* Phi, Cramer's V, contingency coefficient. */
2482 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2484 double Xp = 0.; /* Pearson chi-square. */
2489 for (r = 0; r < n_rows; r++)
2490 for (c = 0; c < n_cols; c++)
2492 const double expected = row_tot[r] * col_tot[c] / W;
2493 const double freq = mat[n_cols * r + c];
2494 const double residual = freq - expected;
2496 Xp += residual * residual / expected;
2500 if (cmd.a_statistics[CRS_ST_PHI])
2502 v[0] = sqrt (Xp / W);
2503 v[1] = sqrt (Xp / (W * (q - 1)));
2505 if (cmd.a_statistics[CRS_ST_CC])
2506 v[2] = sqrt (Xp / (Xp + W));
2509 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2510 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2515 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2522 for (r = 0; r < n_rows; r++)
2523 Dr -= row_tot[r] * row_tot[r];
2524 for (c = 0; c < n_cols; c++)
2525 Dc -= col_tot[c] * col_tot[c];
2531 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2532 for (c = 0; c < n_cols; c++)
2536 for (r = 0; r < n_rows; r++)
2537 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2547 for (i = 0; i < n_rows; i++)
2551 for (j = 1; j < n_cols; j++)
2552 Cij += col_tot[j] - cum[j + i * n_cols];
2555 for (j = 1; j < n_cols; j++)
2556 Dij += cum[j + (i - 1) * n_cols];
2560 double fij = mat[j + i * n_cols];
2566 assert (j < n_cols);
2568 Cij -= col_tot[j] - cum[j + i * n_cols];
2569 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2573 Cij += cum[j - 1 + (i - 1) * n_cols];
2574 Dij -= cum[j + (i - 1) * n_cols];
2580 if (cmd.a_statistics[CRS_ST_BTAU])
2581 v[3] = (P - Q) / sqrt (Dr * Dc);
2582 if (cmd.a_statistics[CRS_ST_CTAU])
2583 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2584 if (cmd.a_statistics[CRS_ST_GAMMA])
2585 v[5] = (P - Q) / (P + Q);
2587 /* ASE for tau-b, tau-c, gamma. Calculations could be
2588 eliminated here, at expense of memory. */
2593 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2594 for (i = 0; i < n_rows; i++)
2598 for (j = 1; j < n_cols; j++)
2599 Cij += col_tot[j] - cum[j + i * n_cols];
2602 for (j = 1; j < n_cols; j++)
2603 Dij += cum[j + (i - 1) * n_cols];
2607 double fij = mat[j + i * n_cols];
2609 if (cmd.a_statistics[CRS_ST_BTAU])
2611 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2612 + v[3] * (row_tot[i] * Dc
2613 + col_tot[j] * Dr));
2614 btau_cum += fij * temp * temp;
2618 const double temp = Cij - Dij;
2619 ctau_cum += fij * temp * temp;
2622 if (cmd.a_statistics[CRS_ST_GAMMA])
2624 const double temp = Q * Cij - P * Dij;
2625 gamma_cum += fij * temp * temp;
2628 if (cmd.a_statistics[CRS_ST_D])
2630 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2631 - (P - Q) * (W - row_tot[i]));
2632 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2633 - (Q - P) * (W - col_tot[j]));
2638 assert (j < n_cols);
2640 Cij -= col_tot[j] - cum[j + i * n_cols];
2641 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2645 Cij += cum[j - 1 + (i - 1) * n_cols];
2646 Dij -= cum[j + (i - 1) * n_cols];
2652 btau_var = ((btau_cum
2653 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2655 if (cmd.a_statistics[CRS_ST_BTAU])
2657 ase[3] = sqrt (btau_var);
2658 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2661 if (cmd.a_statistics[CRS_ST_CTAU])
2663 ase[4] = ((2 * q / ((q - 1) * W * W))
2664 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2665 t[4] = v[4] / ase[4];
2667 if (cmd.a_statistics[CRS_ST_GAMMA])
2669 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2670 t[5] = v[5] / (2. / (P + Q)
2671 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2673 if (cmd.a_statistics[CRS_ST_D])
2675 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2676 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2677 somers_d_t[0] = (somers_d_v[0]
2679 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2680 somers_d_v[1] = (P - Q) / Dc;
2681 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2682 somers_d_t[1] = (somers_d_v[1]
2684 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2685 somers_d_v[2] = (P - Q) / Dr;
2686 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2687 somers_d_t[2] = (somers_d_v[2]
2689 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2695 /* Spearman correlation, Pearson's r. */
2696 if (cmd.a_statistics[CRS_ST_CORR])
2698 double *R = local_alloc (sizeof *R * n_rows);
2699 double *C = local_alloc (sizeof *C * n_cols);
2702 double y, t, c = 0., s = 0.;
2707 R[i] = s + (row_tot[i] + 1.) / 2.;
2714 assert (i < n_rows);
2719 double y, t, c = 0., s = 0.;
2724 C[j] = s + (col_tot[j] + 1.) / 2;
2731 assert (j < n_cols);
2735 calc_r (R, C, &v[6], &t[6], &ase[6]);
2741 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2745 /* Cohen's kappa. */
2746 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2748 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2751 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2752 i < ns_rows; i++, j++)
2756 while (col_tot[j] == 0.)
2759 prod = row_tot[i] * col_tot[j];
2760 sum = row_tot[i] + col_tot[j];
2762 sum_fii += mat[j + i * n_cols];
2764 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2765 sum_riciri_ci += prod * sum;
2767 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2768 for (j = 0; j < ns_cols; j++)
2770 double sum = row_tot[i] + col_tot[j];
2771 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2774 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2776 ase[8] = sqrt ((W * W * sum_rici
2777 + sum_rici * sum_rici
2778 - W * sum_riciri_ci)
2779 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2781 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2782 / pow2 (W * W - sum_rici))
2783 + ((2. * (W - sum_fii)
2784 * (2. * sum_fii * sum_rici
2785 - W * sum_fiiri_ci))
2786 / cube (W * W - sum_rici))
2787 + (pow2 (W - sum_fii)
2788 * (W * sum_fijri_ci2 - 4.
2789 * sum_rici * sum_rici)
2790 / pow4 (W * W - sum_rici))));
2792 t[8] = v[8] / ase[8];
2799 /* Calculate risk estimate. */
2801 calc_risk (double *value, double *upper, double *lower, union value *c)
2803 double f11, f12, f21, f22;
2809 for (i = 0; i < 3; i++)
2810 value[i] = upper[i] = lower[i] = SYSMIS;
2813 if (ns_rows != 2 || ns_cols != 2)
2820 for (i = j = 0; i < n_cols; i++)
2821 if (col_tot[i] != 0.)
2830 f11 = mat[nz_cols[0]];
2831 f12 = mat[nz_cols[1]];
2832 f21 = mat[nz_cols[0] + n_cols];
2833 f22 = mat[nz_cols[1] + n_cols];
2835 c[0] = cols[nz_cols[0]];
2836 c[1] = cols[nz_cols[1]];
2839 value[0] = (f11 * f22) / (f12 * f21);
2840 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2841 lower[0] = value[0] * exp (-1.960 * v);
2842 upper[0] = value[0] * exp (1.960 * v);
2844 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2845 v = sqrt ((f12 / (f11 * (f11 + f12)))
2846 + (f22 / (f21 * (f21 + f22))));
2847 lower[1] = value[1] * exp (-1.960 * v);
2848 upper[1] = value[1] * exp (1.960 * v);
2850 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2851 v = sqrt ((f11 / (f12 * (f11 + f12)))
2852 + (f21 / (f22 * (f21 + f22))));
2853 lower[2] = value[2] * exp (-1.960 * v);
2854 upper[2] = value[2] * exp (1.960 * v);
2859 /* Calculate directional measures. */
2861 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2862 double t[N_DIRECTIONAL])
2867 for (i = 0; i < N_DIRECTIONAL; i++)
2868 v[i] = ase[i] = t[i] = SYSMIS;
2872 if (cmd.a_statistics[CRS_ST_LAMBDA])
2874 double *fim = xnmalloc (n_rows, sizeof *fim);
2875 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2876 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2877 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2878 double sum_fim, sum_fmj;
2880 int rm_index, cm_index;
2883 /* Find maximum for each row and their sum. */
2884 for (sum_fim = 0., i = 0; i < n_rows; i++)
2886 double max = mat[i * n_cols];
2889 for (j = 1; j < n_cols; j++)
2890 if (mat[j + i * n_cols] > max)
2892 max = mat[j + i * n_cols];
2896 sum_fim += fim[i] = max;
2897 fim_index[i] = index;
2900 /* Find maximum for each column. */
2901 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2903 double max = mat[j];
2906 for (i = 1; i < n_rows; i++)
2907 if (mat[j + i * n_cols] > max)
2909 max = mat[j + i * n_cols];
2913 sum_fmj += fmj[j] = max;
2914 fmj_index[j] = index;
2917 /* Find maximum row total. */
2920 for (i = 1; i < n_rows; i++)
2921 if (row_tot[i] > rm)
2927 /* Find maximum column total. */
2930 for (j = 1; j < n_cols; j++)
2931 if (col_tot[j] > cm)
2937 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2938 v[1] = (sum_fmj - rm) / (W - rm);
2939 v[2] = (sum_fim - cm) / (W - cm);
2941 /* ASE1 for Y given X. */
2945 for (accum = 0., i = 0; i < n_rows; i++)
2946 for (j = 0; j < n_cols; j++)
2948 const int deltaj = j == cm_index;
2949 accum += (mat[j + i * n_cols]
2950 * pow2 ((j == fim_index[i])
2955 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2958 /* ASE0 for Y given X. */
2962 for (accum = 0., i = 0; i < n_rows; i++)
2963 if (cm_index != fim_index[i])
2964 accum += (mat[i * n_cols + fim_index[i]]
2965 + mat[i * n_cols + cm_index]);
2966 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2969 /* ASE1 for X given Y. */
2973 for (accum = 0., i = 0; i < n_rows; i++)
2974 for (j = 0; j < n_cols; j++)
2976 const int deltaj = i == rm_index;
2977 accum += (mat[j + i * n_cols]
2978 * pow2 ((i == fmj_index[j])
2983 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2986 /* ASE0 for X given Y. */
2990 for (accum = 0., j = 0; j < n_cols; j++)
2991 if (rm_index != fmj_index[j])
2992 accum += (mat[j + n_cols * fmj_index[j]]
2993 + mat[j + n_cols * rm_index]);
2994 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2997 /* Symmetric ASE0 and ASE1. */
3002 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3003 for (j = 0; j < n_cols; j++)
3005 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3006 int temp1 = (i == rm_index) + (j == cm_index);
3007 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3008 accum1 += (mat[j + i * n_cols]
3009 * pow2 (temp0 + (v[0] - 1.) * temp1));
3011 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3012 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3013 / (2. * W - rm - cm));
3022 double sum_fij2_ri, sum_fij2_ci;
3023 double sum_ri2, sum_cj2;
3025 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3026 for (j = 0; j < n_cols; j++)
3028 double temp = pow2 (mat[j + i * n_cols]);
3029 sum_fij2_ri += temp / row_tot[i];
3030 sum_fij2_ci += temp / col_tot[j];
3033 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3034 sum_ri2 += row_tot[i] * row_tot[i];
3036 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3037 sum_cj2 += col_tot[j] * col_tot[j];
3039 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3040 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3044 if (cmd.a_statistics[CRS_ST_UC])
3046 double UX, UY, UXY, P;
3047 double ase1_yx, ase1_xy, ase1_sym;
3050 for (UX = 0., i = 0; i < n_rows; i++)
3051 if (row_tot[i] > 0.)
3052 UX -= row_tot[i] / W * log (row_tot[i] / W);
3054 for (UY = 0., j = 0; j < n_cols; j++)
3055 if (col_tot[j] > 0.)
3056 UY -= col_tot[j] / W * log (col_tot[j] / W);
3058 for (UXY = P = 0., i = 0; i < n_rows; i++)
3059 for (j = 0; j < n_cols; j++)
3061 double entry = mat[j + i * n_cols];
3066 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3067 UXY -= entry / W * log (entry / W);
3070 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3071 for (j = 0; j < n_cols; j++)
3073 double entry = mat[j + i * n_cols];
3078 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3079 + (UX - UXY) * log (col_tot[j] / W));
3080 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3081 + (UY - UXY) * log (row_tot[i] / W));
3082 ase1_sym += entry * pow2 ((UXY
3083 * log (row_tot[i] * col_tot[j] / (W * W)))
3084 - (UX + UY) * log (entry / W));
3087 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3088 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3089 t[5] = v[5] / ((2. / (W * (UX + UY)))
3090 * sqrt (P - pow2 (UX + UY - UXY) / W));
3092 v[6] = (UX + UY - UXY) / UX;
3093 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3094 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3096 v[7] = (UX + UY - UXY) / UY;
3097 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3098 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3102 if (cmd.a_statistics[CRS_ST_D])
3107 calc_symmetric (NULL, NULL, NULL);
3108 for (i = 0; i < 3; i++)
3110 v[8 + i] = somers_d_v[i];
3111 ase[8 + i] = somers_d_ase[i];
3112 t[8 + i] = somers_d_t[i];
3117 if (cmd.a_statistics[CRS_ST_ETA])
3120 double sum_Xr, sum_X2r;
3124 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3126 sum_Xr += rows[i].f * row_tot[i];
3127 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3129 SX = sum_X2r - sum_Xr * sum_Xr / W;
3131 for (SXW = 0., j = 0; j < n_cols; j++)
3135 for (cum = 0., i = 0; i < n_rows; i++)
3137 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3138 cum += rows[i].f * mat[j + i * n_cols];
3141 SXW -= cum * cum / col_tot[j];
3143 v[11] = sqrt (1. - SXW / SX);
3147 double sum_Yc, sum_Y2c;
3151 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3153 sum_Yc += cols[i].f * col_tot[i];
3154 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3156 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3158 for (SYW = 0., i = 0; i < n_rows; i++)
3162 for (cum = 0., j = 0; j < n_cols; j++)
3164 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3165 cum += cols[j].f * mat[j + i * n_cols];
3168 SYW -= cum * cum / row_tot[i];
3170 v[12] = sqrt (1. - SYW / SY);
3177 /* A wrapper around data_out() that limits string output to short
3178 string width and null terminates the result. */
3180 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3182 struct fmt_spec fmt_subst;
3184 /* Limit to short string width. */
3185 if (fmt_is_string (fp->type))
3189 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3190 if (fmt_subst.type == FMT_A)
3191 fmt_subst.w = MIN (8, fmt_subst.w);
3193 fmt_subst.w = MIN (16, fmt_subst.w);
3199 data_out (v, fp, s);
3201 /* Null terminate. */