1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - How to calculate significance of some symmetric and directional measures?
20 - How to calculate ASE for symmetric Somers ' d?
21 - How to calculate ASE for Goodman and Kruskal's tau?
22 - How to calculate approx. T of symmetric uncertainty coefficient?
30 #include <gsl/gsl_cdf.h>
34 #include "data/case.h"
35 #include "data/casegrouper.h"
36 #include "data/casereader.h"
37 #include "data/data-out.h"
38 #include "data/dataset.h"
39 #include "data/dictionary.h"
40 #include "data/format.h"
41 #include "data/value-labels.h"
42 #include "data/variable.h"
43 #include "language/command.h"
44 #include "language/stats/freq.h"
45 #include "language/dictionary/split-file.h"
46 #include "language/lexer/lexer.h"
47 #include "language/lexer/variable-parser.h"
48 #include "libpspp/array.h"
49 #include "libpspp/assertion.h"
50 #include "libpspp/compiler.h"
51 #include "libpspp/hash-functions.h"
52 #include "libpspp/hmap.h"
53 #include "libpspp/hmapx.h"
54 #include "libpspp/message.h"
55 #include "libpspp/misc.h"
56 #include "libpspp/pool.h"
57 #include "libpspp/str.h"
58 #include "output/tab.h"
59 #include "output/chart-item.h"
60 #include "output/charts/barchart.h"
62 #include "gl/minmax.h"
63 #include "gl/xalloc.h"
67 #define _(msgid) gettext (msgid)
68 #define N_(msgid) msgid
76 missing=miss:!table/include/report;
77 count=roundwhat:asis/case/!cell,
78 roundhow:!round/truncate;
79 +write[wr_]=none,cells,all;
80 +format=val:!avalue/dvalue,
82 tabl:!tables/notables,
86 +cells[cl_]=count,expected,row,column,total,residual,sresidual,
88 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
94 /* Number of chi-square statistics. */
97 /* Number of symmetric statistics. */
100 /* Number of directional statistics. */
101 #define N_DIRECTIONAL 13
104 /* Indexes into the 'vars' member of struct crosstabulation and
105 struct crosstab member. */
108 ROW_VAR = 0, /* Row variable. */
109 COL_VAR = 1 /* Column variable. */
110 /* Higher indexes cause multiple tables to be output. */
115 const struct variable *var;
120 /* A crosstabulation of 2 or more variables. */
121 struct crosstabulation
123 struct crosstabs_proc *proc;
124 struct fmt_spec weight_format; /* Format for weight variable. */
125 double missing; /* Weight of missing cases. */
127 /* Variables (2 or more). */
129 struct xtab_var *vars;
131 /* Constants (0 or more). */
133 struct xtab_var *const_vars;
137 struct freq **entries;
140 /* Number of statistically interesting columns/rows
141 (columns/rows with data in them). */
142 int ns_cols, ns_rows;
144 /* Matrix contents. */
145 double *mat; /* Matrix proper. */
146 double *row_tot; /* Row totals. */
147 double *col_tot; /* Column totals. */
148 double total; /* Grand total. */
151 /* Integer mode variable info. */
154 struct hmap_node hmap_node; /* In struct crosstabs_proc var_ranges map. */
155 const struct variable *var; /* The variable. */
156 int min; /* Minimum value. */
157 int max; /* Maximum value + 1. */
158 int count; /* max - min. */
161 struct crosstabs_proc
163 const struct dictionary *dict;
164 enum { INTEGER, GENERAL } mode;
165 enum mv_class exclude;
169 struct fmt_spec weight_format;
171 /* Variables specifies on VARIABLES. */
172 const struct variable **variables;
174 struct hmap var_ranges;
177 struct crosstabulation *pivots;
181 int n_cells; /* Number of cells requested. */
182 unsigned int cells; /* Bit k is 1 if cell k is requested. */
183 int a_cells[CRS_CL_count]; /* 0...n_cells-1 are the requested cells. */
185 /* Rounding of cells. */
186 bool round_case_weights; /* Round case weights? */
187 bool round_cells; /* If !round_case_weights, round cells? */
188 bool round_down; /* Round down? (otherwise to nearest) */
191 unsigned int statistics; /* Bit k is 1 if statistic k is requested. */
193 bool descending; /* True if descending sort order is requested. */
196 const struct var_range *get_var_range (const struct crosstabs_proc *,
197 const struct variable *);
199 static bool should_tabulate_case (const struct crosstabulation *,
200 const struct ccase *, enum mv_class exclude);
201 static void tabulate_general_case (struct crosstabulation *, const struct ccase *,
203 static void tabulate_integer_case (struct crosstabulation *, const struct ccase *,
205 static void postcalc (struct crosstabs_proc *);
206 static void submit (struct crosstabulation *, struct tab_table *);
209 round_weight (const struct crosstabs_proc *proc, double weight)
211 return proc->round_down ? floor (weight) : floor (weight + 0.5);
214 /* Parses and executes the CROSSTABS procedure. */
216 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
218 struct var_range *range, *next_range;
219 struct crosstabs_proc proc;
220 struct casegrouper *grouper;
221 struct casereader *input, *group;
222 struct cmd_crosstabs cmd;
223 struct crosstabulation *xt;
228 proc.dict = dataset_dict (ds);
229 proc.bad_warn = true;
230 proc.variables = NULL;
231 proc.n_variables = 0;
232 hmap_init (&proc.var_ranges);
235 proc.descending = false;
236 proc.weight_format = *dict_get_weight_format (dataset_dict (ds));
238 if (!parse_crosstabs (lexer, ds, &cmd, &proc))
240 result = CMD_FAILURE;
244 proc.mode = proc.n_variables ? INTEGER : GENERAL;
245 proc.barchart = cmd.sbc_barchart > 0;
247 proc.descending = cmd.val == CRS_DVALUE;
249 proc.round_case_weights = cmd.sbc_count && cmd.roundwhat == CRS_CASE;
250 proc.round_cells = cmd.sbc_count && cmd.roundwhat == CRS_CELL;
251 proc.round_down = cmd.roundhow == CRS_TRUNCATE;
255 proc.cells = 1u << CRS_CL_COUNT;
256 else if (cmd.a_cells[CRS_CL_ALL])
257 proc.cells = UINT_MAX;
261 for (i = 0; i < CRS_CL_count; i++)
263 proc.cells |= 1u << i;
265 proc.cells = ((1u << CRS_CL_COUNT)
267 | (1u << CRS_CL_COLUMN)
268 | (1u << CRS_CL_TOTAL));
270 proc.cells &= ((1u << CRS_CL_count) - 1);
271 proc.cells &= ~((1u << CRS_CL_NONE) | (1u << CRS_CL_ALL));
273 for (i = 0; i < CRS_CL_count; i++)
274 if (proc.cells & (1u << i))
275 proc.a_cells[proc.n_cells++] = i;
278 if (cmd.a_statistics[CRS_ST_ALL])
279 proc.statistics = UINT_MAX;
280 else if (cmd.sbc_statistics)
285 for (i = 0; i < CRS_ST_count; i++)
286 if (cmd.a_statistics[i])
287 proc.statistics |= 1u << i;
288 if (proc.statistics == 0)
289 proc.statistics |= 1u << CRS_ST_CHISQ;
295 proc.exclude = (cmd.miss == CRS_TABLE ? MV_ANY
296 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
298 if (proc.mode == GENERAL && proc.exclude == MV_NEVER)
300 msg (SE, _("Missing mode %s not allowed in general mode. "
301 "Assuming %s."), "REPORT", "MISSING=TABLE");
302 proc.exclude = MV_ANY;
306 proc.pivot = cmd.pivot == CRS_PIVOT;
308 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
310 grouper = casegrouper_create_splits (input, dataset_dict (ds));
311 while (casegrouper_get_next_group (grouper, &group))
315 /* Output SPLIT FILE variables. */
316 c = casereader_peek (group, 0);
319 output_split_file_values (ds, c);
323 /* Initialize hash tables. */
324 for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++)
325 hmap_init (&xt->data);
328 for (; (c = casereader_read (group)) != NULL; case_unref (c))
329 for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++)
331 double weight = dict_get_case_weight (dataset_dict (ds), c,
333 if (cmd.roundwhat == CRS_CASE)
335 weight = round_weight (&proc, weight);
339 if (should_tabulate_case (xt, c, proc.exclude))
341 if (proc.mode == GENERAL)
342 tabulate_general_case (xt, c, weight);
344 tabulate_integer_case (xt, c, weight);
347 xt->missing += weight;
349 casereader_destroy (group);
354 ok = casegrouper_destroy (grouper);
355 ok = proc_commit (ds) && ok;
357 result = ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
360 free (proc.variables);
361 HMAP_FOR_EACH_SAFE (range, next_range, struct var_range, hmap_node,
364 hmap_delete (&proc.var_ranges, &range->hmap_node);
367 for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++)
370 /* We must not call value_destroy on const_values because
371 it is a wild pointer; it never pointed to anything owned
372 by the crosstabulation.
374 The rest of the data was allocated and destroyed at a
375 lower level already. */
382 /* Parses the TABLES subcommand. */
384 crs_custom_tables (struct lexer *lexer, struct dataset *ds,
385 struct cmd_crosstabs *cmd UNUSED, void *proc_)
387 struct crosstabs_proc *proc = proc_;
388 struct const_var_set *var_set;
390 const struct variable ***by = NULL;
392 size_t *by_nvar = NULL;
397 /* Ensure that this is a TABLES subcommand. */
398 if (!lex_match_id (lexer, "TABLES")
399 && (lex_token (lexer) != T_ID ||
400 dict_lookup_var (dataset_dict (ds), lex_tokcstr (lexer)) == NULL)
401 && lex_token (lexer) != T_ALL)
403 lex_match (lexer, T_EQUALS);
405 if (proc->variables != NULL)
406 var_set = const_var_set_create_from_array (proc->variables,
409 var_set = const_var_set_create_from_dict (dataset_dict (ds));
410 assert (var_set != NULL);
414 by = xnrealloc (by, n_by + 1, sizeof *by);
415 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
416 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
417 PV_NO_DUPLICATE | PV_NO_SCRATCH))
419 if (xalloc_oversized (nx, by_nvar[n_by]))
421 msg (SE, _("Too many cross-tabulation variables or dimensions."));
427 if (!lex_match (lexer, T_BY))
436 by_iter = xcalloc (n_by, sizeof *by_iter);
437 proc->pivots = xnrealloc (proc->pivots,
438 proc->n_pivots + nx, sizeof *proc->pivots);
439 for (i = 0; i < nx; i++)
441 struct crosstabulation *xt = &proc->pivots[proc->n_pivots++];
445 xt->weight_format = proc->weight_format;
448 xt->vars = xcalloc (n_by, sizeof *xt->vars);
450 xt->const_vars = NULL;
452 for (j = 0; j < n_by; j++)
453 xt->vars[j].var = by[j][by_iter[j]];
455 for (j = n_by - 1; j >= 0; j--)
457 if (++by_iter[j] < by_nvar[j])
466 /* All return paths lead here. */
467 for (i = 0; i < n_by; i++)
472 const_var_set_destroy (var_set);
477 /* Parses the VARIABLES subcommand. */
479 crs_custom_variables (struct lexer *lexer, struct dataset *ds,
480 struct cmd_crosstabs *cmd UNUSED, void *proc_)
482 struct crosstabs_proc *proc = proc_;
485 msg (SE, _("%s must be specified before %s."), "VARIABLES", "TABLES");
489 lex_match (lexer, T_EQUALS);
493 size_t orig_nv = proc->n_variables;
498 if (!parse_variables_const (lexer, dataset_dict (ds),
499 &proc->variables, &proc->n_variables,
500 (PV_APPEND | PV_NUMERIC
501 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
504 if (!lex_force_match (lexer, T_LPAREN))
507 if (!lex_force_int (lexer))
509 min = lex_integer (lexer);
512 lex_match (lexer, T_COMMA);
514 if (!lex_force_int (lexer))
516 max = lex_integer (lexer);
519 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
525 if (!lex_force_match (lexer, T_RPAREN))
528 for (i = orig_nv; i < proc->n_variables; i++)
530 const struct variable *var = proc->variables[i];
531 struct var_range *vr = xmalloc (sizeof *vr);
536 vr->count = max - min + 1;
537 hmap_insert (&proc->var_ranges, &vr->hmap_node,
538 hash_pointer (var, 0));
541 if (lex_token (lexer) == T_SLASH)
548 free (proc->variables);
549 proc->variables = NULL;
550 proc->n_variables = 0;
554 /* Data file processing. */
556 const struct var_range *
557 get_var_range (const struct crosstabs_proc *proc, const struct variable *var)
559 if (!hmap_is_empty (&proc->var_ranges))
561 const struct var_range *range;
563 HMAP_FOR_EACH_IN_BUCKET (range, struct var_range, hmap_node,
564 hash_pointer (var, 0), &proc->var_ranges)
565 if (range->var == var)
573 should_tabulate_case (const struct crosstabulation *xt, const struct ccase *c,
574 enum mv_class exclude)
577 for (j = 0; j < xt->n_vars; j++)
579 const struct variable *var = xt->vars[j].var;
580 const struct var_range *range = get_var_range (xt->proc, var);
582 if (var_is_value_missing (var, case_data (c, var), exclude))
587 double num = case_num (c, var);
588 if (num < range->min || num >= range->max + 1.)
596 tabulate_integer_case (struct crosstabulation *xt, const struct ccase *c,
604 for (j = 0; j < xt->n_vars; j++)
606 /* Throw away fractional parts of values. */
607 hash = hash_int (case_num (c, xt->vars[j].var), hash);
610 HMAP_FOR_EACH_WITH_HASH (te, struct freq, node, hash, &xt->data)
612 for (j = 0; j < xt->n_vars; j++)
613 if ((int) case_num (c, xt->vars[j].var) != (int) te->values[j].f)
616 /* Found an existing entry. */
623 /* No existing entry. Create a new one. */
624 te = xmalloc (table_entry_size (xt->n_vars));
626 for (j = 0; j < xt->n_vars; j++)
627 te->values[j].f = (int) case_num (c, xt->vars[j].var);
628 hmap_insert (&xt->data, &te->node, hash);
632 tabulate_general_case (struct crosstabulation *xt, const struct ccase *c,
640 for (j = 0; j < xt->n_vars; j++)
642 const struct variable *var = xt->vars[j].var;
643 hash = value_hash (case_data (c, var), var_get_width (var), hash);
646 HMAP_FOR_EACH_WITH_HASH (te, struct freq, node, hash, &xt->data)
648 for (j = 0; j < xt->n_vars; j++)
650 const struct variable *var = xt->vars[j].var;
651 if (!value_equal (case_data (c, var), &te->values[j],
652 var_get_width (var)))
656 /* Found an existing entry. */
663 /* No existing entry. Create a new one. */
664 te = xmalloc (table_entry_size (xt->n_vars));
666 for (j = 0; j < xt->n_vars; j++)
668 const struct variable *var = xt->vars[j].var;
669 value_clone (&te->values[j], case_data (c, var), var_get_width (var));
671 hmap_insert (&xt->data, &te->node, hash);
674 /* Post-data reading calculations. */
676 static int compare_table_entry_vars_3way (const struct freq *a,
677 const struct freq *b,
678 const struct crosstabulation *xt,
680 static int compare_table_entry_3way (const void *ap_, const void *bp_,
682 static int compare_table_entry_3way_inv (const void *ap_, const void *bp_,
685 static void enum_var_values (const struct crosstabulation *, int var_idx,
687 static void free_var_values (const struct crosstabulation *, int var_idx);
688 static void output_crosstabulation (struct crosstabs_proc *,
689 struct crosstabulation *);
690 static void make_crosstabulation_subset (struct crosstabulation *xt,
691 size_t row0, size_t row1,
692 struct crosstabulation *subset);
693 static void make_summary_table (struct crosstabs_proc *);
694 static bool find_crosstab (struct crosstabulation *, size_t *row0p,
698 postcalc (struct crosstabs_proc *proc)
701 /* Round hash table entries, if requested
703 If this causes any of the cell counts to fall to zero, delete those
705 if (proc->round_cells)
706 for (struct crosstabulation *xt = proc->pivots;
707 xt < &proc->pivots[proc->n_pivots]; xt++)
709 struct freq *e, *next;
710 HMAP_FOR_EACH_SAFE (e, next, struct freq, node, &xt->data)
712 e->count = round_weight (proc, e->count);
715 hmap_delete (&xt->data, &e->node);
721 /* Convert hash tables into sorted arrays of entries. */
722 for (struct crosstabulation *xt = proc->pivots;
723 xt < &proc->pivots[proc->n_pivots]; xt++)
727 xt->n_entries = hmap_count (&xt->data);
728 xt->entries = xnmalloc (xt->n_entries, sizeof *xt->entries);
730 HMAP_FOR_EACH (e, struct freq, node, &xt->data)
731 xt->entries[i++] = e;
732 hmap_destroy (&xt->data);
734 sort (xt->entries, xt->n_entries, sizeof *xt->entries,
735 proc->descending ? compare_table_entry_3way_inv : compare_table_entry_3way,
740 make_summary_table (proc);
742 /* Output each pivot table. */
743 for (struct crosstabulation *xt = proc->pivots;
744 xt < &proc->pivots[proc->n_pivots]; xt++)
746 if (proc->pivot || xt->n_vars == 2)
747 output_crosstabulation (proc, xt);
750 size_t row0 = 0, row1 = 0;
751 while (find_crosstab (xt, &row0, &row1))
753 struct crosstabulation subset;
754 make_crosstabulation_subset (xt, row0, row1, &subset);
755 output_crosstabulation (proc, &subset);
760 const struct variable **vars = xcalloc (xt->n_vars, sizeof *vars);
761 for (size_t i = 0; i < xt->n_vars; i++)
762 vars[i] = xt->vars[i].var;
763 chart_item_submit (barchart_create (vars, xt->n_vars, _("Count"),
765 xt->entries, xt->n_entries));
770 /* Free output and prepare for next split file. */
771 for (struct crosstabulation *xt = proc->pivots;
772 xt < &proc->pivots[proc->n_pivots]; xt++)
776 /* Free the members that were allocated in this function(and the values
777 owned by the entries.
779 The other pointer members are either both allocated and destroyed at a
780 lower level (in output_crosstabulation), or both allocated and
781 destroyed at a higher level (in crs_custom_tables and free_proc,
783 for (size_t i = 0; i < xt->n_vars; i++)
785 int width = var_get_width (xt->vars[i].var);
786 if (value_needs_init (width))
790 for (j = 0; j < xt->n_entries; j++)
791 value_destroy (&xt->entries[j]->values[i], width);
795 for (size_t i = 0; i < xt->n_entries; i++)
796 free (xt->entries[i]);
802 make_crosstabulation_subset (struct crosstabulation *xt, size_t row0,
803 size_t row1, struct crosstabulation *subset)
808 assert (xt->n_consts == 0);
809 subset->missing = xt->missing;
811 subset->vars = xt->vars;
813 subset->n_consts = xt->n_vars - 2;
814 subset->const_vars = xt->vars + 2;
815 for (size_t i = 0; i < subset->n_consts; i++)
817 subset->const_vars[i].n_values = 1;
818 subset->const_vars[i].values = &xt->entries[row0]->values[2 + i];
821 subset->entries = &xt->entries[row0];
822 subset->n_entries = row1 - row0;
826 compare_table_entry_var_3way (const struct freq *a,
827 const struct freq *b,
828 const struct crosstabulation *xt,
831 return value_compare_3way (&a->values[idx], &b->values[idx],
832 var_get_width (xt->vars[idx].var));
836 compare_table_entry_vars_3way (const struct freq *a,
837 const struct freq *b,
838 const struct crosstabulation *xt,
843 for (i = idx1 - 1; i >= idx0; i--)
845 int cmp = compare_table_entry_var_3way (a, b, xt, i);
852 /* Compare the struct freq at *AP to the one at *BP and
853 return a strcmp()-type result. */
855 compare_table_entry_3way (const void *ap_, const void *bp_, const void *xt_)
857 const struct freq *const *ap = ap_;
858 const struct freq *const *bp = bp_;
859 const struct freq *a = *ap;
860 const struct freq *b = *bp;
861 const struct crosstabulation *xt = xt_;
864 cmp = compare_table_entry_vars_3way (a, b, xt, 2, xt->n_vars);
868 cmp = compare_table_entry_var_3way (a, b, xt, ROW_VAR);
872 return compare_table_entry_var_3way (a, b, xt, COL_VAR);
875 /* Inverted version of compare_table_entry_3way */
877 compare_table_entry_3way_inv (const void *ap_, const void *bp_, const void *xt_)
879 return -compare_table_entry_3way (ap_, bp_, xt_);
883 find_first_difference (const struct crosstabulation *xt, size_t row)
886 return xt->n_vars - 1;
889 const struct freq *a = xt->entries[row];
890 const struct freq *b = xt->entries[row - 1];
893 for (col = xt->n_vars - 1; col >= 0; col--)
894 if (compare_table_entry_var_3way (a, b, xt, col))
900 /* Output a table summarizing the cases processed. */
902 make_summary_table (struct crosstabs_proc *proc)
904 struct tab_table *summary;
905 struct crosstabulation *xt;
909 summary = tab_create (7, 3 + proc->n_pivots);
910 tab_set_format (summary, RC_WEIGHT, &proc->weight_format);
911 tab_title (summary, _("Summary."));
912 tab_headers (summary, 1, 0, 3, 0);
913 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
914 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
915 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
916 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
917 tab_hline (summary, TAL_1, 1, 6, 1);
918 tab_hline (summary, TAL_1, 1, 6, 2);
919 tab_vline (summary, TAL_1, 3, 1, 1);
920 tab_vline (summary, TAL_1, 5, 1, 1);
921 for (i = 0; i < 3; i++)
923 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
924 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
926 tab_offset (summary, 0, 3);
928 ds_init_empty (&name);
929 for (xt = &proc->pivots[0]; xt < &proc->pivots[proc->n_pivots]; xt++)
935 tab_hline (summary, TAL_1, 0, 6, 0);
938 for (i = 0; i < xt->n_vars; i++)
941 ds_put_cstr (&name, " * ");
942 ds_put_cstr (&name, var_to_string (xt->vars[i].var));
944 tab_text (summary, 0, 0, TAB_LEFT, ds_cstr (&name));
947 for (i = 0; i < xt->n_entries; i++)
948 valid += xt->entries[i]->count;
953 for (i = 0; i < 3; i++)
955 tab_double (summary, i * 2 + 1, 0, TAB_RIGHT, n[i], NULL, RC_WEIGHT);
956 tab_text_format (summary, i * 2 + 2, 0, TAB_RIGHT, "%.1f%%",
960 tab_next_row (summary);
964 submit (NULL, summary);
969 static struct tab_table *create_crosstab_table (struct crosstabs_proc *,
970 struct crosstabulation *);
971 static struct tab_table *create_chisq_table (struct crosstabs_proc *proc, struct crosstabulation *);
972 static struct tab_table *create_sym_table (struct crosstabs_proc *proc, struct crosstabulation *);
973 static struct tab_table *create_risk_table (struct crosstabs_proc *proc, struct crosstabulation *);
974 static struct tab_table *create_direct_table (struct crosstabs_proc *proc, struct crosstabulation *);
975 static void display_dimensions (struct crosstabs_proc *, struct crosstabulation *,
976 struct tab_table *, int first_difference);
977 static void display_crosstabulation (struct crosstabs_proc *,
978 struct crosstabulation *,
980 static void display_chisq (struct crosstabulation *, struct tab_table *,
981 bool *showed_fisher);
982 static void display_symmetric (struct crosstabs_proc *, struct crosstabulation *,
984 static void display_risk (struct crosstabulation *, struct tab_table *);
985 static void display_directional (struct crosstabs_proc *, struct crosstabulation *,
987 static void table_value_missing (struct crosstabs_proc *proc,
988 struct tab_table *table, int c, int r,
989 unsigned char opt, const union value *v,
990 const struct variable *var);
991 static void delete_missing (struct crosstabulation *);
992 static void build_matrix (struct crosstabulation *);
994 /* Output pivot table XT in the context of PROC. */
996 output_crosstabulation (struct crosstabs_proc *proc, struct crosstabulation *xt)
998 struct tab_table *table = NULL; /* Crosstabulation table. */
999 struct tab_table *chisq = NULL; /* Chi-square table. */
1000 bool showed_fisher = false;
1001 struct tab_table *sym = NULL; /* Symmetric measures table. */
1002 struct tab_table *risk = NULL; /* Risk estimate table. */
1003 struct tab_table *direct = NULL; /* Directional measures table. */
1006 enum_var_values (xt, COL_VAR, proc->descending);
1008 if (xt->vars[COL_VAR].n_values == 0)
1013 ds_init_cstr (&vars, var_to_string (xt->vars[0].var));
1014 for (i = 1; i < xt->n_vars; i++)
1015 ds_put_format (&vars, " * %s", var_to_string (xt->vars[i].var));
1017 /* TRANSLATORS: The %s here describes a crosstabulation. It takes the
1018 form "var1 * var2 * var3 * ...". */
1019 msg (SW, _("Crosstabulation %s contained no non-missing cases."),
1023 free_var_values (xt, COL_VAR);
1028 table = create_crosstab_table (proc, xt);
1029 if (proc->statistics & (1u << CRS_ST_CHISQ))
1030 chisq = create_chisq_table (proc, xt);
1031 if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC)
1032 | (1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU)
1033 | (1u << CRS_ST_GAMMA) | (1u << CRS_ST_CORR)
1034 | (1u << CRS_ST_KAPPA)))
1035 sym = create_sym_table (proc, xt);
1036 if (proc->statistics & (1u << CRS_ST_RISK))
1037 risk = create_risk_table (proc, xt);
1038 if (proc->statistics & ((1u << CRS_ST_LAMBDA) | (1u << CRS_ST_UC)
1039 | (1u << CRS_ST_D) | (1u << CRS_ST_ETA)))
1040 direct = create_direct_table (proc, xt);
1043 while (find_crosstab (xt, &row0, &row1))
1045 struct crosstabulation x;
1046 int first_difference;
1048 make_crosstabulation_subset (xt, row0, row1, &x);
1050 /* Find all the row variable values. */
1051 enum_var_values (&x, ROW_VAR, proc->descending);
1053 size_t n_rows = x.vars[ROW_VAR].n_values;
1054 size_t n_cols = x.vars[COL_VAR].n_values;
1055 if (size_overflow_p (xtimes (xtimes (n_rows, n_cols), sizeof (double))))
1057 x.row_tot = xmalloc (n_rows * sizeof *x.row_tot);
1058 x.col_tot = xmalloc (n_cols * sizeof *x.col_tot);
1059 x.mat = xmalloc (n_rows * n_cols * sizeof *x.mat);
1061 /* Allocate table space for the matrix. */
1063 && tab_row (table) + (n_rows + 1) * proc->n_cells > tab_nr (table))
1064 tab_realloc (table, -1,
1065 MAX (tab_nr (table) + (n_rows + 1) * proc->n_cells,
1066 tab_nr (table) * xt->n_entries / x.n_entries));
1070 /* Find the first variable that differs from the last subtable. */
1071 first_difference = find_first_difference (xt, row0);
1074 display_dimensions (proc, &x, table, first_difference);
1075 display_crosstabulation (proc, &x, table);
1078 if (proc->exclude == MV_NEVER)
1079 delete_missing (&x);
1083 display_dimensions (proc, &x, chisq, first_difference);
1084 display_chisq (&x, chisq, &showed_fisher);
1088 display_dimensions (proc, &x, sym, first_difference);
1089 display_symmetric (proc, &x, sym);
1093 display_dimensions (proc, &x, risk, first_difference);
1094 display_risk (&x, risk);
1098 display_dimensions (proc, &x, direct, first_difference);
1099 display_directional (proc, &x, direct);
1102 /* Free the parts of x that are not owned by xt. In
1103 particular we must not free x.cols, which is the same as
1104 xt->cols, which is freed at the end of this function. */
1105 free_var_values (&x, ROW_VAR);
1112 submit (NULL, table);
1117 tab_resize (chisq, 4 + (xt->n_vars - 2), -1);
1123 submit (xt, direct);
1125 free_var_values (xt, COL_VAR);
1129 build_matrix (struct crosstabulation *x)
1131 const int col_var_width = var_get_width (x->vars[COL_VAR].var);
1132 const int row_var_width = var_get_width (x->vars[ROW_VAR].var);
1133 size_t n_rows = x->vars[ROW_VAR].n_values;
1134 size_t n_cols = x->vars[COL_VAR].n_values;
1141 for (p = x->entries; p < &x->entries[x->n_entries]; p++)
1143 const struct freq *te = *p;
1145 while (!value_equal (&x->vars[ROW_VAR].values[row],
1146 &te->values[ROW_VAR], row_var_width))
1148 for (; col < n_cols; col++)
1154 while (!value_equal (&x->vars[COL_VAR].values[col],
1155 &te->values[COL_VAR], col_var_width))
1162 if (++col >= n_cols)
1168 while (mp < &x->mat[n_cols * n_rows])
1170 assert (mp == &x->mat[n_cols * n_rows]);
1172 /* Column totals, row totals, ns_rows. */
1174 for (col = 0; col < n_cols; col++)
1175 x->col_tot[col] = 0.0;
1176 for (row = 0; row < n_rows; row++)
1177 x->row_tot[row] = 0.0;
1179 for (row = 0; row < n_rows; row++)
1181 bool row_is_empty = true;
1182 for (col = 0; col < n_cols; col++)
1186 row_is_empty = false;
1187 x->col_tot[col] += *mp;
1188 x->row_tot[row] += *mp;
1195 assert (mp == &x->mat[n_cols * n_rows]);
1199 for (col = 0; col < n_cols; col++)
1200 for (row = 0; row < n_rows; row++)
1201 if (x->mat[col + row * n_cols] != 0.0)
1209 for (col = 0; col < n_cols; col++)
1210 x->total += x->col_tot[col];
1213 static struct tab_table *
1214 create_crosstab_table (struct crosstabs_proc *proc, struct crosstabulation *xt)
1221 static const struct tuple names[] =
1223 {CRS_CL_COUNT, N_("count")},
1224 {CRS_CL_ROW, N_("row %")},
1225 {CRS_CL_COLUMN, N_("column %")},
1226 {CRS_CL_TOTAL, N_("total %")},
1227 {CRS_CL_EXPECTED, N_("expected")},
1228 {CRS_CL_RESIDUAL, N_("residual")},
1229 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1230 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1232 const int n_names = sizeof names / sizeof *names;
1233 const struct tuple *t;
1235 struct tab_table *table;
1236 struct string title;
1237 struct crosstabulation x;
1241 make_crosstabulation_subset (xt, 0, 0, &x);
1243 size_t n_cols = x.vars[COL_VAR].n_values;
1244 table = tab_create (x.n_consts + 1 + n_cols + 1,
1245 (x.n_entries / n_cols) * 3 / 2 * proc->n_cells + 10);
1246 tab_headers (table, x.n_consts + 1, 0, 2, 0);
1247 tab_set_format (table, RC_WEIGHT, &proc->weight_format);
1249 /* First header line. */
1250 tab_joint_text (table, x.n_consts + 1, 0,
1251 (x.n_consts + 1) + (n_cols - 1), 0,
1252 TAB_CENTER | TAT_TITLE, var_to_string (x.vars[COL_VAR].var));
1254 tab_hline (table, TAL_1, x.n_consts + 1,
1255 x.n_consts + 2 + n_cols - 2, 1);
1257 /* Second header line. */
1258 for (i = 2; i < x.n_consts + 2; i++)
1259 tab_joint_text (table, x.n_consts + 2 - i - 1, 0,
1260 x.n_consts + 2 - i - 1, 1,
1261 TAB_RIGHT | TAT_TITLE, var_to_string (x.vars[i].var));
1262 tab_text (table, x.n_consts + 2 - 2, 1, TAB_RIGHT | TAT_TITLE,
1263 var_to_string (x.vars[ROW_VAR].var));
1264 for (i = 0; i < n_cols; i++)
1265 table_value_missing (proc, table, x.n_consts + 2 + i - 1, 1, TAB_RIGHT,
1266 &x.vars[COL_VAR].values[i], x.vars[COL_VAR].var);
1267 tab_text (table, x.n_consts + 2 + n_cols - 1, 1, TAB_CENTER, _("Total"));
1269 tab_hline (table, TAL_1, 0, x.n_consts + 2 + n_cols - 1, 2);
1270 tab_vline (table, TAL_1, x.n_consts + 2 + n_cols - 1, 0, 1);
1273 ds_init_empty (&title);
1274 for (i = 0; i < x.n_consts + 2; i++)
1277 ds_put_cstr (&title, " * ");
1278 ds_put_cstr (&title, var_to_string (x.vars[i].var));
1280 for (i = 0; i < xt->n_consts; i++)
1282 const struct variable *var = xt->const_vars[i].var;
1285 ds_put_format (&title, ", %s=", var_to_string (var));
1287 /* Insert the formatted value of VAR without any leading spaces. */
1288 s = data_out (&xt->const_vars[i].values[0], var_get_encoding (var),
1289 var_get_print_format (var));
1290 ds_put_cstr (&title, s + strspn (s, " "));
1294 ds_put_cstr (&title, " [");
1296 for (t = names; t < &names[n_names]; t++)
1297 if (proc->cells & (1u << t->value))
1300 ds_put_cstr (&title, ", ");
1301 ds_put_cstr (&title, gettext (t->name));
1303 ds_put_cstr (&title, "].");
1305 tab_title (table, "%s", ds_cstr (&title));
1306 ds_destroy (&title);
1308 tab_offset (table, 0, 2);
1312 static struct tab_table *
1313 create_chisq_table (struct crosstabs_proc *proc, struct crosstabulation *xt)
1315 struct tab_table *chisq;
1317 size_t n_cols = xt->vars[COL_VAR].n_values;
1318 chisq = tab_create (6 + (xt->n_vars - 2),
1319 xt->n_entries / n_cols * 3 / 2 * N_CHISQ + 10);
1320 tab_headers (chisq, 1 + (xt->n_vars - 2), 0, 1, 0);
1321 tab_set_format (chisq, RC_WEIGHT, &proc->weight_format);
1323 tab_title (chisq, _("Chi-square tests."));
1325 tab_offset (chisq, xt->n_vars - 2, 0);
1326 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1327 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1328 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1329 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1330 _("Asymp. Sig. (2-tailed)"));
1331 tab_text_format (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1332 _("Exact Sig. (%d-tailed)"), 2);
1333 tab_text_format (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1334 _("Exact Sig. (%d-tailed)"), 1);
1335 tab_offset (chisq, 0, 1);
1340 /* Symmetric measures. */
1341 static struct tab_table *
1342 create_sym_table (struct crosstabs_proc *proc, struct crosstabulation *xt)
1344 struct tab_table *sym;
1346 size_t n_cols = xt->vars[COL_VAR].n_values;
1347 sym = tab_create (6 + (xt->n_vars - 2),
1348 xt->n_entries / n_cols * 7 + 10);
1350 tab_set_format (sym, RC_WEIGHT, &proc->weight_format);
1352 tab_headers (sym, 2 + (xt->n_vars - 2), 0, 1, 0);
1353 tab_title (sym, _("Symmetric measures."));
1355 tab_offset (sym, xt->n_vars - 2, 0);
1356 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1357 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1358 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1359 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1360 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1361 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1362 tab_offset (sym, 0, 1);
1367 /* Risk estimate. */
1368 static struct tab_table *
1369 create_risk_table (struct crosstabs_proc *proc, struct crosstabulation *xt)
1371 struct tab_table *risk;
1373 size_t n_cols = xt->vars[COL_VAR].n_values;
1374 risk = tab_create (4 + (xt->n_vars - 2), xt->n_entries / n_cols * 4 + 10);
1375 tab_headers (risk, 1 + xt->n_vars - 2, 0, 2, 0);
1376 tab_title (risk, _("Risk estimate."));
1377 tab_set_format (risk, RC_WEIGHT, &proc->weight_format);
1379 tab_offset (risk, xt->n_vars - 2, 0);
1380 tab_joint_text_format (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE,
1381 _("95%% Confidence Interval"));
1382 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1383 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1384 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1385 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1386 tab_hline (risk, TAL_1, 2, 3, 1);
1387 tab_vline (risk, TAL_1, 2, 0, 1);
1388 tab_offset (risk, 0, 2);
1393 /* Directional measures. */
1394 static struct tab_table *
1395 create_direct_table (struct crosstabs_proc *proc, struct crosstabulation *xt)
1397 struct tab_table *direct;
1399 size_t n_cols = xt->vars[COL_VAR].n_values;
1400 direct = tab_create (7 + (xt->n_vars - 2),
1401 xt->n_entries / n_cols * 7 + 10);
1402 tab_headers (direct, 3 + (xt->n_vars - 2), 0, 1, 0);
1403 tab_title (direct, _("Directional measures."));
1404 tab_set_format (direct, RC_WEIGHT, &proc->weight_format);
1406 tab_offset (direct, xt->n_vars - 2, 0);
1407 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1408 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1409 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1410 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1411 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1412 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1413 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1414 tab_offset (direct, 0, 1);
1420 /* Delete missing rows and columns for statistical analysis when
1423 delete_missing (struct crosstabulation *xt)
1425 size_t n_rows = xt->vars[ROW_VAR].n_values;
1426 size_t n_cols = xt->vars[COL_VAR].n_values;
1429 for (r = 0; r < n_rows; r++)
1430 if (var_is_num_missing (xt->vars[ROW_VAR].var,
1431 xt->vars[ROW_VAR].values[r].f, MV_USER))
1433 for (c = 0; c < n_cols; c++)
1434 xt->mat[c + r * n_cols] = 0.;
1439 for (c = 0; c < n_cols; c++)
1440 if (var_is_num_missing (xt->vars[COL_VAR].var,
1441 xt->vars[COL_VAR].values[c].f, MV_USER))
1443 for (r = 0; r < n_rows; r++)
1444 xt->mat[c + r * n_cols] = 0.;
1449 /* Prepare table T for submission, and submit it. */
1451 submit (struct crosstabulation *xt, struct tab_table *t)
1458 tab_resize (t, -1, 0);
1459 if (tab_nr (t) == tab_t (t))
1461 table_unref (&t->table);
1464 tab_offset (t, 0, 0);
1466 for (i = 2; i < xt->n_vars; i++)
1467 tab_text (t, xt->n_vars - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1468 var_to_string (xt->vars[i].var));
1469 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1470 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1472 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1478 find_crosstab (struct crosstabulation *xt, size_t *row0p, size_t *row1p)
1480 size_t row0 = *row1p;
1483 if (row0 >= xt->n_entries)
1486 for (row1 = row0 + 1; row1 < xt->n_entries; row1++)
1488 struct freq *a = xt->entries[row0];
1489 struct freq *b = xt->entries[row1];
1490 if (compare_table_entry_vars_3way (a, b, xt, 2, xt->n_vars) != 0)
1498 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1499 result. WIDTH_ points to an int which is either 0 for a
1500 numeric value or a string width for a string value. */
1502 compare_value_3way (const void *a_, const void *b_, const void *width_)
1504 const union value *a = a_;
1505 const union value *b = b_;
1506 const int *width = width_;
1508 return value_compare_3way (a, b, *width);
1511 /* Inverted version of the above */
1513 compare_value_3way_inv (const void *a_, const void *b_, const void *width_)
1515 return -compare_value_3way (a_, b_, width_);
1519 /* Given an array of ENTRY_CNT table_entry structures starting at
1520 ENTRIES, creates a sorted list of the values that the variable
1521 with index VAR_IDX takes on. The values are returned as a
1522 malloc()'d array stored in *VALUES, with the number of values
1523 stored in *VALUE_CNT.
1525 The caller must eventually free *VALUES, but each pointer in *VALUES points
1526 to existing data not owned by *VALUES itself. */
1528 enum_var_values (const struct crosstabulation *xt, int var_idx,
1531 struct xtab_var *xv = &xt->vars[var_idx];
1532 const struct var_range *range = get_var_range (xt->proc, xv->var);
1536 xv->values = xnmalloc (range->count, sizeof *xv->values);
1537 xv->n_values = range->count;
1538 for (size_t i = 0; i < range->count; i++)
1539 xv->values[i].f = range->min + i;
1543 int width = var_get_width (xv->var);
1544 struct hmapx_node *node;
1545 const union value *iter;
1549 for (size_t i = 0; i < xt->n_entries; i++)
1551 const struct freq *te = xt->entries[i];
1552 const union value *value = &te->values[var_idx];
1553 size_t hash = value_hash (value, width, 0);
1555 HMAPX_FOR_EACH_WITH_HASH (iter, node, hash, &set)
1556 if (value_equal (iter, value, width))
1559 hmapx_insert (&set, (union value *) value, hash);
1564 xv->n_values = hmapx_count (&set);
1565 xv->values = xnmalloc (xv->n_values, sizeof *xv->values);
1567 HMAPX_FOR_EACH (iter, node, &set)
1568 xv->values[i++] = *iter;
1569 hmapx_destroy (&set);
1571 sort (xv->values, xv->n_values, sizeof *xv->values,
1572 descending ? compare_value_3way_inv : compare_value_3way,
1578 free_var_values (const struct crosstabulation *xt, int var_idx)
1580 struct xtab_var *xv = &xt->vars[var_idx];
1586 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1587 from V, displayed with print format spec from variable VAR. When
1588 in REPORT missing-value mode, missing values have an M appended. */
1590 table_value_missing (struct crosstabs_proc *proc,
1591 struct tab_table *table, int c, int r, unsigned char opt,
1592 const union value *v, const struct variable *var)
1594 const char *label = var_lookup_value_label (var, v);
1596 tab_text (table, c, r, TAB_LEFT, label);
1599 const struct fmt_spec *print = var_get_print_format (var);
1600 if (proc->exclude == MV_NEVER && var_is_value_missing (var, v, MV_USER))
1602 char *s = data_out (v, dict_get_encoding (proc->dict), print);
1603 tab_text_format (table, c, r, opt, "%sM", s + strspn (s, " "));
1607 tab_value (table, c, r, opt, v, var, print);
1611 /* Draws a line across TABLE at the current row to indicate the most
1612 major dimension variable with index FIRST_DIFFERENCE out of N_VARS
1613 that changed, and puts the values that changed into the table. TB
1614 and XT must be the corresponding table_entry and crosstab,
1617 display_dimensions (struct crosstabs_proc *proc, struct crosstabulation *xt,
1618 struct tab_table *table, int first_difference)
1620 tab_hline (table, TAL_1, xt->n_consts + xt->n_vars - first_difference - 1, tab_nc (table) - 1, 0);
1622 for (; first_difference >= 2; first_difference--)
1623 table_value_missing (proc, table, xt->n_consts + xt->n_vars - first_difference - 1, 0,
1624 TAB_RIGHT, &xt->entries[0]->values[first_difference],
1625 xt->vars[first_difference].var);
1628 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1629 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1630 additionally suffixed with a letter `M'. */
1632 format_cell_entry (struct tab_table *table, int c, int r, double value,
1633 char suffix, bool mark_missing, const struct dictionary *dict)
1641 s = data_out (&v, dict_get_encoding (dict), settings_get_format ());
1645 suffixes[suffix_len++] = suffix;
1647 suffixes[suffix_len++] = 'M';
1648 suffixes[suffix_len] = '\0';
1650 tab_text_format (table, c, r, TAB_RIGHT, "%s%s",
1651 s + strspn (s, " "), suffixes);
1656 /* Displays the crosstabulation table. */
1658 display_crosstabulation (struct crosstabs_proc *proc,
1659 struct crosstabulation *xt, struct tab_table *table)
1661 size_t n_rows = xt->vars[ROW_VAR].n_values;
1662 size_t n_cols = xt->vars[COL_VAR].n_values;
1667 for (r = 0; r < n_rows; r++)
1668 table_value_missing (proc, table, xt->n_consts + xt->n_vars - 2,
1669 r * proc->n_cells, TAB_RIGHT,
1670 &xt->vars[ROW_VAR].values[r],
1671 xt->vars[ROW_VAR].var);
1673 tab_text (table, xt->n_vars - 2, n_rows * proc->n_cells,
1674 TAB_LEFT, _("Total"));
1676 /* Put in the actual cells. */
1678 tab_offset (table, xt->n_consts + xt->n_vars - 1, -1);
1679 for (r = 0; r < n_rows; r++)
1681 if (proc->n_cells > 1)
1682 tab_hline (table, TAL_1, -1, n_cols, 0);
1683 for (c = 0; c < n_cols; c++)
1685 bool mark_missing = false;
1686 double expected_value = xt->row_tot[r] * xt->col_tot[c] / xt->total;
1687 if (proc->exclude == MV_NEVER
1688 && (var_is_num_missing (xt->vars[COL_VAR].var,
1689 xt->vars[COL_VAR].values[c].f, MV_USER)
1690 || var_is_num_missing (xt->vars[ROW_VAR].var,
1691 xt->vars[ROW_VAR].values[r].f,
1693 mark_missing = true;
1694 for (i = 0; i < proc->n_cells; i++)
1699 switch (proc->a_cells[i])
1705 v = *mp / xt->row_tot[r] * 100.;
1709 v = *mp / xt->col_tot[c] * 100.;
1713 v = *mp / xt->total * 100.;
1716 case CRS_CL_EXPECTED:
1719 case CRS_CL_RESIDUAL:
1720 v = *mp - expected_value;
1722 case CRS_CL_SRESIDUAL:
1723 v = (*mp - expected_value) / sqrt (expected_value);
1725 case CRS_CL_ASRESIDUAL:
1726 v = ((*mp - expected_value)
1727 / sqrt (expected_value
1728 * (1. - xt->row_tot[r] / xt->total)
1729 * (1. - xt->col_tot[c] / xt->total)));
1734 format_cell_entry (table, c, i, v, suffix, mark_missing, proc->dict);
1740 tab_offset (table, -1, tab_row (table) + proc->n_cells);
1744 tab_offset (table, -1, tab_row (table) - proc->n_cells * n_rows);
1745 for (r = 0; r < n_rows; r++)
1747 bool mark_missing = false;
1749 if (proc->exclude == MV_NEVER
1750 && var_is_num_missing (xt->vars[ROW_VAR].var,
1751 xt->vars[ROW_VAR].values[r].f, MV_USER))
1752 mark_missing = true;
1754 for (i = 0; i < proc->n_cells; i++)
1759 switch (proc->a_cells[i])
1769 v = xt->row_tot[r] / xt->total * 100.;
1773 v = xt->row_tot[r] / xt->total * 100.;
1776 case CRS_CL_EXPECTED:
1777 case CRS_CL_RESIDUAL:
1778 case CRS_CL_SRESIDUAL:
1779 case CRS_CL_ASRESIDUAL:
1786 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing, proc->dict);
1787 tab_next_row (table);
1791 /* Column totals, grand total. */
1793 if (proc->n_cells > 1)
1794 tab_hline (table, TAL_1, -1, n_cols, 0);
1795 for (c = 0; c <= n_cols; c++)
1797 double ct = c < n_cols ? xt->col_tot[c] : xt->total;
1798 bool mark_missing = false;
1801 if (proc->exclude == MV_NEVER && c < n_cols
1802 && var_is_num_missing (xt->vars[COL_VAR].var,
1803 xt->vars[COL_VAR].values[c].f, MV_USER))
1804 mark_missing = true;
1806 for (i = 0; i < proc->n_cells; i++)
1811 switch (proc->a_cells[i])
1817 v = ct / xt->total * 100.;
1825 v = ct / xt->total * 100.;
1828 case CRS_CL_EXPECTED:
1829 case CRS_CL_RESIDUAL:
1830 case CRS_CL_SRESIDUAL:
1831 case CRS_CL_ASRESIDUAL:
1837 format_cell_entry (table, c, i, v, suffix, mark_missing, proc->dict);
1842 tab_offset (table, -1, tab_row (table) + last_row);
1843 tab_offset (table, 0, -1);
1846 static void calc_r (struct crosstabulation *,
1847 double *XT, double *Y, double *, double *, double *);
1848 static void calc_chisq (struct crosstabulation *,
1849 double[N_CHISQ], int[N_CHISQ], double *, double *);
1851 /* Display chi-square statistics. */
1853 display_chisq (struct crosstabulation *xt, struct tab_table *chisq,
1854 bool *showed_fisher)
1856 static const char *chisq_stats[N_CHISQ] =
1858 N_("Pearson Chi-Square"),
1859 N_("Likelihood Ratio"),
1860 N_("Fisher's Exact Test"),
1861 N_("Continuity Correction"),
1862 N_("Linear-by-Linear Association"),
1864 double chisq_v[N_CHISQ];
1865 double fisher1, fisher2;
1870 calc_chisq (xt, chisq_v, df, &fisher1, &fisher2);
1872 tab_offset (chisq, xt->n_consts + xt->n_vars - 2, -1);
1874 for (i = 0; i < N_CHISQ; i++)
1876 if ((i != 2 && chisq_v[i] == SYSMIS)
1877 || (i == 2 && fisher1 == SYSMIS))
1880 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1883 tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL, RC_OTHER);
1884 tab_double (chisq, 2, 0, TAB_RIGHT, df[i], NULL, RC_WEIGHT);
1885 tab_double (chisq, 3, 0, TAB_RIGHT,
1886 gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL, RC_PVALUE);
1890 *showed_fisher = true;
1891 tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL, RC_PVALUE);
1892 tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL, RC_PVALUE);
1894 tab_next_row (chisq);
1897 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1898 tab_double (chisq, 1, 0, TAB_RIGHT, xt->total, NULL, RC_WEIGHT);
1899 tab_next_row (chisq);
1901 tab_offset (chisq, 0, -1);
1904 static int calc_symmetric (struct crosstabs_proc *, struct crosstabulation *,
1905 double[N_SYMMETRIC], double[N_SYMMETRIC],
1906 double[N_SYMMETRIC],
1907 double[3], double[3], double[3]);
1909 /* Display symmetric measures. */
1911 display_symmetric (struct crosstabs_proc *proc, struct crosstabulation *xt,
1912 struct tab_table *sym)
1914 static const char *categories[] =
1916 N_("Nominal by Nominal"),
1917 N_("Ordinal by Ordinal"),
1918 N_("Interval by Interval"),
1919 N_("Measure of Agreement"),
1922 static const char *stats[N_SYMMETRIC] =
1926 N_("Contingency Coefficient"),
1927 N_("Kendall's tau-b"),
1928 N_("Kendall's tau-c"),
1930 N_("Spearman Correlation"),
1935 static const int stats_categories[N_SYMMETRIC] =
1937 0, 0, 0, 1, 1, 1, 1, 2, 3,
1941 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
1942 double somers_d_v[3], somers_d_ase[3], somers_d_t[3];
1945 if (!calc_symmetric (proc, xt, sym_v, sym_ase, sym_t,
1946 somers_d_v, somers_d_ase, somers_d_t))
1949 tab_offset (sym, xt->n_consts + xt->n_vars - 2, -1);
1951 for (i = 0; i < N_SYMMETRIC; i++)
1953 if (sym_v[i] == SYSMIS)
1956 if (stats_categories[i] != last_cat)
1958 last_cat = stats_categories[i];
1959 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
1962 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
1963 tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL, RC_OTHER);
1964 if (sym_ase[i] != SYSMIS)
1965 tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL, RC_OTHER);
1966 if (sym_t[i] != SYSMIS)
1967 tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL, RC_OTHER);
1968 /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL, RC_PVALUE);*/
1972 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1973 tab_double (sym, 2, 0, TAB_RIGHT, xt->total, NULL, RC_WEIGHT);
1976 tab_offset (sym, 0, -1);
1979 static int calc_risk (struct crosstabulation *,
1980 double[], double[], double[], union value *);
1982 /* Display risk estimate. */
1984 display_risk (struct crosstabulation *xt, struct tab_table *risk)
1987 double risk_v[3], lower[3], upper[3];
1991 if (!calc_risk (xt, risk_v, upper, lower, c))
1994 tab_offset (risk, xt->n_consts + xt->n_vars - 2, -1);
1996 for (i = 0; i < 3; i++)
1998 const struct variable *cv = xt->vars[COL_VAR].var;
1999 const struct variable *rv = xt->vars[ROW_VAR].var;
2000 int cvw = var_get_width (cv);
2001 int rvw = var_get_width (rv);
2003 if (risk_v[i] == SYSMIS)
2009 if (var_is_numeric (cv))
2010 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2011 var_to_string (cv), c[0].f, c[1].f);
2013 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2015 cvw, value_str (&c[0], cvw),
2016 cvw, value_str (&c[1], cvw));
2020 if (var_is_numeric (rv))
2021 sprintf (buf, _("For cohort %s = %.*g"),
2022 var_to_string (rv), DBL_DIG + 1,
2023 xt->vars[ROW_VAR].values[i - 1].f);
2025 sprintf (buf, _("For cohort %s = %.*s"),
2027 rvw, value_str (&xt->vars[ROW_VAR].values[i - 1], rvw));
2031 tab_text (risk, 0, 0, TAB_LEFT, buf);
2032 tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL, RC_OTHER);
2033 tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL, RC_OTHER);
2034 tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL, RC_OTHER);
2035 tab_next_row (risk);
2038 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2039 tab_double (risk, 1, 0, TAB_RIGHT, xt->total, NULL, RC_WEIGHT);
2040 tab_next_row (risk);
2042 tab_offset (risk, 0, -1);
2045 static int calc_directional (struct crosstabs_proc *, struct crosstabulation *,
2046 double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2047 double[N_DIRECTIONAL], double[N_DIRECTIONAL]);
2049 /* Display directional measures. */
2051 display_directional (struct crosstabs_proc *proc, struct crosstabulation *xt,
2052 struct tab_table *direct)
2054 static const char *categories[] =
2056 N_("Nominal by Nominal"),
2057 N_("Ordinal by Ordinal"),
2058 N_("Nominal by Interval"),
2061 static const char *stats[] =
2064 N_("Goodman and Kruskal tau"),
2065 N_("Uncertainty Coefficient"),
2070 static const char *types[] =
2077 static const int stats_categories[N_DIRECTIONAL] =
2079 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2082 static const int stats_stats[N_DIRECTIONAL] =
2084 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2087 static const int stats_types[N_DIRECTIONAL] =
2089 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2092 static const int *stats_lookup[] =
2099 static const char **stats_names[] =
2111 double direct_v[N_DIRECTIONAL];
2112 double direct_ase[N_DIRECTIONAL];
2113 double direct_t[N_DIRECTIONAL];
2114 double sig[N_DIRECTIONAL];
2118 if (!calc_directional (proc, xt, direct_v, direct_ase, direct_t, sig))
2121 tab_offset (direct, xt->n_consts + xt->n_vars - 2, -1);
2123 for (i = 0; i < N_DIRECTIONAL; i++)
2125 if (direct_v[i] == SYSMIS)
2131 for (j = 0; j < 3; j++)
2132 if (last[j] != stats_lookup[j][i])
2135 tab_hline (direct, TAL_1, j, 6, 0);
2140 int k = last[j] = stats_lookup[j][i];
2145 string = var_to_string (xt->vars[0].var);
2147 string = var_to_string (xt->vars[1].var);
2149 tab_text_format (direct, j, 0, TAB_LEFT,
2150 gettext (stats_names[j][k]), string);
2155 tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL, RC_OTHER);
2156 if (direct_ase[i] != SYSMIS)
2157 tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL, RC_OTHER);
2158 if (direct_t[i] != SYSMIS)
2159 tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL, RC_OTHER);
2160 tab_double (direct, 6, 0, TAB_RIGHT, sig[i], NULL, RC_PVALUE);
2161 tab_next_row (direct);
2164 tab_offset (direct, 0, -1);
2167 /* Statistical calculations. */
2169 /* Returns the value of the logarithm of gamma (factorial) function for an integer
2172 log_gamma_int (double xt)
2177 for (i = 2; i < xt; i++)
2183 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2185 static inline double
2186 Pr (int a, int b, int c, int d)
2188 return exp (log_gamma_int (a + b + 1.) - log_gamma_int (a + 1.)
2189 + log_gamma_int (c + d + 1.) - log_gamma_int (b + 1.)
2190 + log_gamma_int (a + c + 1.) - log_gamma_int (c + 1.)
2191 + log_gamma_int (b + d + 1.) - log_gamma_int (d + 1.)
2192 - log_gamma_int (a + b + c + d + 1.));
2195 /* Swap the contents of A and B. */
2197 swap (int *a, int *b)
2204 /* Calculate significance for Fisher's exact test as specified in
2205 _SPSS Statistical Algorithms_, Appendix 5. */
2207 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2212 if (MIN (c, d) < MIN (a, b))
2213 swap (&a, &c), swap (&b, &d);
2214 if (MIN (b, d) < MIN (a, c))
2215 swap (&a, &b), swap (&c, &d);
2219 swap (&a, &b), swap (&c, &d);
2221 swap (&a, &c), swap (&b, &d);
2224 pn1 = Pr (a, b, c, d);
2226 for (xt = 1; xt <= a; xt++)
2228 *fisher1 += Pr (a - xt, b + xt, c + xt, d - xt);
2231 *fisher2 = *fisher1;
2233 for (xt = 1; xt <= b; xt++)
2235 double p = Pr (a + xt, b - xt, c - xt, d + xt);
2241 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2242 columns with values COLS and N_ROWS rows with values ROWS. Values
2243 in the matrix sum to xt->total. */
2245 calc_chisq (struct crosstabulation *xt,
2246 double chisq[N_CHISQ], int df[N_CHISQ],
2247 double *fisher1, double *fisher2)
2251 chisq[0] = chisq[1] = 0.;
2252 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2253 *fisher1 = *fisher2 = SYSMIS;
2255 df[0] = df[1] = (xt->ns_cols - 1) * (xt->ns_rows - 1);
2257 if (xt->ns_rows <= 1 || xt->ns_cols <= 1)
2259 chisq[0] = chisq[1] = SYSMIS;
2263 size_t n_rows = xt->vars[ROW_VAR].n_values;
2264 size_t n_cols = xt->vars[COL_VAR].n_values;
2265 for (r = 0; r < n_rows; r++)
2266 for (c = 0; c < n_cols; c++)
2268 const double expected = xt->row_tot[r] * xt->col_tot[c] / xt->total;
2269 const double freq = xt->mat[n_cols * r + c];
2270 const double residual = freq - expected;
2272 chisq[0] += residual * residual / expected;
2274 chisq[1] += freq * log (expected / freq);
2285 /* Calculate Yates and Fisher exact test. */
2286 if (xt->ns_cols == 2 && xt->ns_rows == 2)
2288 double f11, f12, f21, f22;
2294 for (i = j = 0; i < n_cols; i++)
2295 if (xt->col_tot[i] != 0.)
2304 f11 = xt->mat[nz_cols[0]];
2305 f12 = xt->mat[nz_cols[1]];
2306 f21 = xt->mat[nz_cols[0] + n_cols];
2307 f22 = xt->mat[nz_cols[1] + n_cols];
2312 const double xt_ = fabs (f11 * f22 - f12 * f21) - 0.5 * xt->total;
2315 chisq[3] = (xt->total * pow2 (xt_)
2316 / (f11 + f12) / (f21 + f22)
2317 / (f11 + f21) / (f12 + f22));
2325 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2328 /* Calculate Mantel-Haenszel. */
2329 if (var_is_numeric (xt->vars[ROW_VAR].var)
2330 && var_is_numeric (xt->vars[COL_VAR].var))
2332 double r, ase_0, ase_1;
2333 calc_r (xt, (double *) xt->vars[ROW_VAR].values,
2334 (double *) xt->vars[COL_VAR].values,
2335 &r, &ase_0, &ase_1);
2337 chisq[4] = (xt->total - 1.) * r * r;
2342 /* Calculate the value of Pearson's r. r is stored into R, its T value into
2343 T, and standard error into ERROR. The row and column values must be
2344 passed in XT and Y. */
2346 calc_r (struct crosstabulation *xt,
2347 double *XT, double *Y, double *r, double *t, double *error)
2349 size_t n_rows = xt->vars[ROW_VAR].n_values;
2350 size_t n_cols = xt->vars[COL_VAR].n_values;
2351 double SX, SY, S, T;
2353 double sum_XYf, sum_X2Y2f;
2354 double sum_Xr, sum_X2r;
2355 double sum_Yc, sum_Y2c;
2358 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2359 for (j = 0; j < n_cols; j++)
2361 double fij = xt->mat[j + i * n_cols];
2362 double product = XT[i] * Y[j];
2363 double temp = fij * product;
2365 sum_X2Y2f += temp * product;
2368 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2370 sum_Xr += XT[i] * xt->row_tot[i];
2371 sum_X2r += pow2 (XT[i]) * xt->row_tot[i];
2373 Xbar = sum_Xr / xt->total;
2375 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2377 sum_Yc += Y[i] * xt->col_tot[i];
2378 sum_Y2c += Y[i] * Y[i] * xt->col_tot[i];
2380 Ybar = sum_Yc / xt->total;
2382 S = sum_XYf - sum_Xr * sum_Yc / xt->total;
2383 SX = sum_X2r - pow2 (sum_Xr) / xt->total;
2384 SY = sum_Y2c - pow2 (sum_Yc) / xt->total;
2387 *t = *r / sqrt (1 - pow2 (*r)) * sqrt (xt->total - 2);
2392 for (s = c = 0., i = 0; i < n_rows; i++)
2393 for (j = 0; j < n_cols; j++)
2395 double Xresid, Yresid;
2398 Xresid = XT[i] - Xbar;
2399 Yresid = Y[j] - Ybar;
2400 temp = (T * Xresid * Yresid
2402 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2403 y = xt->mat[j + i * n_cols] * temp * temp - c;
2408 *error = sqrt (s) / (T * T);
2412 /* Calculate symmetric statistics and their asymptotic standard
2413 errors. Returns 0 if none could be calculated. */
2415 calc_symmetric (struct crosstabs_proc *proc, struct crosstabulation *xt,
2416 double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2417 double t[N_SYMMETRIC],
2418 double somers_d_v[3], double somers_d_ase[3],
2419 double somers_d_t[3])
2421 size_t n_rows = xt->vars[ROW_VAR].n_values;
2422 size_t n_cols = xt->vars[COL_VAR].n_values;
2425 q = MIN (xt->ns_rows, xt->ns_cols);
2429 for (i = 0; i < N_SYMMETRIC; i++)
2430 v[i] = ase[i] = t[i] = SYSMIS;
2432 /* Phi, Cramer's V, contingency coefficient. */
2433 if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC)))
2435 double Xp = 0.; /* Pearson chi-square. */
2438 for (r = 0; r < n_rows; r++)
2439 for (c = 0; c < n_cols; c++)
2441 const double expected = xt->row_tot[r] * xt->col_tot[c] / xt->total;
2442 const double freq = xt->mat[n_cols * r + c];
2443 const double residual = freq - expected;
2445 Xp += residual * residual / expected;
2448 if (proc->statistics & (1u << CRS_ST_PHI))
2450 v[0] = sqrt (Xp / xt->total);
2451 v[1] = sqrt (Xp / (xt->total * (q - 1)));
2453 if (proc->statistics & (1u << CRS_ST_CC))
2454 v[2] = sqrt (Xp / (Xp + xt->total));
2457 if (proc->statistics & ((1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU)
2458 | (1u << CRS_ST_GAMMA) | (1u << CRS_ST_D)))
2463 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2467 Dr = Dc = pow2 (xt->total);
2468 for (r = 0; r < n_rows; r++)
2469 Dr -= pow2 (xt->row_tot[r]);
2470 for (c = 0; c < n_cols; c++)
2471 Dc -= pow2 (xt->col_tot[c]);
2473 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2474 for (c = 0; c < n_cols; c++)
2478 for (r = 0; r < n_rows; r++)
2479 cum[c + r * n_cols] = ct += xt->mat[c + r * n_cols];
2488 for (i = 0; i < n_rows; i++)
2492 for (j = 1; j < n_cols; j++)
2493 Cij += xt->col_tot[j] - cum[j + i * n_cols];
2496 for (j = 1; j < n_cols; j++)
2497 Dij += cum[j + (i - 1) * n_cols];
2501 double fij = xt->mat[j + i * n_cols];
2507 assert (j < n_cols);
2509 Cij -= xt->col_tot[j] - cum[j + i * n_cols];
2510 Dij += xt->col_tot[j - 1] - cum[j - 1 + i * n_cols];
2514 Cij += cum[j - 1 + (i - 1) * n_cols];
2515 Dij -= cum[j + (i - 1) * n_cols];
2521 if (proc->statistics & (1u << CRS_ST_BTAU))
2522 v[3] = (P - Q) / sqrt (Dr * Dc);
2523 if (proc->statistics & (1u << CRS_ST_CTAU))
2524 v[4] = (q * (P - Q)) / (pow2 (xt->total) * (q - 1));
2525 if (proc->statistics & (1u << CRS_ST_GAMMA))
2526 v[5] = (P - Q) / (P + Q);
2528 /* ASE for tau-b, tau-c, gamma. Calculations could be
2529 eliminated here, at expense of memory. */
2534 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2535 for (i = 0; i < n_rows; i++)
2539 for (j = 1; j < n_cols; j++)
2540 Cij += xt->col_tot[j] - cum[j + i * n_cols];
2543 for (j = 1; j < n_cols; j++)
2544 Dij += cum[j + (i - 1) * n_cols];
2548 double fij = xt->mat[j + i * n_cols];
2550 if (proc->statistics & (1u << CRS_ST_BTAU))
2552 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2553 + v[3] * (xt->row_tot[i] * Dc
2554 + xt->col_tot[j] * Dr));
2555 btau_cum += fij * temp * temp;
2559 const double temp = Cij - Dij;
2560 ctau_cum += fij * temp * temp;
2563 if (proc->statistics & (1u << CRS_ST_GAMMA))
2565 const double temp = Q * Cij - P * Dij;
2566 gamma_cum += fij * temp * temp;
2569 if (proc->statistics & (1u << CRS_ST_D))
2571 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2572 - (P - Q) * (xt->total - xt->row_tot[i]));
2573 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2574 - (Q - P) * (xt->total - xt->col_tot[j]));
2579 assert (j < n_cols);
2581 Cij -= xt->col_tot[j] - cum[j + i * n_cols];
2582 Dij += xt->col_tot[j - 1] - cum[j - 1 + i * n_cols];
2586 Cij += cum[j - 1 + (i - 1) * n_cols];
2587 Dij -= cum[j + (i - 1) * n_cols];
2593 btau_var = ((btau_cum
2594 - (xt->total * pow2 (xt->total * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2596 if (proc->statistics & (1u << CRS_ST_BTAU))
2598 ase[3] = sqrt (btau_var);
2599 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / xt->total)
2602 if (proc->statistics & (1u << CRS_ST_CTAU))
2604 ase[4] = ((2 * q / ((q - 1) * pow2 (xt->total)))
2605 * sqrt (ctau_cum - (P - Q) * (P - Q) / xt->total));
2606 t[4] = v[4] / ase[4];
2608 if (proc->statistics & (1u << CRS_ST_GAMMA))
2610 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2611 t[5] = v[5] / (2. / (P + Q)
2612 * sqrt (ctau_cum - (P - Q) * (P - Q) / xt->total));
2614 if (proc->statistics & (1u << CRS_ST_D))
2616 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2617 somers_d_ase[0] = SYSMIS;
2618 somers_d_t[0] = (somers_d_v[0]
2620 * sqrt (ctau_cum - pow2 (P - Q) / xt->total)));
2621 somers_d_v[1] = (P - Q) / Dc;
2622 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2623 somers_d_t[1] = (somers_d_v[1]
2625 * sqrt (ctau_cum - pow2 (P - Q) / xt->total)));
2626 somers_d_v[2] = (P - Q) / Dr;
2627 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2628 somers_d_t[2] = (somers_d_v[2]
2630 * sqrt (ctau_cum - pow2 (P - Q) / xt->total)));
2636 /* Spearman correlation, Pearson's r. */
2637 if (proc->statistics & (1u << CRS_ST_CORR))
2639 double *R = xmalloc (sizeof *R * n_rows);
2640 double *C = xmalloc (sizeof *C * n_cols);
2643 double y, t, c = 0., s = 0.;
2648 R[i] = s + (xt->row_tot[i] + 1.) / 2.;
2649 y = xt->row_tot[i] - c;
2655 assert (i < n_rows);
2660 double y, t, c = 0., s = 0.;
2665 C[j] = s + (xt->col_tot[j] + 1.) / 2;
2666 y = xt->col_tot[j] - c;
2672 assert (j < n_cols);
2676 calc_r (xt, R, C, &v[6], &t[6], &ase[6]);
2681 calc_r (xt, (double *) xt->vars[ROW_VAR].values,
2682 (double *) xt->vars[COL_VAR].values,
2683 &v[7], &t[7], &ase[7]);
2686 /* Cohen's kappa. */
2687 if (proc->statistics & (1u << CRS_ST_KAPPA) && xt->ns_rows == xt->ns_cols)
2689 double ase_under_h0;
2690 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2693 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2694 i < xt->ns_rows; i++, j++)
2698 while (xt->col_tot[j] == 0.)
2701 prod = xt->row_tot[i] * xt->col_tot[j];
2702 sum = xt->row_tot[i] + xt->col_tot[j];
2704 sum_fii += xt->mat[j + i * n_cols];
2706 sum_fiiri_ci += xt->mat[j + i * n_cols] * sum;
2707 sum_riciri_ci += prod * sum;
2709 for (sum_fijri_ci2 = 0., i = 0; i < xt->ns_rows; i++)
2710 for (j = 0; j < xt->ns_cols; j++)
2712 double sum = xt->row_tot[i] + xt->col_tot[j];
2713 sum_fijri_ci2 += xt->mat[j + i * n_cols] * sum * sum;
2716 v[8] = (xt->total * sum_fii - sum_rici) / (pow2 (xt->total) - sum_rici);
2718 ase_under_h0 = sqrt ((pow2 (xt->total) * sum_rici
2719 + sum_rici * sum_rici
2720 - xt->total * sum_riciri_ci)
2721 / (xt->total * (pow2 (xt->total) - sum_rici) * (pow2 (xt->total) - sum_rici)));
2723 ase[8] = sqrt (xt->total * (((sum_fii * (xt->total - sum_fii))
2724 / pow2 (pow2 (xt->total) - sum_rici))
2725 + ((2. * (xt->total - sum_fii)
2726 * (2. * sum_fii * sum_rici
2727 - xt->total * sum_fiiri_ci))
2728 / pow3 (pow2 (xt->total) - sum_rici))
2729 + (pow2 (xt->total - sum_fii)
2730 * (xt->total * sum_fijri_ci2 - 4.
2731 * sum_rici * sum_rici)
2732 / pow4 (pow2 (xt->total) - sum_rici))));
2734 t[8] = v[8] / ase_under_h0;
2740 /* Calculate risk estimate. */
2742 calc_risk (struct crosstabulation *xt,
2743 double *value, double *upper, double *lower, union value *c)
2745 size_t n_cols = xt->vars[COL_VAR].n_values;
2746 double f11, f12, f21, f22;
2752 for (i = 0; i < 3; i++)
2753 value[i] = upper[i] = lower[i] = SYSMIS;
2756 if (xt->ns_rows != 2 || xt->ns_cols != 2)
2763 for (i = j = 0; i < n_cols; i++)
2764 if (xt->col_tot[i] != 0.)
2773 f11 = xt->mat[nz_cols[0]];
2774 f12 = xt->mat[nz_cols[1]];
2775 f21 = xt->mat[nz_cols[0] + n_cols];
2776 f22 = xt->mat[nz_cols[1] + n_cols];
2778 c[0] = xt->vars[COL_VAR].values[nz_cols[0]];
2779 c[1] = xt->vars[COL_VAR].values[nz_cols[1]];
2782 value[0] = (f11 * f22) / (f12 * f21);
2783 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2784 lower[0] = value[0] * exp (-1.960 * v);
2785 upper[0] = value[0] * exp (1.960 * v);
2787 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2788 v = sqrt ((f12 / (f11 * (f11 + f12)))
2789 + (f22 / (f21 * (f21 + f22))));
2790 lower[1] = value[1] * exp (-1.960 * v);
2791 upper[1] = value[1] * exp (1.960 * v);
2793 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2794 v = sqrt ((f11 / (f12 * (f11 + f12)))
2795 + (f21 / (f22 * (f21 + f22))));
2796 lower[2] = value[2] * exp (-1.960 * v);
2797 upper[2] = value[2] * exp (1.960 * v);
2802 /* Calculate directional measures. */
2804 calc_directional (struct crosstabs_proc *proc, struct crosstabulation *xt,
2805 double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2806 double t[N_DIRECTIONAL], double sig[N_DIRECTIONAL])
2808 size_t n_rows = xt->vars[ROW_VAR].n_values;
2809 size_t n_cols = xt->vars[COL_VAR].n_values;
2813 for (i = 0; i < N_DIRECTIONAL; i++)
2814 v[i] = ase[i] = t[i] = sig[i] = SYSMIS;
2818 if (proc->statistics & (1u << CRS_ST_LAMBDA))
2820 double *fim = xnmalloc (n_rows, sizeof *fim);
2821 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2822 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2823 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2824 double sum_fim, sum_fmj;
2826 int rm_index, cm_index;
2829 /* Find maximum for each row and their sum. */
2830 for (sum_fim = 0., i = 0; i < n_rows; i++)
2832 double max = xt->mat[i * n_cols];
2835 for (j = 1; j < n_cols; j++)
2836 if (xt->mat[j + i * n_cols] > max)
2838 max = xt->mat[j + i * n_cols];
2842 sum_fim += fim[i] = max;
2843 fim_index[i] = index;
2846 /* Find maximum for each column. */
2847 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2849 double max = xt->mat[j];
2852 for (i = 1; i < n_rows; i++)
2853 if (xt->mat[j + i * n_cols] > max)
2855 max = xt->mat[j + i * n_cols];
2859 sum_fmj += fmj[j] = max;
2860 fmj_index[j] = index;
2863 /* Find maximum row total. */
2864 rm = xt->row_tot[0];
2866 for (i = 1; i < n_rows; i++)
2867 if (xt->row_tot[i] > rm)
2869 rm = xt->row_tot[i];
2873 /* Find maximum column total. */
2874 cm = xt->col_tot[0];
2876 for (j = 1; j < n_cols; j++)
2877 if (xt->col_tot[j] > cm)
2879 cm = xt->col_tot[j];
2883 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * xt->total - rm - cm);
2884 v[1] = (sum_fmj - rm) / (xt->total - rm);
2885 v[2] = (sum_fim - cm) / (xt->total - cm);
2887 /* ASE1 for Y given XT. */
2892 for (i = 0; i < n_rows; i++)
2893 if (cm_index == fim_index[i])
2895 ase[2] = sqrt ((xt->total - sum_fim) * (sum_fim + cm - 2. * accum)
2896 / pow3 (xt->total - cm));
2899 /* ASE0 for Y given XT. */
2903 for (accum = 0., i = 0; i < n_rows; i++)
2904 if (cm_index != fim_index[i])
2905 accum += (xt->mat[i * n_cols + fim_index[i]]
2906 + xt->mat[i * n_cols + cm_index]);
2907 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / xt->total) / (xt->total - cm));
2910 /* ASE1 for XT given Y. */
2915 for (j = 0; j < n_cols; j++)
2916 if (rm_index == fmj_index[j])
2918 ase[1] = sqrt ((xt->total - sum_fmj) * (sum_fmj + rm - 2. * accum)
2919 / pow3 (xt->total - rm));
2922 /* ASE0 for XT given Y. */
2926 for (accum = 0., j = 0; j < n_cols; j++)
2927 if (rm_index != fmj_index[j])
2928 accum += (xt->mat[j + n_cols * fmj_index[j]]
2929 + xt->mat[j + n_cols * rm_index]);
2930 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / xt->total) / (xt->total - rm));
2933 /* Symmetric ASE0 and ASE1. */
2938 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
2939 for (j = 0; j < n_cols; j++)
2941 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
2942 int temp1 = (i == rm_index) + (j == cm_index);
2943 accum0 += xt->mat[j + i * n_cols] * pow2 (temp0 - temp1);
2944 accum1 += (xt->mat[j + i * n_cols]
2945 * pow2 (temp0 + (v[0] - 1.) * temp1));
2947 ase[0] = sqrt (accum1 - 4. * xt->total * v[0] * v[0]) / (2. * xt->total - rm - cm);
2948 t[0] = v[0] / (sqrt (accum0 - pow2 (sum_fim + sum_fmj - cm - rm) / xt->total)
2949 / (2. * xt->total - rm - cm));
2952 for (i = 0; i < 3; i++)
2953 sig[i] = 2 * gsl_cdf_ugaussian_Q (t[i]);
2962 double sum_fij2_ri, sum_fij2_ci;
2963 double sum_ri2, sum_cj2;
2965 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
2966 for (j = 0; j < n_cols; j++)
2968 double temp = pow2 (xt->mat[j + i * n_cols]);
2969 sum_fij2_ri += temp / xt->row_tot[i];
2970 sum_fij2_ci += temp / xt->col_tot[j];
2973 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
2974 sum_ri2 += pow2 (xt->row_tot[i]);
2976 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
2977 sum_cj2 += pow2 (xt->col_tot[j]);
2979 v[3] = (xt->total * sum_fij2_ci - sum_ri2) / (pow2 (xt->total) - sum_ri2);
2980 v[4] = (xt->total * sum_fij2_ri - sum_cj2) / (pow2 (xt->total) - sum_cj2);
2984 if (proc->statistics & (1u << CRS_ST_UC))
2986 double UX, UY, UXY, P;
2987 double ase1_yx, ase1_xy, ase1_sym;
2990 for (UX = 0., i = 0; i < n_rows; i++)
2991 if (xt->row_tot[i] > 0.)
2992 UX -= xt->row_tot[i] / xt->total * log (xt->row_tot[i] / xt->total);
2994 for (UY = 0., j = 0; j < n_cols; j++)
2995 if (xt->col_tot[j] > 0.)
2996 UY -= xt->col_tot[j] / xt->total * log (xt->col_tot[j] / xt->total);
2998 for (UXY = P = 0., i = 0; i < n_rows; i++)
2999 for (j = 0; j < n_cols; j++)
3001 double entry = xt->mat[j + i * n_cols];
3006 P += entry * pow2 (log (xt->col_tot[j] * xt->row_tot[i] / (xt->total * entry)));
3007 UXY -= entry / xt->total * log (entry / xt->total);
3010 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3011 for (j = 0; j < n_cols; j++)
3013 double entry = xt->mat[j + i * n_cols];
3018 ase1_yx += entry * pow2 (UY * log (entry / xt->row_tot[i])
3019 + (UX - UXY) * log (xt->col_tot[j] / xt->total));
3020 ase1_xy += entry * pow2 (UX * log (entry / xt->col_tot[j])
3021 + (UY - UXY) * log (xt->row_tot[i] / xt->total));
3022 ase1_sym += entry * pow2 ((UXY
3023 * log (xt->row_tot[i] * xt->col_tot[j] / pow2 (xt->total)))
3024 - (UX + UY) * log (entry / xt->total));
3027 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3028 ase[5] = (2. / (xt->total * pow2 (UX + UY))) * sqrt (ase1_sym);
3031 v[6] = (UX + UY - UXY) / UX;
3032 ase[6] = sqrt (ase1_xy) / (xt->total * UX * UX);
3033 t[6] = v[6] / (sqrt (P - xt->total * pow2 (UX + UY - UXY)) / (xt->total * UX));
3035 v[7] = (UX + UY - UXY) / UY;
3036 ase[7] = sqrt (ase1_yx) / (xt->total * UY * UY);
3037 t[7] = v[7] / (sqrt (P - xt->total * pow2 (UX + UY - UXY)) / (xt->total * UY));
3041 if (proc->statistics & (1u << CRS_ST_D))
3043 double v_dummy[N_SYMMETRIC];
3044 double ase_dummy[N_SYMMETRIC];
3045 double t_dummy[N_SYMMETRIC];
3046 double somers_d_v[3];
3047 double somers_d_ase[3];
3048 double somers_d_t[3];
3050 if (calc_symmetric (proc, xt, v_dummy, ase_dummy, t_dummy,
3051 somers_d_v, somers_d_ase, somers_d_t))
3054 for (i = 0; i < 3; i++)
3056 v[8 + i] = somers_d_v[i];
3057 ase[8 + i] = somers_d_ase[i];
3058 t[8 + i] = somers_d_t[i];
3059 sig[8 + i] = 2 * gsl_cdf_ugaussian_Q (fabs (somers_d_t[i]));
3065 if (proc->statistics & (1u << CRS_ST_ETA))
3068 double sum_Xr, sum_X2r;
3072 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3074 sum_Xr += xt->vars[ROW_VAR].values[i].f * xt->row_tot[i];
3075 sum_X2r += pow2 (xt->vars[ROW_VAR].values[i].f) * xt->row_tot[i];
3077 SX = sum_X2r - pow2 (sum_Xr) / xt->total;
3079 for (SXW = 0., j = 0; j < n_cols; j++)
3083 for (cum = 0., i = 0; i < n_rows; i++)
3085 SXW += (pow2 (xt->vars[ROW_VAR].values[i].f)
3086 * xt->mat[j + i * n_cols]);
3087 cum += (xt->vars[ROW_VAR].values[i].f
3088 * xt->mat[j + i * n_cols]);
3091 SXW -= cum * cum / xt->col_tot[j];
3093 v[11] = sqrt (1. - SXW / SX);
3097 double sum_Yc, sum_Y2c;
3101 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3103 sum_Yc += xt->vars[COL_VAR].values[i].f * xt->col_tot[i];
3104 sum_Y2c += pow2 (xt->vars[COL_VAR].values[i].f) * xt->col_tot[i];
3106 SY = sum_Y2c - sum_Yc * sum_Yc / xt->total;
3108 for (SYW = 0., i = 0; i < n_rows; i++)
3112 for (cum = 0., j = 0; j < n_cols; j++)
3114 SYW += (pow2 (xt->vars[COL_VAR].values[j].f)
3115 * xt->mat[j + i * n_cols]);
3116 cum += (xt->vars[COL_VAR].values[j].f
3117 * xt->mat[j + i * n_cols]);
3120 SYW -= cum * cum / xt->row_tot[i];
3122 v[12] = sqrt (1. - SYW / SY);