1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/message.h>
54 #include <libpspp/misc.h>
55 #include <libpspp/pool.h>
56 #include <libpspp/str.h>
57 #include <output/output.h>
58 #include <output/table.h>
65 #define _(msgid) gettext (msgid)
66 #define N_(msgid) msgid
74 missing=miss:!table/include/report;
75 +write[wr_]=none,cells,all;
76 +format=fmt:!labels/nolabels/novallabs,
79 tabl:!tables/notables,
82 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
84 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
90 /* Number of chi-square statistics. */
93 /* Number of symmetric statistics. */
96 /* Number of directional statistics. */
97 #define N_DIRECTIONAL 13
99 /* A single table entry for general mode. */
102 int table; /* Flattened table number. */
105 double freq; /* Frequency count. */
106 double *data; /* Crosstabulation table for integer mode. */
109 union value values[1]; /* Values. */
112 /* A crosstabulation. */
115 int nvar; /* Number of variables. */
116 double missing; /* Missing cases count. */
117 int ofs; /* Integer mode: Offset into sorted_tab[]. */
118 const struct variable *vars[2]; /* At least two variables; sorted by
119 larger indices first. */
122 /* Integer mode variable info. */
125 int min; /* Minimum value. */
126 int max; /* Maximum value + 1. */
127 int count; /* max - min. */
130 static inline struct var_range *
131 get_var_range (const struct variable *v)
133 return var_get_aux (v);
137 get_var_trimmed_width (const struct variable *v)
139 int width = var_get_width (v);
140 return MIN (width, MAX_SHORT_STRING);
143 /* Indexes into crosstab.v. */
150 /* General mode crosstabulation table. */
151 static struct hsh_table *gen_tab; /* Hash table. */
152 static int n_sorted_tab; /* Number of entries in sorted_tab. */
153 static struct table_entry **sorted_tab; /* Sorted table. */
155 /* Variables specifies on VARIABLES. */
156 static const struct variable **variables;
157 static size_t variables_cnt;
160 static struct crosstab **xtab;
163 /* Integer or general mode? */
172 static int num_cells; /* Number of cells requested. */
173 static int cells[8]; /* Cells requested. */
176 static int write_style; /* One of WR_* that specifies the WRITE style. */
178 /* Command parsing info. */
179 static struct cmd_crosstabs cmd;
182 static struct pool *pl_tc; /* For table cells. */
183 static struct pool *pl_col; /* For column data. */
185 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
186 static void precalc (struct casereader *, const struct dataset *);
187 static void calc_general (struct ccase *, const struct dataset *);
188 static void calc_integer (struct ccase *, const struct dataset *);
189 static void postcalc (const struct dataset *);
190 static void submit (struct tab_table *);
192 static void format_short (char *s, const struct fmt_spec *fp,
193 const union value *v);
195 /* Parse and execute CROSSTABS, then clean up. */
197 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
199 int result = internal_cmd_crosstabs (lexer, ds);
203 pool_destroy (pl_tc);
204 pool_destroy (pl_col);
206 for (i = 0; i < nxtab; i++)
213 /* Parses and executes the CROSSTABS procedure. */
215 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
217 struct casegrouper *grouper;
218 struct casereader *input, *group;
226 pl_tc = pool_create ();
227 pl_col = pool_create ();
229 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
232 mode = variables ? INTEGER : GENERAL;
237 cmd.a_cells[CRS_CL_COUNT] = 1;
243 for (i = 0; i < CRS_CL_count; i++)
248 cmd.a_cells[CRS_CL_COUNT] = 1;
249 cmd.a_cells[CRS_CL_ROW] = 1;
250 cmd.a_cells[CRS_CL_COLUMN] = 1;
251 cmd.a_cells[CRS_CL_TOTAL] = 1;
253 if (cmd.a_cells[CRS_CL_ALL])
255 for (i = 0; i < CRS_CL_count; i++)
257 cmd.a_cells[CRS_CL_ALL] = 0;
259 cmd.a_cells[CRS_CL_NONE] = 0;
261 for (num_cells = i = 0; i < CRS_CL_count; i++)
263 cells[num_cells++] = i;
266 if (cmd.sbc_statistics)
271 for (i = 0; i < CRS_ST_count; i++)
272 if (cmd.a_statistics[i])
275 cmd.a_statistics[CRS_ST_CHISQ] = 1;
276 if (cmd.a_statistics[CRS_ST_ALL])
277 for (i = 0; i < CRS_ST_count; i++)
278 cmd.a_statistics[i] = 1;
282 if (cmd.miss == CRS_REPORT && mode == GENERAL)
284 msg (SE, _("Missing mode REPORT not allowed in general mode. "
285 "Assuming MISSING=TABLE."));
286 cmd.miss = CRS_TABLE;
290 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
291 cmd.a_write[CRS_WR_ALL] = 0;
292 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
294 msg (SE, _("Write mode ALL not allowed in general mode. "
295 "Assuming WRITE=CELLS."));
296 cmd.a_write[CRS_WR_CELLS] = 1;
299 && (cmd.a_write[CRS_WR_NONE]
300 + cmd.a_write[CRS_WR_ALL]
301 + cmd.a_write[CRS_WR_CELLS] == 0))
302 cmd.a_write[CRS_WR_CELLS] = 1;
303 if (cmd.a_write[CRS_WR_CELLS])
304 write_style = CRS_WR_CELLS;
305 else if (cmd.a_write[CRS_WR_ALL])
306 write_style = CRS_WR_ALL;
308 write_style = CRS_WR_NONE;
310 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
312 grouper = casegrouper_create_splits (input, dataset_dict (ds));
313 while (casegrouper_get_next_group (grouper, &group))
319 for (; casereader_read (group, &c); case_destroy (&c))
322 calc_general (&c, ds);
324 calc_integer (&c, ds);
326 casereader_destroy (group);
330 ok = casegrouper_destroy (grouper);
331 ok = proc_commit (ds) && ok;
333 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
336 /* Parses the TABLES subcommand. */
338 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
340 struct const_var_set *var_set;
342 const struct variable ***by = NULL;
343 size_t *by_nvar = NULL;
347 /* Ensure that this is a TABLES subcommand. */
348 if (!lex_match_id (lexer, "TABLES")
349 && (lex_token (lexer) != T_ID ||
350 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
351 && lex_token (lexer) != T_ALL)
353 lex_match (lexer, '=');
355 if (variables != NULL)
356 var_set = const_var_set_create_from_array (variables, variables_cnt);
358 var_set = const_var_set_create_from_dict (dataset_dict (ds));
359 assert (var_set != NULL);
363 by = xnrealloc (by, n_by + 1, sizeof *by);
364 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
365 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
366 PV_NO_DUPLICATE | PV_NO_SCRATCH))
368 if (xalloc_oversized (nx, by_nvar[n_by]))
370 msg (SE, _("Too many cross-tabulation variables or dimensions."));
376 if (!lex_match (lexer, T_BY))
380 lex_error (lexer, _("expecting BY"));
389 int *by_iter = xcalloc (n_by, sizeof *by_iter);
392 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
393 for (i = 0; i < nx; i++)
397 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
404 for (i = 0; i < n_by; i++)
405 x->vars[i] = by[i][by_iter[i]];
411 for (i = n_by - 1; i >= 0; i--)
413 if (++by_iter[i] < by_nvar[i])
426 /* All return paths lead here. */
430 for (i = 0; i < n_by; i++)
436 const_var_set_destroy (var_set);
441 /* Parses the VARIABLES subcommand. */
443 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
447 msg (SE, _("VARIABLES must be specified before TABLES."));
451 lex_match (lexer, '=');
455 size_t orig_nv = variables_cnt;
460 if (!parse_variables_const (lexer, dataset_dict (ds),
461 &variables, &variables_cnt,
462 (PV_APPEND | PV_NUMERIC
463 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
466 if (lex_token (lexer) != '(')
468 lex_error (lexer, "expecting `('");
473 if (!lex_force_int (lexer))
475 min = lex_integer (lexer);
478 lex_match (lexer, ',');
480 if (!lex_force_int (lexer))
482 max = lex_integer (lexer);
485 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
491 if (lex_token (lexer) != ')')
493 lex_error (lexer, "expecting `)'");
498 for (i = orig_nv; i < variables_cnt; i++)
500 struct var_range *vr = xmalloc (sizeof *vr);
503 vr->count = max - min + 1;
504 var_attach_aux (variables[i], vr, var_dtor_free);
507 if (lex_token (lexer) == '/')
519 /* Data file processing. */
521 static int compare_table_entry (const void *, const void *, const void *);
522 static unsigned hash_table_entry (const void *, const void *);
524 /* Set up the crosstabulation tables for processing. */
526 precalc (struct casereader *input, const struct dataset *ds)
530 if (casereader_peek (input, 0, &c))
532 output_split_file_values (ds, &c);
538 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
548 for (i = 0; i < nxtab; i++)
550 struct crosstab *x = xtab[i];
555 x->ofs = n_sorted_tab;
557 for (j = 2; j < x->nvar; j++)
558 count *= get_var_range (x->vars[j - 2])->count;
560 sorted_tab = xnrealloc (sorted_tab,
561 n_sorted_tab + count, sizeof *sorted_tab);
562 v = xmalloca (sizeof *v * x->nvar);
563 for (j = 2; j < x->nvar; j++)
564 v[j] = get_var_range (x->vars[j])->min;
565 for (j = 0; j < count; j++)
567 struct table_entry *te;
570 te = sorted_tab[n_sorted_tab++]
571 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
575 int row_cnt = get_var_range (x->vars[0])->count;
576 int col_cnt = get_var_range (x->vars[1])->count;
577 const int mat_size = row_cnt * col_cnt;
580 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
581 for (m = 0; m < mat_size; m++)
585 for (k = 2; k < x->nvar; k++)
586 te->values[k].f = v[k];
587 for (k = 2; k < x->nvar; k++)
589 struct var_range *vr = get_var_range (x->vars[k]);
590 if (++v[k] >= vr->max)
599 sorted_tab = xnrealloc (sorted_tab,
600 n_sorted_tab + 1, sizeof *sorted_tab);
601 sorted_tab[n_sorted_tab] = NULL;
606 /* Form crosstabulations for general mode. */
608 calc_general (struct ccase *c, const struct dataset *ds)
610 /* Missing values to exclude. */
611 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
612 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
616 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
618 /* Flattened current table index. */
621 for (t = 0; t < nxtab; t++)
623 struct crosstab *x = xtab[t];
624 const size_t entry_size = (sizeof (struct table_entry)
625 + sizeof (union value) * (x->nvar - 1));
626 struct table_entry *te = xmalloca (entry_size);
628 /* Construct table entry for the current record and table. */
634 for (j = 0; j < x->nvar; j++)
636 const union value *v = case_data (c, x->vars[j]);
637 if (var_is_value_missing (x->vars[j], v, exclude))
639 x->missing += weight;
643 if (var_is_numeric (x->vars[j]))
644 te->values[j].f = case_num (c, x->vars[j]);
647 size_t n = get_var_trimmed_width (x->vars[j]);
648 memcpy (te->values[j].s, case_str (c, x->vars[j]), n);
650 /* Necessary in order to simplify comparisons. */
651 memset (&te->values[j].s[n], 0, sizeof (union value) - n);
656 /* Add record to hash table. */
658 struct table_entry **tepp
659 = (struct table_entry **) hsh_probe (gen_tab, te);
662 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
665 memcpy (tep, te, entry_size);
670 (*tepp)->u.freq += weight;
679 calc_integer (struct ccase *c, const struct dataset *ds)
681 bool bad_warn = true;
684 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
686 /* Flattened current table index. */
689 for (t = 0; t < nxtab; t++)
691 struct crosstab *x = xtab[t];
696 for (i = 0; i < x->nvar; i++)
698 const struct variable *const v = x->vars[i];
699 struct var_range *vr = get_var_range (v);
700 double value = case_num (c, v);
702 /* Note that the first test also rules out SYSMIS. */
703 if ((value < vr->min || value >= vr->max)
704 || (cmd.miss == CRS_TABLE
705 && var_is_num_missing (v, value, MV_USER)))
707 x->missing += weight;
713 ofs += fact * ((int) value - vr->min);
719 const struct variable *row_var = x->vars[ROW_VAR];
720 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
722 const struct variable *col_var = x->vars[COL_VAR];
723 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
725 const int col_dim = get_var_range (col_var)->count;
727 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
734 /* Compare the table_entry's at A and B and return a strcmp()-type
737 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
739 const struct table_entry *a = a_;
740 const struct table_entry *b = b_;
742 if (a->table > b->table)
744 else if (a->table < b->table)
748 const struct crosstab *x = xtab[a->table];
751 for (i = x->nvar - 1; i >= 0; i--)
752 if (var_is_numeric (x->vars[i]))
754 const double diffnum = a->values[i].f - b->values[i].f;
757 else if (diffnum > 0)
762 int width = get_var_trimmed_width (x->vars[i]);
763 const int diffstr = strncmp (a->values[i].s, b->values[i].s, width);
772 /* Calculate a hash value from table_entry A. */
774 hash_table_entry (const void *a_, const void *aux UNUSED)
776 const struct table_entry *a = a_;
781 for (i = 0; i < xtab[a->table]->nvar; i++)
782 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
787 /* Post-data reading calculations. */
789 static struct table_entry **find_pivot_extent (struct table_entry **,
790 int *cnt, int pivot);
791 static void enum_var_values (struct table_entry **entries, int entry_cnt,
793 union value **values, int *value_cnt);
794 static void output_pivot_table (struct table_entry **, struct table_entry **,
795 const struct dictionary *,
796 double **, double **, double **,
797 int *, int *, int *);
798 static void make_summary_table (const struct dictionary *);
801 postcalc (const struct dataset *ds)
805 n_sorted_tab = hsh_count (gen_tab);
806 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
809 make_summary_table (dataset_dict (ds));
811 /* Identify all the individual crosstabulation tables, and deal with
814 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
815 int pc = n_sorted_tab; /* Pivot count. */
817 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
818 int maxrows = 0, maxcols = 0, maxcells = 0;
822 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
826 output_pivot_table (pb, pe, dataset_dict (ds),
827 &mat, &row_tot, &col_tot,
828 &maxrows, &maxcols, &maxcells);
837 hsh_destroy (gen_tab);
841 for (i = 0; i < n_sorted_tab; i++)
843 free (sorted_tab[i]->u.data);
844 free (sorted_tab[i]);
850 static void insert_summary (struct tab_table *, int tab_index,
851 const struct dictionary *,
854 /* Output a table summarizing the cases processed. */
856 make_summary_table (const struct dictionary *dict)
858 struct tab_table *summary;
860 struct table_entry **pb = sorted_tab, **pe;
861 int pc = n_sorted_tab;
864 summary = tab_create (7, 3 + nxtab, 1);
865 tab_title (summary, _("Summary."));
866 tab_headers (summary, 1, 0, 3, 0);
867 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
868 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
869 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
870 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
871 tab_hline (summary, TAL_1, 1, 6, 1);
872 tab_hline (summary, TAL_1, 1, 6, 2);
873 tab_vline (summary, TAL_1, 3, 1, 1);
874 tab_vline (summary, TAL_1, 5, 1, 1);
878 for (i = 0; i < 3; i++)
880 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
881 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
884 tab_offset (summary, 0, 3);
890 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
894 while (cur_tab < (*pb)->table)
895 insert_summary (summary, cur_tab++, dict, 0.);
898 for (valid = 0.; pb < pe; pb++)
899 valid += (*pb)->u.freq;
902 const struct crosstab *const x = xtab[(*pb)->table];
903 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
904 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
905 const int count = n_cols * n_rows;
907 for (valid = 0.; pb < pe; pb++)
909 const double *data = (*pb)->u.data;
912 for (i = 0; i < count; i++)
916 insert_summary (summary, cur_tab++, dict, valid);
921 while (cur_tab < nxtab)
922 insert_summary (summary, cur_tab++, dict, 0.);
927 /* Inserts a line into T describing the crosstabulation at index
928 TAB_INDEX, which has VALID valid observations. */
930 insert_summary (struct tab_table *t, int tab_index,
931 const struct dictionary *dict,
934 struct crosstab *x = xtab[tab_index];
936 const struct variable *wv = dict_get_weight (dict);
937 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
939 tab_hline (t, TAL_1, 0, 6, 0);
941 /* Crosstabulation name. */
943 char *buf = xmalloca (128 * x->nvar);
947 for (i = 0; i < x->nvar; i++)
950 cp = stpcpy (cp, " * ");
952 cp = stpcpy (cp, var_to_string (x->vars[i]));
954 tab_text (t, 0, 0, TAB_LEFT, buf);
959 /* Counts and percentages. */
969 for (i = 0; i < 3; i++)
971 tab_double (t, i * 2 + 1, 0, TAB_RIGHT, n[i], wfmt);
972 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
983 static struct tab_table *table; /* Crosstabulation table. */
984 static struct tab_table *chisq; /* Chi-square table. */
985 static struct tab_table *sym; /* Symmetric measures table. */
986 static struct tab_table *risk; /* Risk estimate table. */
987 static struct tab_table *direct; /* Directional measures table. */
990 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
992 /* Column values, number of columns. */
993 static union value *cols;
996 /* Row values, number of rows. */
997 static union value *rows;
1000 /* Number of statistically interesting columns/rows (columns/rows with
1002 static int ns_cols, ns_rows;
1004 /* Crosstabulation. */
1005 static const struct crosstab *x;
1007 /* Number of variables from the crosstabulation to consider. This is
1008 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1011 /* Matrix contents. */
1012 static double *mat; /* Matrix proper. */
1013 static double *row_tot; /* Row totals. */
1014 static double *col_tot; /* Column totals. */
1015 static double W; /* Grand total. */
1017 static void display_dimensions (struct tab_table *, int first_difference,
1018 struct table_entry *);
1019 static void display_crosstabulation (void);
1020 static void display_chisq (const struct dictionary *);
1021 static void display_symmetric (const struct dictionary *);
1022 static void display_risk (const struct dictionary *);
1023 static void display_directional (void);
1024 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1025 static void table_value_missing (struct tab_table *table, int c, int r,
1026 unsigned char opt, const union value *v,
1027 const struct variable *var);
1028 static void delete_missing (void);
1030 /* Output pivot table beginning at PB and continuing until PE,
1031 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1032 hold *MAXROWS entries. */
1034 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1035 const struct dictionary *dict,
1036 double **matp, double **row_totp, double **col_totp,
1037 int *maxrows, int *maxcols, int *maxcells)
1040 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1041 int tc = pe - pb; /* Table count. */
1043 /* Table entry for header comparison. */
1044 struct table_entry *cmp = NULL;
1046 x = xtab[(*pb)->table];
1047 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1049 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1051 /* Crosstabulation table initialization. */
1054 table = tab_create (nvar + n_cols,
1055 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1056 tab_headers (table, nvar - 1, 0, 2, 0);
1058 /* First header line. */
1059 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1060 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1062 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1064 /* Second header line. */
1068 for (i = 2; i < nvar; i++)
1069 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1070 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1071 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1072 var_get_name (x->vars[ROW_VAR]));
1073 for (i = 0; i < n_cols; i++)
1074 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1076 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1079 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1080 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1084 char *title = xmalloca (x->nvar * 64 + 128);
1088 if (cmd.pivot == CRS_PIVOT)
1089 for (i = 0; i < nvar; i++)
1092 cp = stpcpy (cp, " by ");
1093 cp = stpcpy (cp, var_get_name (x->vars[i]));
1097 cp = spprintf (cp, "%s by %s for",
1098 var_get_name (x->vars[0]),
1099 var_get_name (x->vars[1]));
1100 for (i = 2; i < nvar; i++)
1102 char buf[64], *bufp;
1107 cp = stpcpy (cp, var_get_name (x->vars[i]));
1109 format_short (buf, var_get_print_format (x->vars[i]),
1111 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1113 cp = stpcpy (cp, bufp);
1117 cp = stpcpy (cp, " [");
1118 for (i = 0; i < num_cells; i++)
1126 static const struct tuple cell_names[] =
1128 {CRS_CL_COUNT, N_("count")},
1129 {CRS_CL_ROW, N_("row %")},
1130 {CRS_CL_COLUMN, N_("column %")},
1131 {CRS_CL_TOTAL, N_("total %")},
1132 {CRS_CL_EXPECTED, N_("expected")},
1133 {CRS_CL_RESIDUAL, N_("residual")},
1134 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1135 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1139 const struct tuple *t;
1141 for (t = cell_names; t->value != cells[i]; t++)
1142 assert (t->value != -1);
1144 cp = stpcpy (cp, ", ");
1145 cp = stpcpy (cp, gettext (t->name));
1149 tab_title (table, "%s", title);
1153 tab_offset (table, 0, 2);
1158 /* Chi-square table initialization. */
1159 if (cmd.a_statistics[CRS_ST_CHISQ])
1161 chisq = tab_create (6 + (nvar - 2),
1162 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1163 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1165 tab_title (chisq, _("Chi-square tests."));
1167 tab_offset (chisq, nvar - 2, 0);
1168 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1169 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1170 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1171 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1172 _("Asymp. Sig. (2-sided)"));
1173 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1174 _("Exact. Sig. (2-sided)"));
1175 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1176 _("Exact. Sig. (1-sided)"));
1178 tab_offset (chisq, 0, 1);
1183 /* Symmetric measures. */
1184 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1185 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1186 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1187 || cmd.a_statistics[CRS_ST_KAPPA])
1189 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1190 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1191 tab_title (sym, _("Symmetric measures."));
1193 tab_offset (sym, nvar - 2, 0);
1194 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1195 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1196 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1197 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1198 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1199 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1200 tab_offset (sym, 0, 1);
1205 /* Risk estimate. */
1206 if (cmd.a_statistics[CRS_ST_RISK])
1208 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1209 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1210 tab_title (risk, _("Risk estimate."));
1212 tab_offset (risk, nvar - 2, 0);
1213 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1214 _("95%% Confidence Interval"));
1215 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1216 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1217 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1218 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1219 tab_hline (risk, TAL_1, 2, 3, 1);
1220 tab_vline (risk, TAL_1, 2, 0, 1);
1221 tab_offset (risk, 0, 2);
1226 /* Directional measures. */
1227 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1228 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1230 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1231 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1232 tab_title (direct, _("Directional measures."));
1234 tab_offset (direct, nvar - 2, 0);
1235 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1236 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1237 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1238 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1239 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1240 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1241 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1242 tab_offset (direct, 0, 1);
1249 /* Find pivot subtable if applicable. */
1250 te = find_pivot_extent (tb, &tc, 0);
1254 /* Find all the row variable values. */
1255 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1257 /* Allocate memory space for the column and row totals. */
1258 if (n_rows > *maxrows)
1260 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1261 row_tot = *row_totp;
1264 if (n_cols > *maxcols)
1266 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1267 col_tot = *col_totp;
1271 /* Allocate table space for the matrix. */
1272 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1273 tab_realloc (table, -1,
1274 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1275 tab_nr (table) * (pe - pb) / (te - tb)));
1277 if (mode == GENERAL)
1279 /* Allocate memory space for the matrix. */
1280 if (n_cols * n_rows > *maxcells)
1282 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1283 *maxcells = n_cols * n_rows;
1288 /* Build the matrix and calculate column totals. */
1290 union value *cur_col = cols;
1291 union value *cur_row = rows;
1293 double *cp = col_tot;
1294 struct table_entry **p;
1297 for (p = &tb[0]; p < te; p++)
1299 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1303 for (; cur_row < &rows[n_rows]; cur_row++)
1309 mp = &mat[cur_col - cols];
1312 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1319 *cp += *mp = (*p)->u.freq;
1324 /* Zero out the rest of the matrix. */
1325 for (; cur_row < &rows[n_rows]; cur_row++)
1331 if (cur_col < &cols[n_cols])
1333 const int rem_cols = n_cols - (cur_col - cols);
1336 for (c = 0; c < rem_cols; c++)
1338 mp = &mat[cur_col - cols];
1339 for (r = 0; r < n_rows; r++)
1341 for (c = 0; c < rem_cols; c++)
1343 mp += n_cols - rem_cols;
1351 double *tp = col_tot;
1353 assert (mode == INTEGER);
1354 mat = (*tb)->u.data;
1357 /* Calculate column totals. */
1358 for (c = 0; c < n_cols; c++)
1361 double *cp = &mat[c];
1363 for (r = 0; r < n_rows; r++)
1364 cum += cp[r * n_cols];
1372 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1373 ns_cols += *cp != 0.;
1376 /* Calculate row totals. */
1379 double *rp = row_tot;
1382 for (ns_rows = 0, r = n_rows; r--; )
1385 for (c = n_cols; c--; )
1393 /* Calculate grand total. */
1399 if (n_rows < n_cols)
1400 tp = row_tot, n = n_rows;
1402 tp = col_tot, n = n_cols;
1408 /* Find the first variable that differs from the last subtable,
1409 then display the values of the dimensioning variables for
1410 each table that needs it. */
1412 int first_difference = nvar - 1;
1415 for (; ; first_difference--)
1417 assert (first_difference >= 2);
1418 if (memcmp (&cmp->values[first_difference],
1419 &(*tb)->values[first_difference],
1420 sizeof *cmp->values))
1426 display_dimensions (table, first_difference, *tb);
1428 display_dimensions (chisq, first_difference, *tb);
1430 display_dimensions (sym, first_difference, *tb);
1432 display_dimensions (risk, first_difference, *tb);
1434 display_dimensions (direct, first_difference, *tb);
1438 display_crosstabulation ();
1439 if (cmd.miss == CRS_REPORT)
1442 display_chisq (dict);
1444 display_symmetric (dict);
1446 display_risk (dict);
1448 display_directional ();
1459 tab_resize (chisq, 4 + (nvar - 2), -1);
1470 /* Delete missing rows and columns for statistical analysis when
1473 delete_missing (void)
1478 for (r = 0; r < n_rows; r++)
1479 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1483 for (c = 0; c < n_cols; c++)
1484 mat[c + r * n_cols] = 0.;
1492 for (c = 0; c < n_cols; c++)
1493 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1497 for (r = 0; r < n_rows; r++)
1498 mat[c + r * n_cols] = 0.;
1504 /* Prepare table T for submission, and submit it. */
1506 submit (struct tab_table *t)
1513 tab_resize (t, -1, 0);
1514 if (tab_nr (t) == tab_t (t))
1519 tab_offset (t, 0, 0);
1521 for (i = 2; i < nvar; i++)
1522 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1523 var_to_string (x->vars[i]));
1524 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1525 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1527 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1529 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1530 tab_dim (t, crosstabs_dim);
1534 /* Sets the widths of all the columns and heights of all the rows in
1535 table T for driver D. */
1537 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1541 /* Width of a numerical column. */
1542 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1543 if (cmd.miss == CRS_REPORT)
1544 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1546 /* Set width for header columns. */
1552 w = d->width - c * (t->nc - t->l);
1553 for (i = 0; i <= t->nc; i++)
1557 if (w < d->prop_em_width * 8)
1558 w = d->prop_em_width * 8;
1560 if (w > d->prop_em_width * 15)
1561 w = d->prop_em_width * 15;
1563 for (i = 0; i < t->l; i++)
1567 for (i = t->l; i < t->nc; i++)
1570 for (i = 0; i < t->nr; i++)
1571 t->h[i] = tab_natural_height (t, d, i);
1574 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1575 int *cnt, int pivot);
1576 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1577 int *cnt, int pivot);
1579 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1581 static struct table_entry **
1582 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1584 return (mode == GENERAL
1585 ? find_pivot_extent_general (tp, cnt, pivot)
1586 : find_pivot_extent_integer (tp, cnt, pivot));
1589 /* Find the extent of a region in TP that contains one table. If
1590 PIVOT != 0 that means a set of table entries with identical table
1591 number; otherwise they also have to have the same values for every
1592 dimension after the row and column dimensions. The table that is
1593 searched starts at TP and has length CNT. Returns the first entry
1594 after the last one in the table; sets *CNT to the number of
1595 remaining values. If there are no entries in TP at all, returns
1596 NULL. A yucky interface, admittedly, but it works. */
1597 static struct table_entry **
1598 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1600 struct table_entry *fp = *tp;
1605 x = xtab[(*tp)->table];
1613 if ((*tp)->table != fp->table)
1618 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1625 /* Integer mode correspondent to find_pivot_extent_general(). This
1626 could be optimized somewhat, but I just don't give a crap about
1627 CROSSTABS performance in integer mode, which is just a
1628 CROSSTABS wart as far as I'm concerned.
1630 That said, feel free to send optimization patches to me. */
1631 static struct table_entry **
1632 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1634 struct table_entry *fp = *tp;
1639 x = xtab[(*tp)->table];
1647 if ((*tp)->table != fp->table)
1652 if (memcmp (&(*tp)->values[2], &fp->values[2],
1653 sizeof (union value) * (x->nvar - 2)))
1660 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1661 result. WIDTH_ points to an int which is either 0 for a
1662 numeric value or a string width for a string value. */
1664 compare_value (const void *a_, const void *b_, const void *width_)
1666 const union value *a = a_;
1667 const union value *b = b_;
1668 const int *pwidth = width_;
1669 const int width = *pwidth;
1672 return (a->f < b->f) ? -1 : (a->f > b->f);
1674 return strncmp (a->s, b->s, width);
1677 /* Given an array of ENTRY_CNT table_entry structures starting at
1678 ENTRIES, creates a sorted list of the values that the variable
1679 with index VAR_IDX takes on. The values are returned as a
1680 malloc()'darray stored in *VALUES, with the number of values
1681 stored in *VALUE_CNT.
1684 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1685 union value **values, int *value_cnt)
1687 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1689 if (mode == GENERAL)
1691 int width = get_var_trimmed_width (v);
1694 *values = xnmalloc (entry_cnt, sizeof **values);
1695 for (i = 0; i < entry_cnt; i++)
1696 (*values)[i] = entries[i]->values[var_idx];
1697 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1698 compare_value, &width);
1702 struct var_range *vr = get_var_range (v);
1705 assert (mode == INTEGER);
1706 *values = xnmalloc (vr->count, sizeof **values);
1707 for (i = 0; i < vr->count; i++)
1708 (*values)[i].f = i + vr->min;
1709 *value_cnt = vr->count;
1713 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1714 from V, displayed with print format spec from variable VAR. When
1715 in REPORT missing-value mode, missing values have an M appended. */
1717 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1718 const union value *v, const struct variable *var)
1721 const struct fmt_spec *print = var_get_print_format (var);
1723 const char *label = var_lookup_value_label (var, v);
1726 tab_text (table, c, r, TAB_LEFT, label);
1730 s.string = tab_alloc (table, print->w);
1731 format_short (s.string, print, v);
1732 s.length = strlen (s.string);
1733 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1734 s.string[s.length++] = 'M';
1735 while (s.length && *s.string == ' ')
1740 tab_raw (table, c, r, opt, &s);
1743 /* Draws a line across TABLE at the current row to indicate the most
1744 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1745 that changed, and puts the values that changed into the table. TB
1746 and X must be the corresponding table_entry and crosstab,
1749 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1751 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1753 for (; first_difference >= 2; first_difference--)
1754 table_value_missing (table, nvar - first_difference - 1, 0,
1755 TAB_RIGHT, &tb->values[first_difference],
1756 x->vars[first_difference]);
1759 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1760 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1761 additionally suffixed with a letter `M'. */
1763 format_cell_entry (struct tab_table *table, int c, int r, double value,
1764 char suffix, bool mark_missing)
1766 const struct fmt_spec f = {FMT_F, 10, 1};
1771 s.string = tab_alloc (table, 16);
1773 data_out (&v, &f, s.string);
1774 while (*s.string == ' ')
1780 s.string[s.length++] = suffix;
1782 s.string[s.length++] = 'M';
1784 tab_raw (table, c, r, TAB_RIGHT, &s);
1787 /* Displays the crosstabulation table. */
1789 display_crosstabulation (void)
1794 for (r = 0; r < n_rows; r++)
1795 table_value_missing (table, nvar - 2, r * num_cells,
1796 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1798 tab_text (table, nvar - 2, n_rows * num_cells,
1799 TAB_LEFT, _("Total"));
1801 /* Put in the actual cells. */
1806 tab_offset (table, nvar - 1, -1);
1807 for (r = 0; r < n_rows; r++)
1810 tab_hline (table, TAL_1, -1, n_cols, 0);
1811 for (c = 0; c < n_cols; c++)
1813 bool mark_missing = false;
1814 double expected_value = row_tot[r] * col_tot[c] / W;
1815 if (cmd.miss == CRS_REPORT
1816 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1817 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1819 mark_missing = true;
1820 for (i = 0; i < num_cells; i++)
1831 v = *mp / row_tot[r] * 100.;
1835 v = *mp / col_tot[c] * 100.;
1842 case CRS_CL_EXPECTED:
1845 case CRS_CL_RESIDUAL:
1846 v = *mp - expected_value;
1848 case CRS_CL_SRESIDUAL:
1849 v = (*mp - expected_value) / sqrt (expected_value);
1851 case CRS_CL_ASRESIDUAL:
1852 v = ((*mp - expected_value)
1853 / sqrt (expected_value
1854 * (1. - row_tot[r] / W)
1855 * (1. - col_tot[c] / W)));
1861 format_cell_entry (table, c, i, v, suffix, mark_missing);
1867 tab_offset (table, -1, tab_row (table) + num_cells);
1875 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1876 for (r = 0; r < n_rows; r++)
1878 bool mark_missing = false;
1880 if (cmd.miss == CRS_REPORT
1881 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1882 mark_missing = true;
1884 for (i = 0; i < num_cells; i++)
1899 v = row_tot[r] / W * 100.;
1903 v = row_tot[r] / W * 100.;
1906 case CRS_CL_EXPECTED:
1907 case CRS_CL_RESIDUAL:
1908 case CRS_CL_SRESIDUAL:
1909 case CRS_CL_ASRESIDUAL:
1916 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1917 tab_next_row (table);
1922 /* Column totals, grand total. */
1928 tab_hline (table, TAL_1, -1, n_cols, 0);
1929 for (c = 0; c <= n_cols; c++)
1931 double ct = c < n_cols ? col_tot[c] : W;
1932 bool mark_missing = false;
1935 if (cmd.miss == CRS_REPORT && c < n_cols
1936 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1937 mark_missing = true;
1939 for (i = 0; i < num_cells; i++)
1961 case CRS_CL_EXPECTED:
1962 case CRS_CL_RESIDUAL:
1963 case CRS_CL_SRESIDUAL:
1964 case CRS_CL_ASRESIDUAL:
1970 format_cell_entry (table, c, i, v, suffix, mark_missing);
1975 tab_offset (table, -1, tab_row (table) + last_row);
1978 tab_offset (table, 0, -1);
1981 static void calc_r (double *X, double *Y, double *, double *, double *);
1982 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1984 /* Display chi-square statistics. */
1986 display_chisq (const struct dictionary *dict)
1988 const struct variable *wv = dict_get_weight (dict);
1989 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
1991 static const char *chisq_stats[N_CHISQ] =
1993 N_("Pearson Chi-Square"),
1994 N_("Likelihood Ratio"),
1995 N_("Fisher's Exact Test"),
1996 N_("Continuity Correction"),
1997 N_("Linear-by-Linear Association"),
1999 double chisq_v[N_CHISQ];
2000 double fisher1, fisher2;
2006 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2008 tab_offset (chisq, nvar - 2, -1);
2010 for (i = 0; i < N_CHISQ; i++)
2012 if ((i != 2 && chisq_v[i] == SYSMIS)
2013 || (i == 2 && fisher1 == SYSMIS))
2017 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2020 tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL);
2021 tab_double (chisq, 2, 0, TAB_RIGHT, df[i], wfmt);
2022 tab_double (chisq, 3, 0, TAB_RIGHT,
2023 gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL);
2028 tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL);
2029 tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL);
2031 tab_next_row (chisq);
2034 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2035 tab_double (chisq, 1, 0, TAB_RIGHT, W, wfmt);
2036 tab_next_row (chisq);
2038 tab_offset (chisq, 0, -1);
2041 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2042 double[N_SYMMETRIC]);
2044 /* Display symmetric measures. */
2046 display_symmetric (const struct dictionary *dict)
2048 const struct variable *wv = dict_get_weight (dict);
2049 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
2051 static const char *categories[] =
2053 N_("Nominal by Nominal"),
2054 N_("Ordinal by Ordinal"),
2055 N_("Interval by Interval"),
2056 N_("Measure of Agreement"),
2059 static const char *stats[N_SYMMETRIC] =
2063 N_("Contingency Coefficient"),
2064 N_("Kendall's tau-b"),
2065 N_("Kendall's tau-c"),
2067 N_("Spearman Correlation"),
2072 static const int stats_categories[N_SYMMETRIC] =
2074 0, 0, 0, 1, 1, 1, 1, 2, 3,
2078 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2081 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2084 tab_offset (sym, nvar - 2, -1);
2086 for (i = 0; i < N_SYMMETRIC; i++)
2088 if (sym_v[i] == SYSMIS)
2091 if (stats_categories[i] != last_cat)
2093 last_cat = stats_categories[i];
2094 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2097 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2098 tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL);
2099 if (sym_ase[i] != SYSMIS)
2100 tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL);
2101 if (sym_t[i] != SYSMIS)
2102 tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL);
2103 /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL);*/
2107 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2108 tab_double (sym, 2, 0, TAB_RIGHT, W, wfmt);
2111 tab_offset (sym, 0, -1);
2114 static int calc_risk (double[], double[], double[], union value *);
2116 /* Display risk estimate. */
2118 display_risk (const struct dictionary *dict)
2120 const struct variable *wv = dict_get_weight (dict);
2121 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
2124 double risk_v[3], lower[3], upper[3];
2128 if (!calc_risk (risk_v, upper, lower, c))
2131 tab_offset (risk, nvar - 2, -1);
2133 for (i = 0; i < 3; i++)
2135 if (risk_v[i] == SYSMIS)
2141 if (var_is_numeric (x->vars[COL_VAR]))
2142 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2143 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2145 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2146 var_get_name (x->vars[COL_VAR]),
2147 get_var_trimmed_width (x->vars[COL_VAR]), c[0].s,
2148 get_var_trimmed_width (x->vars[COL_VAR]), c[1].s);
2152 if (var_is_numeric (x->vars[ROW_VAR]))
2153 sprintf (buf, _("For cohort %s = %g"),
2154 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2156 sprintf (buf, _("For cohort %s = %.*s"),
2157 var_get_name (x->vars[ROW_VAR]),
2158 get_var_trimmed_width (x->vars[ROW_VAR]), rows[i - 1].s);
2162 tab_text (risk, 0, 0, TAB_LEFT, buf);
2163 tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL);
2164 tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL);
2165 tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL);
2166 tab_next_row (risk);
2169 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2170 tab_double (risk, 1, 0, TAB_RIGHT, W, wfmt);
2171 tab_next_row (risk);
2173 tab_offset (risk, 0, -1);
2176 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2177 double[N_DIRECTIONAL]);
2179 /* Display directional measures. */
2181 display_directional (void)
2183 static const char *categories[] =
2185 N_("Nominal by Nominal"),
2186 N_("Ordinal by Ordinal"),
2187 N_("Nominal by Interval"),
2190 static const char *stats[] =
2193 N_("Goodman and Kruskal tau"),
2194 N_("Uncertainty Coefficient"),
2199 static const char *types[] =
2206 static const int stats_categories[N_DIRECTIONAL] =
2208 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2211 static const int stats_stats[N_DIRECTIONAL] =
2213 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2216 static const int stats_types[N_DIRECTIONAL] =
2218 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2221 static const int *stats_lookup[] =
2228 static const char **stats_names[] =
2240 double direct_v[N_DIRECTIONAL];
2241 double direct_ase[N_DIRECTIONAL];
2242 double direct_t[N_DIRECTIONAL];
2246 if (!calc_directional (direct_v, direct_ase, direct_t))
2249 tab_offset (direct, nvar - 2, -1);
2251 for (i = 0; i < N_DIRECTIONAL; i++)
2253 if (direct_v[i] == SYSMIS)
2259 for (j = 0; j < 3; j++)
2260 if (last[j] != stats_lookup[j][i])
2263 tab_hline (direct, TAL_1, j, 6, 0);
2268 int k = last[j] = stats_lookup[j][i];
2273 string = var_get_name (x->vars[0]);
2275 string = var_get_name (x->vars[1]);
2277 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2278 gettext (stats_names[j][k]), string);
2283 tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL);
2284 if (direct_ase[i] != SYSMIS)
2285 tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL);
2286 if (direct_t[i] != SYSMIS)
2287 tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL);
2288 /*tab_double (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), NULL);*/
2289 tab_next_row (direct);
2292 tab_offset (direct, 0, -1);
2295 /* Statistical calculations. */
2297 /* Returns the value of the gamma (factorial) function for an integer
2300 gamma_int (double x)
2305 for (i = 2; i < x; i++)
2310 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2312 static inline double
2313 Pr (int a, int b, int c, int d)
2315 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2316 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2317 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2318 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2319 / gamma_int (a + b + c + d + 1.));
2322 /* Swap the contents of A and B. */
2324 swap (int *a, int *b)
2331 /* Calculate significance for Fisher's exact test as specified in
2332 _SPSS Statistical Algorithms_, Appendix 5. */
2334 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2338 if (MIN (c, d) < MIN (a, b))
2339 swap (&a, &c), swap (&b, &d);
2340 if (MIN (b, d) < MIN (a, c))
2341 swap (&a, &b), swap (&c, &d);
2345 swap (&a, &b), swap (&c, &d);
2347 swap (&a, &c), swap (&b, &d);
2351 for (x = 0; x <= a; x++)
2352 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2354 *fisher2 = *fisher1;
2355 for (x = 1; x <= b; x++)
2356 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2359 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2360 columns with values COLS and N_ROWS rows with values ROWS. Values
2361 in the matrix sum to W. */
2363 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2364 double *fisher1, double *fisher2)
2368 chisq[0] = chisq[1] = 0.;
2369 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2370 *fisher1 = *fisher2 = SYSMIS;
2372 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2374 if (ns_rows <= 1 || ns_cols <= 1)
2376 chisq[0] = chisq[1] = SYSMIS;
2380 for (r = 0; r < n_rows; r++)
2381 for (c = 0; c < n_cols; c++)
2383 const double expected = row_tot[r] * col_tot[c] / W;
2384 const double freq = mat[n_cols * r + c];
2385 const double residual = freq - expected;
2387 chisq[0] += residual * residual / expected;
2389 chisq[1] += freq * log (expected / freq);
2400 /* Calculate Yates and Fisher exact test. */
2401 if (ns_cols == 2 && ns_rows == 2)
2403 double f11, f12, f21, f22;
2409 for (i = j = 0; i < n_cols; i++)
2410 if (col_tot[i] != 0.)
2419 f11 = mat[nz_cols[0]];
2420 f12 = mat[nz_cols[1]];
2421 f21 = mat[nz_cols[0] + n_cols];
2422 f22 = mat[nz_cols[1] + n_cols];
2427 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2430 chisq[3] = (W * x * x
2431 / (f11 + f12) / (f21 + f22)
2432 / (f11 + f21) / (f12 + f22));
2440 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2441 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2444 /* Calculate Mantel-Haenszel. */
2445 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2447 double r, ase_0, ase_1;
2448 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2450 chisq[4] = (W - 1.) * r * r;
2455 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2456 ASE_1, and ase_0 into ASE_0. The row and column values must be
2457 passed in X and Y. */
2459 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2461 double SX, SY, S, T;
2463 double sum_XYf, sum_X2Y2f;
2464 double sum_Xr, sum_X2r;
2465 double sum_Yc, sum_Y2c;
2468 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2469 for (j = 0; j < n_cols; j++)
2471 double fij = mat[j + i * n_cols];
2472 double product = X[i] * Y[j];
2473 double temp = fij * product;
2475 sum_X2Y2f += temp * product;
2478 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2480 sum_Xr += X[i] * row_tot[i];
2481 sum_X2r += X[i] * X[i] * row_tot[i];
2485 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2487 sum_Yc += Y[i] * col_tot[i];
2488 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2492 S = sum_XYf - sum_Xr * sum_Yc / W;
2493 SX = sum_X2r - sum_Xr * sum_Xr / W;
2494 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2497 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2502 for (s = c = 0., i = 0; i < n_rows; i++)
2503 for (j = 0; j < n_cols; j++)
2505 double Xresid, Yresid;
2508 Xresid = X[i] - Xbar;
2509 Yresid = Y[j] - Ybar;
2510 temp = (T * Xresid * Yresid
2512 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2513 y = mat[j + i * n_cols] * temp * temp - c;
2518 *ase_1 = sqrt (s) / (T * T);
2522 static double somers_d_v[3];
2523 static double somers_d_ase[3];
2524 static double somers_d_t[3];
2526 /* Calculate symmetric statistics and their asymptotic standard
2527 errors. Returns 0 if none could be calculated. */
2529 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2530 double t[N_SYMMETRIC])
2532 int q = MIN (ns_rows, ns_cols);
2541 for (i = 0; i < N_SYMMETRIC; i++)
2542 v[i] = ase[i] = t[i] = SYSMIS;
2545 /* Phi, Cramer's V, contingency coefficient. */
2546 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2548 double Xp = 0.; /* Pearson chi-square. */
2553 for (r = 0; r < n_rows; r++)
2554 for (c = 0; c < n_cols; c++)
2556 const double expected = row_tot[r] * col_tot[c] / W;
2557 const double freq = mat[n_cols * r + c];
2558 const double residual = freq - expected;
2560 Xp += residual * residual / expected;
2564 if (cmd.a_statistics[CRS_ST_PHI])
2566 v[0] = sqrt (Xp / W);
2567 v[1] = sqrt (Xp / (W * (q - 1)));
2569 if (cmd.a_statistics[CRS_ST_CC])
2570 v[2] = sqrt (Xp / (Xp + W));
2573 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2574 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2579 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2586 for (r = 0; r < n_rows; r++)
2587 Dr -= row_tot[r] * row_tot[r];
2588 for (c = 0; c < n_cols; c++)
2589 Dc -= col_tot[c] * col_tot[c];
2595 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2596 for (c = 0; c < n_cols; c++)
2600 for (r = 0; r < n_rows; r++)
2601 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2611 for (i = 0; i < n_rows; i++)
2615 for (j = 1; j < n_cols; j++)
2616 Cij += col_tot[j] - cum[j + i * n_cols];
2619 for (j = 1; j < n_cols; j++)
2620 Dij += cum[j + (i - 1) * n_cols];
2624 double fij = mat[j + i * n_cols];
2630 assert (j < n_cols);
2632 Cij -= col_tot[j] - cum[j + i * n_cols];
2633 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2637 Cij += cum[j - 1 + (i - 1) * n_cols];
2638 Dij -= cum[j + (i - 1) * n_cols];
2644 if (cmd.a_statistics[CRS_ST_BTAU])
2645 v[3] = (P - Q) / sqrt (Dr * Dc);
2646 if (cmd.a_statistics[CRS_ST_CTAU])
2647 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2648 if (cmd.a_statistics[CRS_ST_GAMMA])
2649 v[5] = (P - Q) / (P + Q);
2651 /* ASE for tau-b, tau-c, gamma. Calculations could be
2652 eliminated here, at expense of memory. */
2657 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2658 for (i = 0; i < n_rows; i++)
2662 for (j = 1; j < n_cols; j++)
2663 Cij += col_tot[j] - cum[j + i * n_cols];
2666 for (j = 1; j < n_cols; j++)
2667 Dij += cum[j + (i - 1) * n_cols];
2671 double fij = mat[j + i * n_cols];
2673 if (cmd.a_statistics[CRS_ST_BTAU])
2675 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2676 + v[3] * (row_tot[i] * Dc
2677 + col_tot[j] * Dr));
2678 btau_cum += fij * temp * temp;
2682 const double temp = Cij - Dij;
2683 ctau_cum += fij * temp * temp;
2686 if (cmd.a_statistics[CRS_ST_GAMMA])
2688 const double temp = Q * Cij - P * Dij;
2689 gamma_cum += fij * temp * temp;
2692 if (cmd.a_statistics[CRS_ST_D])
2694 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2695 - (P - Q) * (W - row_tot[i]));
2696 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2697 - (Q - P) * (W - col_tot[j]));
2702 assert (j < n_cols);
2704 Cij -= col_tot[j] - cum[j + i * n_cols];
2705 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2709 Cij += cum[j - 1 + (i - 1) * n_cols];
2710 Dij -= cum[j + (i - 1) * n_cols];
2716 btau_var = ((btau_cum
2717 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2719 if (cmd.a_statistics[CRS_ST_BTAU])
2721 ase[3] = sqrt (btau_var);
2722 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2725 if (cmd.a_statistics[CRS_ST_CTAU])
2727 ase[4] = ((2 * q / ((q - 1) * W * W))
2728 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2729 t[4] = v[4] / ase[4];
2731 if (cmd.a_statistics[CRS_ST_GAMMA])
2733 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2734 t[5] = v[5] / (2. / (P + Q)
2735 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2737 if (cmd.a_statistics[CRS_ST_D])
2739 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2740 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2741 somers_d_t[0] = (somers_d_v[0]
2743 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2744 somers_d_v[1] = (P - Q) / Dc;
2745 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2746 somers_d_t[1] = (somers_d_v[1]
2748 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2749 somers_d_v[2] = (P - Q) / Dr;
2750 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2751 somers_d_t[2] = (somers_d_v[2]
2753 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2759 /* Spearman correlation, Pearson's r. */
2760 if (cmd.a_statistics[CRS_ST_CORR])
2762 double *R = xmalloca (sizeof *R * n_rows);
2763 double *C = xmalloca (sizeof *C * n_cols);
2766 double y, t, c = 0., s = 0.;
2771 R[i] = s + (row_tot[i] + 1.) / 2.;
2778 assert (i < n_rows);
2783 double y, t, c = 0., s = 0.;
2788 C[j] = s + (col_tot[j] + 1.) / 2;
2795 assert (j < n_cols);
2799 calc_r (R, C, &v[6], &t[6], &ase[6]);
2805 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2809 /* Cohen's kappa. */
2810 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2812 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2815 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2816 i < ns_rows; i++, j++)
2820 while (col_tot[j] == 0.)
2823 prod = row_tot[i] * col_tot[j];
2824 sum = row_tot[i] + col_tot[j];
2826 sum_fii += mat[j + i * n_cols];
2828 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2829 sum_riciri_ci += prod * sum;
2831 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2832 for (j = 0; j < ns_cols; j++)
2834 double sum = row_tot[i] + col_tot[j];
2835 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2838 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2840 ase[8] = sqrt ((W * W * sum_rici
2841 + sum_rici * sum_rici
2842 - W * sum_riciri_ci)
2843 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2845 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2846 / pow2 (W * W - sum_rici))
2847 + ((2. * (W - sum_fii)
2848 * (2. * sum_fii * sum_rici
2849 - W * sum_fiiri_ci))
2850 / cube (W * W - sum_rici))
2851 + (pow2 (W - sum_fii)
2852 * (W * sum_fijri_ci2 - 4.
2853 * sum_rici * sum_rici)
2854 / pow4 (W * W - sum_rici))));
2856 t[8] = v[8] / ase[8];
2863 /* Calculate risk estimate. */
2865 calc_risk (double *value, double *upper, double *lower, union value *c)
2867 double f11, f12, f21, f22;
2873 for (i = 0; i < 3; i++)
2874 value[i] = upper[i] = lower[i] = SYSMIS;
2877 if (ns_rows != 2 || ns_cols != 2)
2884 for (i = j = 0; i < n_cols; i++)
2885 if (col_tot[i] != 0.)
2894 f11 = mat[nz_cols[0]];
2895 f12 = mat[nz_cols[1]];
2896 f21 = mat[nz_cols[0] + n_cols];
2897 f22 = mat[nz_cols[1] + n_cols];
2899 c[0] = cols[nz_cols[0]];
2900 c[1] = cols[nz_cols[1]];
2903 value[0] = (f11 * f22) / (f12 * f21);
2904 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2905 lower[0] = value[0] * exp (-1.960 * v);
2906 upper[0] = value[0] * exp (1.960 * v);
2908 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2909 v = sqrt ((f12 / (f11 * (f11 + f12)))
2910 + (f22 / (f21 * (f21 + f22))));
2911 lower[1] = value[1] * exp (-1.960 * v);
2912 upper[1] = value[1] * exp (1.960 * v);
2914 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2915 v = sqrt ((f11 / (f12 * (f11 + f12)))
2916 + (f21 / (f22 * (f21 + f22))));
2917 lower[2] = value[2] * exp (-1.960 * v);
2918 upper[2] = value[2] * exp (1.960 * v);
2923 /* Calculate directional measures. */
2925 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2926 double t[N_DIRECTIONAL])
2931 for (i = 0; i < N_DIRECTIONAL; i++)
2932 v[i] = ase[i] = t[i] = SYSMIS;
2936 if (cmd.a_statistics[CRS_ST_LAMBDA])
2938 double *fim = xnmalloc (n_rows, sizeof *fim);
2939 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2940 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2941 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2942 double sum_fim, sum_fmj;
2944 int rm_index, cm_index;
2947 /* Find maximum for each row and their sum. */
2948 for (sum_fim = 0., i = 0; i < n_rows; i++)
2950 double max = mat[i * n_cols];
2953 for (j = 1; j < n_cols; j++)
2954 if (mat[j + i * n_cols] > max)
2956 max = mat[j + i * n_cols];
2960 sum_fim += fim[i] = max;
2961 fim_index[i] = index;
2964 /* Find maximum for each column. */
2965 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2967 double max = mat[j];
2970 for (i = 1; i < n_rows; i++)
2971 if (mat[j + i * n_cols] > max)
2973 max = mat[j + i * n_cols];
2977 sum_fmj += fmj[j] = max;
2978 fmj_index[j] = index;
2981 /* Find maximum row total. */
2984 for (i = 1; i < n_rows; i++)
2985 if (row_tot[i] > rm)
2991 /* Find maximum column total. */
2994 for (j = 1; j < n_cols; j++)
2995 if (col_tot[j] > cm)
3001 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
3002 v[1] = (sum_fmj - rm) / (W - rm);
3003 v[2] = (sum_fim - cm) / (W - cm);
3005 /* ASE1 for Y given X. */
3009 for (accum = 0., i = 0; i < n_rows; i++)
3010 for (j = 0; j < n_cols; j++)
3012 const int deltaj = j == cm_index;
3013 accum += (mat[j + i * n_cols]
3014 * pow2 ((j == fim_index[i])
3019 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3022 /* ASE0 for Y given X. */
3026 for (accum = 0., i = 0; i < n_rows; i++)
3027 if (cm_index != fim_index[i])
3028 accum += (mat[i * n_cols + fim_index[i]]
3029 + mat[i * n_cols + cm_index]);
3030 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3033 /* ASE1 for X given Y. */
3037 for (accum = 0., i = 0; i < n_rows; i++)
3038 for (j = 0; j < n_cols; j++)
3040 const int deltaj = i == rm_index;
3041 accum += (mat[j + i * n_cols]
3042 * pow2 ((i == fmj_index[j])
3047 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3050 /* ASE0 for X given Y. */
3054 for (accum = 0., j = 0; j < n_cols; j++)
3055 if (rm_index != fmj_index[j])
3056 accum += (mat[j + n_cols * fmj_index[j]]
3057 + mat[j + n_cols * rm_index]);
3058 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3061 /* Symmetric ASE0 and ASE1. */
3066 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3067 for (j = 0; j < n_cols; j++)
3069 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3070 int temp1 = (i == rm_index) + (j == cm_index);
3071 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3072 accum1 += (mat[j + i * n_cols]
3073 * pow2 (temp0 + (v[0] - 1.) * temp1));
3075 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3076 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3077 / (2. * W - rm - cm));
3086 double sum_fij2_ri, sum_fij2_ci;
3087 double sum_ri2, sum_cj2;
3089 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3090 for (j = 0; j < n_cols; j++)
3092 double temp = pow2 (mat[j + i * n_cols]);
3093 sum_fij2_ri += temp / row_tot[i];
3094 sum_fij2_ci += temp / col_tot[j];
3097 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3098 sum_ri2 += row_tot[i] * row_tot[i];
3100 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3101 sum_cj2 += col_tot[j] * col_tot[j];
3103 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3104 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3108 if (cmd.a_statistics[CRS_ST_UC])
3110 double UX, UY, UXY, P;
3111 double ase1_yx, ase1_xy, ase1_sym;
3114 for (UX = 0., i = 0; i < n_rows; i++)
3115 if (row_tot[i] > 0.)
3116 UX -= row_tot[i] / W * log (row_tot[i] / W);
3118 for (UY = 0., j = 0; j < n_cols; j++)
3119 if (col_tot[j] > 0.)
3120 UY -= col_tot[j] / W * log (col_tot[j] / W);
3122 for (UXY = P = 0., i = 0; i < n_rows; i++)
3123 for (j = 0; j < n_cols; j++)
3125 double entry = mat[j + i * n_cols];
3130 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3131 UXY -= entry / W * log (entry / W);
3134 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3135 for (j = 0; j < n_cols; j++)
3137 double entry = mat[j + i * n_cols];
3142 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3143 + (UX - UXY) * log (col_tot[j] / W));
3144 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3145 + (UY - UXY) * log (row_tot[i] / W));
3146 ase1_sym += entry * pow2 ((UXY
3147 * log (row_tot[i] * col_tot[j] / (W * W)))
3148 - (UX + UY) * log (entry / W));
3151 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3152 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3153 t[5] = v[5] / ((2. / (W * (UX + UY)))
3154 * sqrt (P - pow2 (UX + UY - UXY) / W));
3156 v[6] = (UX + UY - UXY) / UX;
3157 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3158 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3160 v[7] = (UX + UY - UXY) / UY;
3161 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3162 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3166 if (cmd.a_statistics[CRS_ST_D])
3171 calc_symmetric (NULL, NULL, NULL);
3172 for (i = 0; i < 3; i++)
3174 v[8 + i] = somers_d_v[i];
3175 ase[8 + i] = somers_d_ase[i];
3176 t[8 + i] = somers_d_t[i];
3181 if (cmd.a_statistics[CRS_ST_ETA])
3184 double sum_Xr, sum_X2r;
3188 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3190 sum_Xr += rows[i].f * row_tot[i];
3191 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3193 SX = sum_X2r - sum_Xr * sum_Xr / W;
3195 for (SXW = 0., j = 0; j < n_cols; j++)
3199 for (cum = 0., i = 0; i < n_rows; i++)
3201 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3202 cum += rows[i].f * mat[j + i * n_cols];
3205 SXW -= cum * cum / col_tot[j];
3207 v[11] = sqrt (1. - SXW / SX);
3211 double sum_Yc, sum_Y2c;
3215 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3217 sum_Yc += cols[i].f * col_tot[i];
3218 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3220 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3222 for (SYW = 0., i = 0; i < n_rows; i++)
3226 for (cum = 0., j = 0; j < n_cols; j++)
3228 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3229 cum += cols[j].f * mat[j + i * n_cols];
3232 SYW -= cum * cum / row_tot[i];
3234 v[12] = sqrt (1. - SYW / SY);
3241 /* A wrapper around data_out() that limits string output to short
3242 string width and null terminates the result. */
3244 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3246 struct fmt_spec fmt_subst;
3248 /* Limit to short string width. */
3249 if (fmt_is_string (fp->type))
3253 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3254 if (fmt_subst.type == FMT_A)
3255 fmt_subst.w = MIN (8, fmt_subst.w);
3257 fmt_subst.w = MIN (16, fmt_subst.w);
3263 data_out (v, fp, s);
3265 /* Null terminate. */