1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/message.h>
54 #include <libpspp/misc.h>
55 #include <libpspp/pool.h>
56 #include <libpspp/str.h>
57 #include <output/output.h>
58 #include <output/table.h>
65 #define _(msgid) gettext (msgid)
66 #define N_(msgid) msgid
74 missing=miss:!table/include/report;
75 +write[wr_]=none,cells,all;
76 +format=fmt:!labels/nolabels/novallabs,
79 tabl:!tables/notables,
82 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
84 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
90 /* Number of chi-square statistics. */
93 /* Number of symmetric statistics. */
96 /* Number of directional statistics. */
97 #define N_DIRECTIONAL 13
99 /* A single table entry for general mode. */
102 int table; /* Flattened table number. */
105 double freq; /* Frequency count. */
106 double *data; /* Crosstabulation table for integer mode. */
109 union value values[1]; /* Values. */
112 /* A crosstabulation. */
115 int nvar; /* Number of variables. */
116 double missing; /* Missing cases count. */
117 int ofs; /* Integer mode: Offset into sorted_tab[]. */
118 const struct variable *vars[2]; /* At least two variables; sorted by
119 larger indices first. */
122 /* Integer mode variable info. */
125 int min; /* Minimum value. */
126 int max; /* Maximum value + 1. */
127 int count; /* max - min. */
130 static inline struct var_range *
131 get_var_range (const struct variable *v)
133 return var_get_aux (v);
136 /* Indexes into crosstab.v. */
143 /* General mode crosstabulation table. */
144 static struct hsh_table *gen_tab; /* Hash table. */
145 static int n_sorted_tab; /* Number of entries in sorted_tab. */
146 static struct table_entry **sorted_tab; /* Sorted table. */
148 /* Variables specifies on VARIABLES. */
149 static const struct variable **variables;
150 static size_t variables_cnt;
153 static struct crosstab **xtab;
156 /* Integer or general mode? */
165 static int num_cells; /* Number of cells requested. */
166 static int cells[8]; /* Cells requested. */
169 static int write_style; /* One of WR_* that specifies the WRITE style. */
171 /* Command parsing info. */
172 static struct cmd_crosstabs cmd;
175 static struct pool *pl_tc; /* For table cells. */
176 static struct pool *pl_col; /* For column data. */
178 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
179 static void precalc (struct casereader *, const struct dataset *);
180 static void calc_general (const struct ccase *, const struct dataset *);
181 static void calc_integer (const struct ccase *, const struct dataset *);
182 static void postcalc (const struct dataset *);
184 static void submit (struct tab_table *);
186 static void format_short (char *s, const struct fmt_spec *fp,
187 const union value *v);
189 /* Parse and execute CROSSTABS, then clean up. */
191 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
193 int result = internal_cmd_crosstabs (lexer, ds);
197 pool_destroy (pl_tc);
198 pool_destroy (pl_col);
200 for (i = 0; i < nxtab; i++)
207 /* Parses and executes the CROSSTABS procedure. */
209 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
211 struct casegrouper *grouper;
212 struct casereader *input, *group;
220 pl_tc = pool_create ();
221 pl_col = pool_create ();
223 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
226 mode = variables ? INTEGER : GENERAL;
231 cmd.a_cells[CRS_CL_COUNT] = 1;
237 for (i = 0; i < CRS_CL_count; i++)
242 cmd.a_cells[CRS_CL_COUNT] = 1;
243 cmd.a_cells[CRS_CL_ROW] = 1;
244 cmd.a_cells[CRS_CL_COLUMN] = 1;
245 cmd.a_cells[CRS_CL_TOTAL] = 1;
247 if (cmd.a_cells[CRS_CL_ALL])
249 for (i = 0; i < CRS_CL_count; i++)
251 cmd.a_cells[CRS_CL_ALL] = 0;
253 cmd.a_cells[CRS_CL_NONE] = 0;
255 for (num_cells = i = 0; i < CRS_CL_count; i++)
257 cells[num_cells++] = i;
260 if (cmd.sbc_statistics)
265 for (i = 0; i < CRS_ST_count; i++)
266 if (cmd.a_statistics[i])
269 cmd.a_statistics[CRS_ST_CHISQ] = 1;
270 if (cmd.a_statistics[CRS_ST_ALL])
271 for (i = 0; i < CRS_ST_count; i++)
272 cmd.a_statistics[i] = 1;
276 if (cmd.miss == CRS_REPORT && mode == GENERAL)
278 msg (SE, _("Missing mode REPORT not allowed in general mode. "
279 "Assuming MISSING=TABLE."));
280 cmd.miss = CRS_TABLE;
284 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
285 cmd.a_write[CRS_WR_ALL] = 0;
286 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
288 msg (SE, _("Write mode ALL not allowed in general mode. "
289 "Assuming WRITE=CELLS."));
290 cmd.a_write[CRS_WR_CELLS] = 1;
293 && (cmd.a_write[CRS_WR_NONE]
294 + cmd.a_write[CRS_WR_ALL]
295 + cmd.a_write[CRS_WR_CELLS] == 0))
296 cmd.a_write[CRS_WR_CELLS] = 1;
297 if (cmd.a_write[CRS_WR_CELLS])
298 write_style = CRS_WR_CELLS;
299 else if (cmd.a_write[CRS_WR_ALL])
300 write_style = CRS_WR_ALL;
302 write_style = CRS_WR_NONE;
304 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
306 grouper = casegrouper_create_splits (input, dataset_dict (ds));
307 while (casegrouper_get_next_group (grouper, &group))
313 for (; (c = casereader_read (group)) != NULL; case_unref (c))
316 calc_general (c, ds);
318 calc_integer (c, ds);
320 casereader_destroy (group);
324 ok = casegrouper_destroy (grouper);
325 ok = proc_commit (ds) && ok;
327 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
330 /* Parses the TABLES subcommand. */
332 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
334 struct const_var_set *var_set;
336 const struct variable ***by = NULL;
337 size_t *by_nvar = NULL;
341 /* Ensure that this is a TABLES subcommand. */
342 if (!lex_match_id (lexer, "TABLES")
343 && (lex_token (lexer) != T_ID ||
344 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
345 && lex_token (lexer) != T_ALL)
347 lex_match (lexer, '=');
349 if (variables != NULL)
350 var_set = const_var_set_create_from_array (variables, variables_cnt);
352 var_set = const_var_set_create_from_dict (dataset_dict (ds));
353 assert (var_set != NULL);
357 by = xnrealloc (by, n_by + 1, sizeof *by);
358 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
359 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
360 PV_NO_DUPLICATE | PV_NO_SCRATCH))
362 if (xalloc_oversized (nx, by_nvar[n_by]))
364 msg (SE, _("Too many cross-tabulation variables or dimensions."));
370 if (!lex_match (lexer, T_BY))
374 lex_error (lexer, _("expecting BY"));
383 int *by_iter = xcalloc (n_by, sizeof *by_iter);
386 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
387 for (i = 0; i < nx; i++)
391 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
398 for (i = 0; i < n_by; i++)
399 x->vars[i] = by[i][by_iter[i]];
405 for (i = n_by - 1; i >= 0; i--)
407 if (++by_iter[i] < by_nvar[i])
420 /* All return paths lead here. */
424 for (i = 0; i < n_by; i++)
430 const_var_set_destroy (var_set);
435 /* Parses the VARIABLES subcommand. */
437 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
441 msg (SE, _("VARIABLES must be specified before TABLES."));
445 lex_match (lexer, '=');
449 size_t orig_nv = variables_cnt;
454 if (!parse_variables_const (lexer, dataset_dict (ds),
455 &variables, &variables_cnt,
456 (PV_APPEND | PV_NUMERIC
457 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
460 if (lex_token (lexer) != '(')
462 lex_error (lexer, "expecting `('");
467 if (!lex_force_int (lexer))
469 min = lex_integer (lexer);
472 lex_match (lexer, ',');
474 if (!lex_force_int (lexer))
476 max = lex_integer (lexer);
479 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
485 if (lex_token (lexer) != ')')
487 lex_error (lexer, "expecting `)'");
492 for (i = orig_nv; i < variables_cnt; i++)
494 struct var_range *vr = xmalloc (sizeof *vr);
497 vr->count = max - min + 1;
498 var_attach_aux (variables[i], vr, var_dtor_free);
501 if (lex_token (lexer) == '/')
513 /* Data file processing. */
515 static int compare_table_entry (const void *, const void *, const void *);
516 static unsigned hash_table_entry (const void *, const void *);
518 /* Set up the crosstabulation tables for processing. */
520 precalc (struct casereader *input, const struct dataset *ds)
524 c = casereader_peek (input, 0);
527 output_split_file_values (ds, c);
533 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
543 for (i = 0; i < nxtab; i++)
545 struct crosstab *x = xtab[i];
550 x->ofs = n_sorted_tab;
552 for (j = 2; j < x->nvar; j++)
553 count *= get_var_range (x->vars[j - 2])->count;
555 sorted_tab = xnrealloc (sorted_tab,
556 n_sorted_tab + count, sizeof *sorted_tab);
557 v = xmalloca (sizeof *v * x->nvar);
558 for (j = 2; j < x->nvar; j++)
559 v[j] = get_var_range (x->vars[j])->min;
560 for (j = 0; j < count; j++)
562 struct table_entry *te;
565 te = sorted_tab[n_sorted_tab++]
566 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
570 int row_cnt = get_var_range (x->vars[0])->count;
571 int col_cnt = get_var_range (x->vars[1])->count;
572 const int mat_size = row_cnt * col_cnt;
575 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
576 for (m = 0; m < mat_size; m++)
580 for (k = 2; k < x->nvar; k++)
581 te->values[k].f = v[k];
582 for (k = 2; k < x->nvar; k++)
584 struct var_range *vr = get_var_range (x->vars[k]);
585 if (++v[k] >= vr->max)
594 sorted_tab = xnrealloc (sorted_tab,
595 n_sorted_tab + 1, sizeof *sorted_tab);
596 sorted_tab[n_sorted_tab] = NULL;
601 /* Form crosstabulations for general mode. */
603 calc_general (const struct ccase *c, const struct dataset *ds)
605 /* Missing values to exclude. */
606 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
607 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
611 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
613 /* Flattened current table index. */
616 for (t = 0; t < nxtab; t++)
618 struct crosstab *x = xtab[t];
619 const size_t entry_size = (sizeof (struct table_entry)
620 + sizeof (union value) * (x->nvar - 1));
621 struct table_entry *te = xmalloca (entry_size);
623 /* Construct table entry for the current record and table. */
629 for (j = 0; j < x->nvar; j++)
631 const union value *v = case_data (c, x->vars[j]);
632 if (var_is_value_missing (x->vars[j], v, exclude))
634 x->missing += weight;
638 if (var_is_numeric (x->vars[j]))
639 te->values[j].f = case_num (c, x->vars[j]);
642 size_t n = var_get_width (x->vars[j]);
643 if (n > MAX_SHORT_STRING)
644 n = MAX_SHORT_STRING;
645 memcpy (te->values[j].s, case_str (c, x->vars[j]), n);
647 /* Necessary in order to simplify comparisons. */
648 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
649 sizeof (union value) - n);
654 /* Add record to hash table. */
656 struct table_entry **tepp
657 = (struct table_entry **) hsh_probe (gen_tab, te);
660 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
663 memcpy (tep, te, entry_size);
668 (*tepp)->u.freq += weight;
677 calc_integer (const struct ccase *c, const struct dataset *ds)
679 bool bad_warn = true;
682 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
684 /* Flattened current table index. */
687 for (t = 0; t < nxtab; t++)
689 struct crosstab *x = xtab[t];
694 for (i = 0; i < x->nvar; i++)
696 const struct variable *const v = x->vars[i];
697 struct var_range *vr = get_var_range (v);
698 double value = case_num (c, v);
700 /* Note that the first test also rules out SYSMIS. */
701 if ((value < vr->min || value >= vr->max)
702 || (cmd.miss == CRS_TABLE
703 && var_is_num_missing (v, value, MV_USER)))
705 x->missing += weight;
711 ofs += fact * ((int) value - vr->min);
717 const struct variable *row_var = x->vars[ROW_VAR];
718 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
720 const struct variable *col_var = x->vars[COL_VAR];
721 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
723 const int col_dim = get_var_range (col_var)->count;
725 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
732 /* Compare the table_entry's at A and B and return a strcmp()-type
735 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
737 const struct table_entry *a = a_;
738 const struct table_entry *b = b_;
740 if (a->table > b->table)
742 else if (a->table < b->table)
746 const struct crosstab *x = xtab[a->table];
749 for (i = x->nvar - 1; i >= 0; i--)
750 if (var_is_numeric (x->vars[i]))
752 const double diffnum = a->values[i].f - b->values[i].f;
755 else if (diffnum > 0)
760 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
761 var_get_width (x->vars[i]));
770 /* Calculate a hash value from table_entry A. */
772 hash_table_entry (const void *a_, const void *aux UNUSED)
774 const struct table_entry *a = a_;
779 for (i = 0; i < xtab[a->table]->nvar; i++)
780 hash = hash_bytes (&a->values[i], sizeof a->values[i], hash);
785 /* Post-data reading calculations. */
787 static struct table_entry **find_pivot_extent (struct table_entry **,
788 int *cnt, int pivot);
789 static void enum_var_values (struct table_entry **entries, int entry_cnt,
791 union value **values, int *value_cnt);
792 static void output_pivot_table (struct table_entry **, struct table_entry **,
793 const struct dictionary *,
794 double **, double **, double **,
795 int *, int *, int *);
796 static void make_summary_table (const struct dictionary *);
799 postcalc (const struct dataset *ds)
803 n_sorted_tab = hsh_count (gen_tab);
804 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
807 make_summary_table (dataset_dict (ds));
809 /* Identify all the individual crosstabulation tables, and deal with
812 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
813 int pc = n_sorted_tab; /* Pivot count. */
815 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
816 int maxrows = 0, maxcols = 0, maxcells = 0;
820 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
824 output_pivot_table (pb, pe, dataset_dict (ds),
825 &mat, &row_tot, &col_tot,
826 &maxrows, &maxcols, &maxcells);
835 hsh_destroy (gen_tab);
839 for (i = 0; i < n_sorted_tab; i++)
841 free (sorted_tab[i]->u.data);
842 free (sorted_tab[i]);
848 static void insert_summary (struct tab_table *, int tab_index,
849 const struct dictionary *,
852 /* Output a table summarizing the cases processed. */
854 make_summary_table (const struct dictionary *dict)
856 struct tab_table *summary;
858 struct table_entry **pb = sorted_tab, **pe;
859 int pc = n_sorted_tab;
862 summary = tab_create (7, 3 + nxtab, 1);
863 tab_title (summary, _("Summary."));
864 tab_headers (summary, 1, 0, 3, 0);
865 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
866 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
867 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
868 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
869 tab_hline (summary, TAL_1, 1, 6, 1);
870 tab_hline (summary, TAL_1, 1, 6, 2);
871 tab_vline (summary, TAL_1, 3, 1, 1);
872 tab_vline (summary, TAL_1, 5, 1, 1);
876 for (i = 0; i < 3; i++)
878 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
879 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
882 tab_offset (summary, 0, 3);
888 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
892 while (cur_tab < (*pb)->table)
893 insert_summary (summary, cur_tab++, dict, 0.);
896 for (valid = 0.; pb < pe; pb++)
897 valid += (*pb)->u.freq;
900 const struct crosstab *const x = xtab[(*pb)->table];
901 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
902 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
903 const int count = n_cols * n_rows;
905 for (valid = 0.; pb < pe; pb++)
907 const double *data = (*pb)->u.data;
910 for (i = 0; i < count; i++)
914 insert_summary (summary, cur_tab++, dict, valid);
919 while (cur_tab < nxtab)
920 insert_summary (summary, cur_tab++, dict, 0.);
925 /* Inserts a line into T describing the crosstabulation at index
926 TAB_INDEX, which has VALID valid observations. */
928 insert_summary (struct tab_table *t, int tab_index,
929 const struct dictionary *dict,
932 struct crosstab *x = xtab[tab_index];
934 const struct variable *wv = dict_get_weight (dict);
935 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
937 tab_hline (t, TAL_1, 0, 6, 0);
939 /* Crosstabulation name. */
941 char *buf = xmalloca (128 * x->nvar);
945 for (i = 0; i < x->nvar; i++)
948 cp = stpcpy (cp, " * ");
950 cp = stpcpy (cp, var_to_string (x->vars[i]));
952 tab_text (t, 0, 0, TAB_LEFT, buf);
957 /* Counts and percentages. */
967 for (i = 0; i < 3; i++)
969 tab_double (t, i * 2 + 1, 0, TAB_RIGHT, n[i], wfmt);
970 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
981 static struct tab_table *table; /* Crosstabulation table. */
982 static struct tab_table *chisq; /* Chi-square table. */
983 static struct tab_table *sym; /* Symmetric measures table. */
984 static struct tab_table *risk; /* Risk estimate table. */
985 static struct tab_table *direct; /* Directional measures table. */
988 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
990 /* Column values, number of columns. */
991 static union value *cols;
994 /* Row values, number of rows. */
995 static union value *rows;
998 /* Number of statistically interesting columns/rows (columns/rows with
1000 static int ns_cols, ns_rows;
1002 /* Crosstabulation. */
1003 static const struct crosstab *x;
1005 /* Number of variables from the crosstabulation to consider. This is
1006 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1009 /* Matrix contents. */
1010 static double *mat; /* Matrix proper. */
1011 static double *row_tot; /* Row totals. */
1012 static double *col_tot; /* Column totals. */
1013 static double W; /* Grand total. */
1015 static void display_dimensions (struct tab_table *, int first_difference,
1016 struct table_entry *);
1017 static void display_crosstabulation (void);
1018 static void display_chisq (const struct dictionary *);
1019 static void display_symmetric (const struct dictionary *);
1020 static void display_risk (const struct dictionary *);
1021 static void display_directional (void);
1022 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1023 static void table_value_missing (struct tab_table *table, int c, int r,
1024 unsigned char opt, const union value *v,
1025 const struct variable *var);
1026 static void delete_missing (void);
1028 /* Output pivot table beginning at PB and continuing until PE,
1029 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1030 hold *MAXROWS entries. */
1032 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1033 const struct dictionary *dict,
1034 double **matp, double **row_totp, double **col_totp,
1035 int *maxrows, int *maxcols, int *maxcells)
1038 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1039 int tc = pe - pb; /* Table count. */
1041 /* Table entry for header comparison. */
1042 struct table_entry *cmp = NULL;
1044 x = xtab[(*pb)->table];
1045 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1047 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1049 /* Crosstabulation table initialization. */
1052 table = tab_create (nvar + n_cols,
1053 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1054 tab_headers (table, nvar - 1, 0, 2, 0);
1056 /* First header line. */
1057 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1058 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1060 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1062 /* Second header line. */
1066 for (i = 2; i < nvar; i++)
1067 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1068 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1069 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1070 var_get_name (x->vars[ROW_VAR]));
1071 for (i = 0; i < n_cols; i++)
1072 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1074 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1077 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1078 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1082 char *title = xmalloca (x->nvar * 64 + 128);
1086 if (cmd.pivot == CRS_PIVOT)
1087 for (i = 0; i < nvar; i++)
1090 cp = stpcpy (cp, " by ");
1091 cp = stpcpy (cp, var_get_name (x->vars[i]));
1095 cp = spprintf (cp, "%s by %s for",
1096 var_get_name (x->vars[0]),
1097 var_get_name (x->vars[1]));
1098 for (i = 2; i < nvar; i++)
1100 char buf[64], *bufp;
1105 cp = stpcpy (cp, var_get_name (x->vars[i]));
1107 format_short (buf, var_get_print_format (x->vars[i]),
1109 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1111 cp = stpcpy (cp, bufp);
1115 cp = stpcpy (cp, " [");
1116 for (i = 0; i < num_cells; i++)
1124 static const struct tuple cell_names[] =
1126 {CRS_CL_COUNT, N_("count")},
1127 {CRS_CL_ROW, N_("row %")},
1128 {CRS_CL_COLUMN, N_("column %")},
1129 {CRS_CL_TOTAL, N_("total %")},
1130 {CRS_CL_EXPECTED, N_("expected")},
1131 {CRS_CL_RESIDUAL, N_("residual")},
1132 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1133 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1137 const struct tuple *t;
1139 for (t = cell_names; t->value != cells[i]; t++)
1140 assert (t->value != -1);
1142 cp = stpcpy (cp, ", ");
1143 cp = stpcpy (cp, gettext (t->name));
1147 tab_title (table, "%s", title);
1151 tab_offset (table, 0, 2);
1156 /* Chi-square table initialization. */
1157 if (cmd.a_statistics[CRS_ST_CHISQ])
1159 chisq = tab_create (6 + (nvar - 2),
1160 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1161 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1163 tab_title (chisq, _("Chi-square tests."));
1165 tab_offset (chisq, nvar - 2, 0);
1166 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1167 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1168 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1169 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1170 _("Asymp. Sig. (2-sided)"));
1171 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1172 _("Exact. Sig. (2-sided)"));
1173 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1174 _("Exact. Sig. (1-sided)"));
1176 tab_offset (chisq, 0, 1);
1181 /* Symmetric measures. */
1182 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1183 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1184 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1185 || cmd.a_statistics[CRS_ST_KAPPA])
1187 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1188 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1189 tab_title (sym, _("Symmetric measures."));
1191 tab_offset (sym, nvar - 2, 0);
1192 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1193 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1194 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1195 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1196 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1197 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1198 tab_offset (sym, 0, 1);
1203 /* Risk estimate. */
1204 if (cmd.a_statistics[CRS_ST_RISK])
1206 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1207 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1208 tab_title (risk, _("Risk estimate."));
1210 tab_offset (risk, nvar - 2, 0);
1211 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1212 _("95%% Confidence Interval"));
1213 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1214 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1215 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1216 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1217 tab_hline (risk, TAL_1, 2, 3, 1);
1218 tab_vline (risk, TAL_1, 2, 0, 1);
1219 tab_offset (risk, 0, 2);
1224 /* Directional measures. */
1225 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1226 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1228 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1229 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1230 tab_title (direct, _("Directional measures."));
1232 tab_offset (direct, nvar - 2, 0);
1233 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1234 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1235 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1236 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1237 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1238 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1239 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1240 tab_offset (direct, 0, 1);
1247 /* Find pivot subtable if applicable. */
1248 te = find_pivot_extent (tb, &tc, 0);
1252 /* Find all the row variable values. */
1253 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1255 /* Allocate memory space for the column and row totals. */
1256 if (n_rows > *maxrows)
1258 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1259 row_tot = *row_totp;
1262 if (n_cols > *maxcols)
1264 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1265 col_tot = *col_totp;
1269 /* Allocate table space for the matrix. */
1270 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1271 tab_realloc (table, -1,
1272 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1273 tab_nr (table) * (pe - pb) / (te - tb)));
1275 if (mode == GENERAL)
1277 /* Allocate memory space for the matrix. */
1278 if (n_cols * n_rows > *maxcells)
1280 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1281 *maxcells = n_cols * n_rows;
1286 /* Build the matrix and calculate column totals. */
1288 union value *cur_col = cols;
1289 union value *cur_row = rows;
1291 double *cp = col_tot;
1292 struct table_entry **p;
1295 for (p = &tb[0]; p < te; p++)
1297 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1301 for (; cur_row < &rows[n_rows]; cur_row++)
1307 mp = &mat[cur_col - cols];
1310 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1317 *cp += *mp = (*p)->u.freq;
1322 /* Zero out the rest of the matrix. */
1323 for (; cur_row < &rows[n_rows]; cur_row++)
1329 if (cur_col < &cols[n_cols])
1331 const int rem_cols = n_cols - (cur_col - cols);
1334 for (c = 0; c < rem_cols; c++)
1336 mp = &mat[cur_col - cols];
1337 for (r = 0; r < n_rows; r++)
1339 for (c = 0; c < rem_cols; c++)
1341 mp += n_cols - rem_cols;
1349 double *tp = col_tot;
1351 assert (mode == INTEGER);
1352 mat = (*tb)->u.data;
1355 /* Calculate column totals. */
1356 for (c = 0; c < n_cols; c++)
1359 double *cp = &mat[c];
1361 for (r = 0; r < n_rows; r++)
1362 cum += cp[r * n_cols];
1370 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1371 ns_cols += *cp != 0.;
1374 /* Calculate row totals. */
1377 double *rp = row_tot;
1380 for (ns_rows = 0, r = n_rows; r--; )
1383 for (c = n_cols; c--; )
1391 /* Calculate grand total. */
1397 if (n_rows < n_cols)
1398 tp = row_tot, n = n_rows;
1400 tp = col_tot, n = n_cols;
1406 /* Find the first variable that differs from the last subtable,
1407 then display the values of the dimensioning variables for
1408 each table that needs it. */
1410 int first_difference = nvar - 1;
1413 for (; ; first_difference--)
1415 assert (first_difference >= 2);
1416 if (memcmp (&cmp->values[first_difference],
1417 &(*tb)->values[first_difference],
1418 sizeof *cmp->values))
1424 display_dimensions (table, first_difference, *tb);
1426 display_dimensions (chisq, first_difference, *tb);
1428 display_dimensions (sym, first_difference, *tb);
1430 display_dimensions (risk, first_difference, *tb);
1432 display_dimensions (direct, first_difference, *tb);
1436 display_crosstabulation ();
1437 if (cmd.miss == CRS_REPORT)
1440 display_chisq (dict);
1442 display_symmetric (dict);
1444 display_risk (dict);
1446 display_directional ();
1457 tab_resize (chisq, 4 + (nvar - 2), -1);
1468 /* Delete missing rows and columns for statistical analysis when
1471 delete_missing (void)
1476 for (r = 0; r < n_rows; r++)
1477 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1481 for (c = 0; c < n_cols; c++)
1482 mat[c + r * n_cols] = 0.;
1490 for (c = 0; c < n_cols; c++)
1491 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1495 for (r = 0; r < n_rows; r++)
1496 mat[c + r * n_cols] = 0.;
1502 /* Prepare table T for submission, and submit it. */
1504 submit (struct tab_table *t)
1511 tab_resize (t, -1, 0);
1512 if (tab_nr (t) == tab_t (t))
1517 tab_offset (t, 0, 0);
1519 for (i = 2; i < nvar; i++)
1520 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1521 var_to_string (x->vars[i]));
1522 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1523 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1525 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1527 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1528 tab_dim (t, crosstabs_dim);
1532 /* Sets the widths of all the columns and heights of all the rows in
1533 table T for driver D. */
1535 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1539 /* Width of a numerical column. */
1540 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1541 if (cmd.miss == CRS_REPORT)
1542 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1544 /* Set width for header columns. */
1550 w = d->width - c * (t->nc - t->l);
1551 for (i = 0; i <= t->nc; i++)
1555 if (w < d->prop_em_width * 8)
1556 w = d->prop_em_width * 8;
1558 if (w > d->prop_em_width * 15)
1559 w = d->prop_em_width * 15;
1561 for (i = 0; i < t->l; i++)
1565 for (i = t->l; i < t->nc; i++)
1568 for (i = 0; i < t->nr; i++)
1569 t->h[i] = tab_natural_height (t, d, i);
1572 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1573 int *cnt, int pivot);
1574 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1575 int *cnt, int pivot);
1577 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1579 static struct table_entry **
1580 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1582 return (mode == GENERAL
1583 ? find_pivot_extent_general (tp, cnt, pivot)
1584 : find_pivot_extent_integer (tp, cnt, pivot));
1587 /* Find the extent of a region in TP that contains one table. If
1588 PIVOT != 0 that means a set of table entries with identical table
1589 number; otherwise they also have to have the same values for every
1590 dimension after the row and column dimensions. The table that is
1591 searched starts at TP and has length CNT. Returns the first entry
1592 after the last one in the table; sets *CNT to the number of
1593 remaining values. If there are no entries in TP at all, returns
1594 NULL. A yucky interface, admittedly, but it works. */
1595 static struct table_entry **
1596 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1598 struct table_entry *fp = *tp;
1603 x = xtab[(*tp)->table];
1611 if ((*tp)->table != fp->table)
1616 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1623 /* Integer mode correspondent to find_pivot_extent_general(). This
1624 could be optimized somewhat, but I just don't give a crap about
1625 CROSSTABS performance in integer mode, which is just a
1626 CROSSTABS wart as far as I'm concerned.
1628 That said, feel free to send optimization patches to me. */
1629 static struct table_entry **
1630 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1632 struct table_entry *fp = *tp;
1637 x = xtab[(*tp)->table];
1645 if ((*tp)->table != fp->table)
1650 if (memcmp (&(*tp)->values[2], &fp->values[2],
1651 sizeof (union value) * (x->nvar - 2)))
1658 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1659 result. WIDTH_ points to an int which is either 0 for a
1660 numeric value or a string width for a string value. */
1662 compare_value (const void *a_, const void *b_, const void *width_)
1664 const union value *a = a_;
1665 const union value *b = b_;
1666 const int *pwidth = width_;
1667 const int width = *pwidth;
1670 return (a->f < b->f) ? -1 : (a->f > b->f);
1672 return strncmp (a->s, b->s, width);
1675 /* Given an array of ENTRY_CNT table_entry structures starting at
1676 ENTRIES, creates a sorted list of the values that the variable
1677 with index VAR_IDX takes on. The values are returned as a
1678 malloc()'darray stored in *VALUES, with the number of values
1679 stored in *VALUE_CNT.
1682 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1683 union value **values, int *value_cnt)
1685 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1687 if (mode == GENERAL)
1689 int width = MIN (var_get_width (v), MAX_SHORT_STRING);
1692 *values = xnmalloc (entry_cnt, sizeof **values);
1693 for (i = 0; i < entry_cnt; i++)
1694 (*values)[i] = entries[i]->values[var_idx];
1695 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1696 compare_value, &width);
1700 struct var_range *vr = get_var_range (v);
1703 assert (mode == INTEGER);
1704 *values = xnmalloc (vr->count, sizeof **values);
1705 for (i = 0; i < vr->count; i++)
1706 (*values)[i].f = i + vr->min;
1707 *value_cnt = vr->count;
1711 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1712 from V, displayed with print format spec from variable VAR. When
1713 in REPORT missing-value mode, missing values have an M appended. */
1715 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1716 const union value *v, const struct variable *var)
1719 const struct fmt_spec *print = var_get_print_format (var);
1721 const char *label = var_lookup_value_label (var, v);
1724 tab_text (table, c, r, TAB_LEFT, label);
1728 s.string = tab_alloc (table, print->w);
1729 format_short (s.string, print, v);
1730 s.length = strlen (s.string);
1731 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1732 s.string[s.length++] = 'M';
1733 while (s.length && *s.string == ' ')
1738 tab_raw (table, c, r, opt, &s);
1741 /* Draws a line across TABLE at the current row to indicate the most
1742 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1743 that changed, and puts the values that changed into the table. TB
1744 and X must be the corresponding table_entry and crosstab,
1747 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1749 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1751 for (; first_difference >= 2; first_difference--)
1752 table_value_missing (table, nvar - first_difference - 1, 0,
1753 TAB_RIGHT, &tb->values[first_difference],
1754 x->vars[first_difference]);
1757 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1758 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1759 additionally suffixed with a letter `M'. */
1761 format_cell_entry (struct tab_table *table, int c, int r, double value,
1762 char suffix, bool mark_missing)
1764 const struct fmt_spec f = {FMT_F, 10, 1};
1769 s.string = tab_alloc (table, 16);
1771 data_out (&v, &f, s.string);
1772 while (*s.string == ' ')
1778 s.string[s.length++] = suffix;
1780 s.string[s.length++] = 'M';
1782 tab_raw (table, c, r, TAB_RIGHT, &s);
1785 /* Displays the crosstabulation table. */
1787 display_crosstabulation (void)
1792 for (r = 0; r < n_rows; r++)
1793 table_value_missing (table, nvar - 2, r * num_cells,
1794 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1796 tab_text (table, nvar - 2, n_rows * num_cells,
1797 TAB_LEFT, _("Total"));
1799 /* Put in the actual cells. */
1804 tab_offset (table, nvar - 1, -1);
1805 for (r = 0; r < n_rows; r++)
1808 tab_hline (table, TAL_1, -1, n_cols, 0);
1809 for (c = 0; c < n_cols; c++)
1811 bool mark_missing = false;
1812 double expected_value = row_tot[r] * col_tot[c] / W;
1813 if (cmd.miss == CRS_REPORT
1814 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1815 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1817 mark_missing = true;
1818 for (i = 0; i < num_cells; i++)
1829 v = *mp / row_tot[r] * 100.;
1833 v = *mp / col_tot[c] * 100.;
1840 case CRS_CL_EXPECTED:
1843 case CRS_CL_RESIDUAL:
1844 v = *mp - expected_value;
1846 case CRS_CL_SRESIDUAL:
1847 v = (*mp - expected_value) / sqrt (expected_value);
1849 case CRS_CL_ASRESIDUAL:
1850 v = ((*mp - expected_value)
1851 / sqrt (expected_value
1852 * (1. - row_tot[r] / W)
1853 * (1. - col_tot[c] / W)));
1859 format_cell_entry (table, c, i, v, suffix, mark_missing);
1865 tab_offset (table, -1, tab_row (table) + num_cells);
1873 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1874 for (r = 0; r < n_rows; r++)
1876 bool mark_missing = false;
1878 if (cmd.miss == CRS_REPORT
1879 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1880 mark_missing = true;
1882 for (i = 0; i < num_cells; i++)
1897 v = row_tot[r] / W * 100.;
1901 v = row_tot[r] / W * 100.;
1904 case CRS_CL_EXPECTED:
1905 case CRS_CL_RESIDUAL:
1906 case CRS_CL_SRESIDUAL:
1907 case CRS_CL_ASRESIDUAL:
1914 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1915 tab_next_row (table);
1920 /* Column totals, grand total. */
1926 tab_hline (table, TAL_1, -1, n_cols, 0);
1927 for (c = 0; c <= n_cols; c++)
1929 double ct = c < n_cols ? col_tot[c] : W;
1930 bool mark_missing = false;
1933 if (cmd.miss == CRS_REPORT && c < n_cols
1934 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1935 mark_missing = true;
1937 for (i = 0; i < num_cells; i++)
1959 case CRS_CL_EXPECTED:
1960 case CRS_CL_RESIDUAL:
1961 case CRS_CL_SRESIDUAL:
1962 case CRS_CL_ASRESIDUAL:
1968 format_cell_entry (table, c, i, v, suffix, mark_missing);
1973 tab_offset (table, -1, tab_row (table) + last_row);
1976 tab_offset (table, 0, -1);
1979 static void calc_r (double *X, double *Y, double *, double *, double *);
1980 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1982 /* Display chi-square statistics. */
1984 display_chisq (const struct dictionary *dict)
1986 const struct variable *wv = dict_get_weight (dict);
1987 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
1989 static const char *chisq_stats[N_CHISQ] =
1991 N_("Pearson Chi-Square"),
1992 N_("Likelihood Ratio"),
1993 N_("Fisher's Exact Test"),
1994 N_("Continuity Correction"),
1995 N_("Linear-by-Linear Association"),
1997 double chisq_v[N_CHISQ];
1998 double fisher1, fisher2;
2004 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2006 tab_offset (chisq, nvar - 2, -1);
2008 for (i = 0; i < N_CHISQ; i++)
2010 if ((i != 2 && chisq_v[i] == SYSMIS)
2011 || (i == 2 && fisher1 == SYSMIS))
2015 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2018 tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL);
2019 tab_double (chisq, 2, 0, TAB_RIGHT, df[i], wfmt);
2020 tab_double (chisq, 3, 0, TAB_RIGHT,
2021 gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL);
2026 tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL);
2027 tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL);
2029 tab_next_row (chisq);
2032 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2033 tab_double (chisq, 1, 0, TAB_RIGHT, W, wfmt);
2034 tab_next_row (chisq);
2036 tab_offset (chisq, 0, -1);
2039 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2040 double[N_SYMMETRIC]);
2042 /* Display symmetric measures. */
2044 display_symmetric (const struct dictionary *dict)
2046 const struct variable *wv = dict_get_weight (dict);
2047 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
2049 static const char *categories[] =
2051 N_("Nominal by Nominal"),
2052 N_("Ordinal by Ordinal"),
2053 N_("Interval by Interval"),
2054 N_("Measure of Agreement"),
2057 static const char *stats[N_SYMMETRIC] =
2061 N_("Contingency Coefficient"),
2062 N_("Kendall's tau-b"),
2063 N_("Kendall's tau-c"),
2065 N_("Spearman Correlation"),
2070 static const int stats_categories[N_SYMMETRIC] =
2072 0, 0, 0, 1, 1, 1, 1, 2, 3,
2076 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2079 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2082 tab_offset (sym, nvar - 2, -1);
2084 for (i = 0; i < N_SYMMETRIC; i++)
2086 if (sym_v[i] == SYSMIS)
2089 if (stats_categories[i] != last_cat)
2091 last_cat = stats_categories[i];
2092 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2095 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2096 tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL);
2097 if (sym_ase[i] != SYSMIS)
2098 tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL);
2099 if (sym_t[i] != SYSMIS)
2100 tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL);
2101 /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL);*/
2105 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2106 tab_double (sym, 2, 0, TAB_RIGHT, W, wfmt);
2109 tab_offset (sym, 0, -1);
2112 static int calc_risk (double[], double[], double[], union value *);
2114 /* Display risk estimate. */
2116 display_risk (const struct dictionary *dict)
2118 const struct variable *wv = dict_get_weight (dict);
2119 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
2122 double risk_v[3], lower[3], upper[3];
2126 if (!calc_risk (risk_v, upper, lower, c))
2129 tab_offset (risk, nvar - 2, -1);
2131 for (i = 0; i < 3; i++)
2133 if (risk_v[i] == SYSMIS)
2139 if (var_is_numeric (x->vars[COL_VAR]))
2140 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2141 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2143 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2144 var_get_name (x->vars[COL_VAR]),
2145 var_get_width (x->vars[COL_VAR]), c[0].s,
2146 var_get_width (x->vars[COL_VAR]), c[1].s);
2150 if (var_is_numeric (x->vars[ROW_VAR]))
2151 sprintf (buf, _("For cohort %s = %g"),
2152 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2154 sprintf (buf, _("For cohort %s = %.*s"),
2155 var_get_name (x->vars[ROW_VAR]),
2156 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2160 tab_text (risk, 0, 0, TAB_LEFT, buf);
2161 tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL);
2162 tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL);
2163 tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL);
2164 tab_next_row (risk);
2167 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2168 tab_double (risk, 1, 0, TAB_RIGHT, W, wfmt);
2169 tab_next_row (risk);
2171 tab_offset (risk, 0, -1);
2174 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2175 double[N_DIRECTIONAL]);
2177 /* Display directional measures. */
2179 display_directional (void)
2181 static const char *categories[] =
2183 N_("Nominal by Nominal"),
2184 N_("Ordinal by Ordinal"),
2185 N_("Nominal by Interval"),
2188 static const char *stats[] =
2191 N_("Goodman and Kruskal tau"),
2192 N_("Uncertainty Coefficient"),
2197 static const char *types[] =
2204 static const int stats_categories[N_DIRECTIONAL] =
2206 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2209 static const int stats_stats[N_DIRECTIONAL] =
2211 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2214 static const int stats_types[N_DIRECTIONAL] =
2216 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2219 static const int *stats_lookup[] =
2226 static const char **stats_names[] =
2238 double direct_v[N_DIRECTIONAL];
2239 double direct_ase[N_DIRECTIONAL];
2240 double direct_t[N_DIRECTIONAL];
2244 if (!calc_directional (direct_v, direct_ase, direct_t))
2247 tab_offset (direct, nvar - 2, -1);
2249 for (i = 0; i < N_DIRECTIONAL; i++)
2251 if (direct_v[i] == SYSMIS)
2257 for (j = 0; j < 3; j++)
2258 if (last[j] != stats_lookup[j][i])
2261 tab_hline (direct, TAL_1, j, 6, 0);
2266 int k = last[j] = stats_lookup[j][i];
2271 string = var_get_name (x->vars[0]);
2273 string = var_get_name (x->vars[1]);
2275 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2276 gettext (stats_names[j][k]), string);
2281 tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL);
2282 if (direct_ase[i] != SYSMIS)
2283 tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL);
2284 if (direct_t[i] != SYSMIS)
2285 tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL);
2286 /*tab_double (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), NULL);*/
2287 tab_next_row (direct);
2290 tab_offset (direct, 0, -1);
2293 /* Statistical calculations. */
2295 /* Returns the value of the gamma (factorial) function for an integer
2298 gamma_int (double x)
2303 for (i = 2; i < x; i++)
2308 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2310 static inline double
2311 Pr (int a, int b, int c, int d)
2313 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2314 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2315 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2316 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2317 / gamma_int (a + b + c + d + 1.));
2320 /* Swap the contents of A and B. */
2322 swap (int *a, int *b)
2329 /* Calculate significance for Fisher's exact test as specified in
2330 _SPSS Statistical Algorithms_, Appendix 5. */
2332 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2336 if (MIN (c, d) < MIN (a, b))
2337 swap (&a, &c), swap (&b, &d);
2338 if (MIN (b, d) < MIN (a, c))
2339 swap (&a, &b), swap (&c, &d);
2343 swap (&a, &b), swap (&c, &d);
2345 swap (&a, &c), swap (&b, &d);
2349 for (x = 0; x <= a; x++)
2350 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2352 *fisher2 = *fisher1;
2353 for (x = 1; x <= b; x++)
2354 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2357 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2358 columns with values COLS and N_ROWS rows with values ROWS. Values
2359 in the matrix sum to W. */
2361 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2362 double *fisher1, double *fisher2)
2366 chisq[0] = chisq[1] = 0.;
2367 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2368 *fisher1 = *fisher2 = SYSMIS;
2370 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2372 if (ns_rows <= 1 || ns_cols <= 1)
2374 chisq[0] = chisq[1] = SYSMIS;
2378 for (r = 0; r < n_rows; r++)
2379 for (c = 0; c < n_cols; c++)
2381 const double expected = row_tot[r] * col_tot[c] / W;
2382 const double freq = mat[n_cols * r + c];
2383 const double residual = freq - expected;
2385 chisq[0] += residual * residual / expected;
2387 chisq[1] += freq * log (expected / freq);
2398 /* Calculate Yates and Fisher exact test. */
2399 if (ns_cols == 2 && ns_rows == 2)
2401 double f11, f12, f21, f22;
2407 for (i = j = 0; i < n_cols; i++)
2408 if (col_tot[i] != 0.)
2417 f11 = mat[nz_cols[0]];
2418 f12 = mat[nz_cols[1]];
2419 f21 = mat[nz_cols[0] + n_cols];
2420 f22 = mat[nz_cols[1] + n_cols];
2425 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2428 chisq[3] = (W * x * x
2429 / (f11 + f12) / (f21 + f22)
2430 / (f11 + f21) / (f12 + f22));
2438 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2439 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2442 /* Calculate Mantel-Haenszel. */
2443 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2445 double r, ase_0, ase_1;
2446 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2448 chisq[4] = (W - 1.) * r * r;
2453 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2454 ASE_1, and ase_0 into ASE_0. The row and column values must be
2455 passed in X and Y. */
2457 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2459 double SX, SY, S, T;
2461 double sum_XYf, sum_X2Y2f;
2462 double sum_Xr, sum_X2r;
2463 double sum_Yc, sum_Y2c;
2466 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2467 for (j = 0; j < n_cols; j++)
2469 double fij = mat[j + i * n_cols];
2470 double product = X[i] * Y[j];
2471 double temp = fij * product;
2473 sum_X2Y2f += temp * product;
2476 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2478 sum_Xr += X[i] * row_tot[i];
2479 sum_X2r += pow2 (X[i]) * row_tot[i];
2483 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2485 sum_Yc += Y[i] * col_tot[i];
2486 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2490 S = sum_XYf - sum_Xr * sum_Yc / W;
2491 SX = sum_X2r - pow2 (sum_Xr) / W;
2492 SY = sum_Y2c - pow2 (sum_Yc) / W;
2495 *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / W) / (sum_X2r * sum_Y2c));
2500 for (s = c = 0., i = 0; i < n_rows; i++)
2501 for (j = 0; j < n_cols; j++)
2503 double Xresid, Yresid;
2506 Xresid = X[i] - Xbar;
2507 Yresid = Y[j] - Ybar;
2508 temp = (T * Xresid * Yresid
2510 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2511 y = mat[j + i * n_cols] * temp * temp - c;
2516 *ase_1 = sqrt (s) / (T * T);
2520 static double somers_d_v[3];
2521 static double somers_d_ase[3];
2522 static double somers_d_t[3];
2524 /* Calculate symmetric statistics and their asymptotic standard
2525 errors. Returns 0 if none could be calculated. */
2527 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2528 double t[N_SYMMETRIC])
2530 int q = MIN (ns_rows, ns_cols);
2539 for (i = 0; i < N_SYMMETRIC; i++)
2540 v[i] = ase[i] = t[i] = SYSMIS;
2543 /* Phi, Cramer's V, contingency coefficient. */
2544 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2546 double Xp = 0.; /* Pearson chi-square. */
2551 for (r = 0; r < n_rows; r++)
2552 for (c = 0; c < n_cols; c++)
2554 const double expected = row_tot[r] * col_tot[c] / W;
2555 const double freq = mat[n_cols * r + c];
2556 const double residual = freq - expected;
2558 Xp += residual * residual / expected;
2562 if (cmd.a_statistics[CRS_ST_PHI])
2564 v[0] = sqrt (Xp / W);
2565 v[1] = sqrt (Xp / (W * (q - 1)));
2567 if (cmd.a_statistics[CRS_ST_CC])
2568 v[2] = sqrt (Xp / (Xp + W));
2571 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2572 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2577 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2584 for (r = 0; r < n_rows; r++)
2585 Dr -= pow2 (row_tot[r]);
2586 for (c = 0; c < n_cols; c++)
2587 Dc -= pow2 (col_tot[c]);
2593 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2594 for (c = 0; c < n_cols; c++)
2598 for (r = 0; r < n_rows; r++)
2599 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2609 for (i = 0; i < n_rows; i++)
2613 for (j = 1; j < n_cols; j++)
2614 Cij += col_tot[j] - cum[j + i * n_cols];
2617 for (j = 1; j < n_cols; j++)
2618 Dij += cum[j + (i - 1) * n_cols];
2622 double fij = mat[j + i * n_cols];
2628 assert (j < n_cols);
2630 Cij -= col_tot[j] - cum[j + i * n_cols];
2631 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2635 Cij += cum[j - 1 + (i - 1) * n_cols];
2636 Dij -= cum[j + (i - 1) * n_cols];
2642 if (cmd.a_statistics[CRS_ST_BTAU])
2643 v[3] = (P - Q) / sqrt (Dr * Dc);
2644 if (cmd.a_statistics[CRS_ST_CTAU])
2645 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2646 if (cmd.a_statistics[CRS_ST_GAMMA])
2647 v[5] = (P - Q) / (P + Q);
2649 /* ASE for tau-b, tau-c, gamma. Calculations could be
2650 eliminated here, at expense of memory. */
2655 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2656 for (i = 0; i < n_rows; i++)
2660 for (j = 1; j < n_cols; j++)
2661 Cij += col_tot[j] - cum[j + i * n_cols];
2664 for (j = 1; j < n_cols; j++)
2665 Dij += cum[j + (i - 1) * n_cols];
2669 double fij = mat[j + i * n_cols];
2671 if (cmd.a_statistics[CRS_ST_BTAU])
2673 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2674 + v[3] * (row_tot[i] * Dc
2675 + col_tot[j] * Dr));
2676 btau_cum += fij * temp * temp;
2680 const double temp = Cij - Dij;
2681 ctau_cum += fij * temp * temp;
2684 if (cmd.a_statistics[CRS_ST_GAMMA])
2686 const double temp = Q * Cij - P * Dij;
2687 gamma_cum += fij * temp * temp;
2690 if (cmd.a_statistics[CRS_ST_D])
2692 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2693 - (P - Q) * (W - row_tot[i]));
2694 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2695 - (Q - P) * (W - col_tot[j]));
2700 assert (j < n_cols);
2702 Cij -= col_tot[j] - cum[j + i * n_cols];
2703 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2707 Cij += cum[j - 1 + (i - 1) * n_cols];
2708 Dij -= cum[j + (i - 1) * n_cols];
2714 btau_var = ((btau_cum
2715 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2717 if (cmd.a_statistics[CRS_ST_BTAU])
2719 ase[3] = sqrt (btau_var);
2720 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2723 if (cmd.a_statistics[CRS_ST_CTAU])
2725 ase[4] = ((2 * q / ((q - 1) * W * W))
2726 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2727 t[4] = v[4] / ase[4];
2729 if (cmd.a_statistics[CRS_ST_GAMMA])
2731 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2732 t[5] = v[5] / (2. / (P + Q)
2733 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2735 if (cmd.a_statistics[CRS_ST_D])
2737 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2738 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2739 somers_d_t[0] = (somers_d_v[0]
2741 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2742 somers_d_v[1] = (P - Q) / Dc;
2743 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2744 somers_d_t[1] = (somers_d_v[1]
2746 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2747 somers_d_v[2] = (P - Q) / Dr;
2748 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2749 somers_d_t[2] = (somers_d_v[2]
2751 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2757 /* Spearman correlation, Pearson's r. */
2758 if (cmd.a_statistics[CRS_ST_CORR])
2760 double *R = xmalloca (sizeof *R * n_rows);
2761 double *C = xmalloca (sizeof *C * n_cols);
2764 double y, t, c = 0., s = 0.;
2769 R[i] = s + (row_tot[i] + 1.) / 2.;
2776 assert (i < n_rows);
2781 double y, t, c = 0., s = 0.;
2786 C[j] = s + (col_tot[j] + 1.) / 2;
2793 assert (j < n_cols);
2797 calc_r (R, C, &v[6], &t[6], &ase[6]);
2803 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2807 /* Cohen's kappa. */
2808 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2810 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2813 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2814 i < ns_rows; i++, j++)
2818 while (col_tot[j] == 0.)
2821 prod = row_tot[i] * col_tot[j];
2822 sum = row_tot[i] + col_tot[j];
2824 sum_fii += mat[j + i * n_cols];
2826 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2827 sum_riciri_ci += prod * sum;
2829 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2830 for (j = 0; j < ns_cols; j++)
2832 double sum = row_tot[i] + col_tot[j];
2833 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2836 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2838 ase[8] = sqrt ((W * W * sum_rici
2839 + sum_rici * sum_rici
2840 - W * sum_riciri_ci)
2841 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2843 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2844 / pow2 (W * W - sum_rici))
2845 + ((2. * (W - sum_fii)
2846 * (2. * sum_fii * sum_rici
2847 - W * sum_fiiri_ci))
2848 / cube (W * W - sum_rici))
2849 + (pow2 (W - sum_fii)
2850 * (W * sum_fijri_ci2 - 4.
2851 * sum_rici * sum_rici)
2852 / pow4 (W * W - sum_rici))));
2854 t[8] = v[8] / ase[8];
2861 /* Calculate risk estimate. */
2863 calc_risk (double *value, double *upper, double *lower, union value *c)
2865 double f11, f12, f21, f22;
2871 for (i = 0; i < 3; i++)
2872 value[i] = upper[i] = lower[i] = SYSMIS;
2875 if (ns_rows != 2 || ns_cols != 2)
2882 for (i = j = 0; i < n_cols; i++)
2883 if (col_tot[i] != 0.)
2892 f11 = mat[nz_cols[0]];
2893 f12 = mat[nz_cols[1]];
2894 f21 = mat[nz_cols[0] + n_cols];
2895 f22 = mat[nz_cols[1] + n_cols];
2897 c[0] = cols[nz_cols[0]];
2898 c[1] = cols[nz_cols[1]];
2901 value[0] = (f11 * f22) / (f12 * f21);
2902 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2903 lower[0] = value[0] * exp (-1.960 * v);
2904 upper[0] = value[0] * exp (1.960 * v);
2906 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2907 v = sqrt ((f12 / (f11 * (f11 + f12)))
2908 + (f22 / (f21 * (f21 + f22))));
2909 lower[1] = value[1] * exp (-1.960 * v);
2910 upper[1] = value[1] * exp (1.960 * v);
2912 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2913 v = sqrt ((f11 / (f12 * (f11 + f12)))
2914 + (f21 / (f22 * (f21 + f22))));
2915 lower[2] = value[2] * exp (-1.960 * v);
2916 upper[2] = value[2] * exp (1.960 * v);
2921 /* Calculate directional measures. */
2923 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2924 double t[N_DIRECTIONAL])
2929 for (i = 0; i < N_DIRECTIONAL; i++)
2930 v[i] = ase[i] = t[i] = SYSMIS;
2934 if (cmd.a_statistics[CRS_ST_LAMBDA])
2936 double *fim = xnmalloc (n_rows, sizeof *fim);
2937 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2938 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2939 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2940 double sum_fim, sum_fmj;
2942 int rm_index, cm_index;
2945 /* Find maximum for each row and their sum. */
2946 for (sum_fim = 0., i = 0; i < n_rows; i++)
2948 double max = mat[i * n_cols];
2951 for (j = 1; j < n_cols; j++)
2952 if (mat[j + i * n_cols] > max)
2954 max = mat[j + i * n_cols];
2958 sum_fim += fim[i] = max;
2959 fim_index[i] = index;
2962 /* Find maximum for each column. */
2963 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2965 double max = mat[j];
2968 for (i = 1; i < n_rows; i++)
2969 if (mat[j + i * n_cols] > max)
2971 max = mat[j + i * n_cols];
2975 sum_fmj += fmj[j] = max;
2976 fmj_index[j] = index;
2979 /* Find maximum row total. */
2982 for (i = 1; i < n_rows; i++)
2983 if (row_tot[i] > rm)
2989 /* Find maximum column total. */
2992 for (j = 1; j < n_cols; j++)
2993 if (col_tot[j] > cm)
2999 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
3000 v[1] = (sum_fmj - rm) / (W - rm);
3001 v[2] = (sum_fim - cm) / (W - cm);
3003 /* ASE1 for Y given X. */
3007 for (accum = 0., i = 0; i < n_rows; i++)
3008 for (j = 0; j < n_cols; j++)
3010 const int deltaj = j == cm_index;
3011 accum += (mat[j + i * n_cols]
3012 * pow2 ((j == fim_index[i])
3017 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3020 /* ASE0 for Y given X. */
3024 for (accum = 0., i = 0; i < n_rows; i++)
3025 if (cm_index != fim_index[i])
3026 accum += (mat[i * n_cols + fim_index[i]]
3027 + mat[i * n_cols + cm_index]);
3028 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3031 /* ASE1 for X given Y. */
3035 for (accum = 0., i = 0; i < n_rows; i++)
3036 for (j = 0; j < n_cols; j++)
3038 const int deltaj = i == rm_index;
3039 accum += (mat[j + i * n_cols]
3040 * pow2 ((i == fmj_index[j])
3045 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3048 /* ASE0 for X given Y. */
3052 for (accum = 0., j = 0; j < n_cols; j++)
3053 if (rm_index != fmj_index[j])
3054 accum += (mat[j + n_cols * fmj_index[j]]
3055 + mat[j + n_cols * rm_index]);
3056 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3059 /* Symmetric ASE0 and ASE1. */
3064 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3065 for (j = 0; j < n_cols; j++)
3067 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3068 int temp1 = (i == rm_index) + (j == cm_index);
3069 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3070 accum1 += (mat[j + i * n_cols]
3071 * pow2 (temp0 + (v[0] - 1.) * temp1));
3073 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3074 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3075 / (2. * W - rm - cm));
3084 double sum_fij2_ri, sum_fij2_ci;
3085 double sum_ri2, sum_cj2;
3087 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3088 for (j = 0; j < n_cols; j++)
3090 double temp = pow2 (mat[j + i * n_cols]);
3091 sum_fij2_ri += temp / row_tot[i];
3092 sum_fij2_ci += temp / col_tot[j];
3095 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3096 sum_ri2 += pow2 (row_tot[i]);
3098 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3099 sum_cj2 += pow2 (col_tot[j]);
3101 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3102 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3106 if (cmd.a_statistics[CRS_ST_UC])
3108 double UX, UY, UXY, P;
3109 double ase1_yx, ase1_xy, ase1_sym;
3112 for (UX = 0., i = 0; i < n_rows; i++)
3113 if (row_tot[i] > 0.)
3114 UX -= row_tot[i] / W * log (row_tot[i] / W);
3116 for (UY = 0., j = 0; j < n_cols; j++)
3117 if (col_tot[j] > 0.)
3118 UY -= col_tot[j] / W * log (col_tot[j] / W);
3120 for (UXY = P = 0., i = 0; i < n_rows; i++)
3121 for (j = 0; j < n_cols; j++)
3123 double entry = mat[j + i * n_cols];
3128 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3129 UXY -= entry / W * log (entry / W);
3132 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3133 for (j = 0; j < n_cols; j++)
3135 double entry = mat[j + i * n_cols];
3140 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3141 + (UX - UXY) * log (col_tot[j] / W));
3142 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3143 + (UY - UXY) * log (row_tot[i] / W));
3144 ase1_sym += entry * pow2 ((UXY
3145 * log (row_tot[i] * col_tot[j] / (W * W)))
3146 - (UX + UY) * log (entry / W));
3149 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3150 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3151 t[5] = v[5] / ((2. / (W * (UX + UY)))
3152 * sqrt (P - pow2 (UX + UY - UXY) / W));
3154 v[6] = (UX + UY - UXY) / UX;
3155 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3156 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3158 v[7] = (UX + UY - UXY) / UY;
3159 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3160 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3164 if (cmd.a_statistics[CRS_ST_D])
3169 calc_symmetric (NULL, NULL, NULL);
3170 for (i = 0; i < 3; i++)
3172 v[8 + i] = somers_d_v[i];
3173 ase[8 + i] = somers_d_ase[i];
3174 t[8 + i] = somers_d_t[i];
3179 if (cmd.a_statistics[CRS_ST_ETA])
3182 double sum_Xr, sum_X2r;
3186 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3188 sum_Xr += rows[i].f * row_tot[i];
3189 sum_X2r += pow2 (rows[i].f) * row_tot[i];
3191 SX = sum_X2r - pow2 (sum_Xr) / W;
3193 for (SXW = 0., j = 0; j < n_cols; j++)
3197 for (cum = 0., i = 0; i < n_rows; i++)
3199 SXW += pow2 (rows[i].f) * mat[j + i * n_cols];
3200 cum += rows[i].f * mat[j + i * n_cols];
3203 SXW -= cum * cum / col_tot[j];
3205 v[11] = sqrt (1. - SXW / SX);
3209 double sum_Yc, sum_Y2c;
3213 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3215 sum_Yc += cols[i].f * col_tot[i];
3216 sum_Y2c += pow2 (cols[i].f) * col_tot[i];
3218 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3220 for (SYW = 0., i = 0; i < n_rows; i++)
3224 for (cum = 0., j = 0; j < n_cols; j++)
3226 SYW += pow2 (cols[j].f) * mat[j + i * n_cols];
3227 cum += cols[j].f * mat[j + i * n_cols];
3230 SYW -= cum * cum / row_tot[i];
3232 v[12] = sqrt (1. - SYW / SY);
3239 /* A wrapper around data_out() that limits string output to short
3240 string width and null terminates the result. */
3242 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3244 struct fmt_spec fmt_subst;
3246 /* Limit to short string width. */
3247 if (fmt_is_string (fp->type))
3251 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3252 if (fmt_subst.type == FMT_A)
3253 fmt_subst.w = MIN (8, fmt_subst.w);
3255 fmt_subst.w = MIN (16, fmt_subst.w);
3261 data_out (v, fp, s);
3263 /* Null terminate. */