1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/message.h>
54 #include <libpspp/misc.h>
55 #include <libpspp/pool.h>
56 #include <libpspp/str.h>
57 #include <output/output.h>
58 #include <output/table.h>
65 #define _(msgid) gettext (msgid)
66 #define N_(msgid) msgid
74 missing=miss:!table/include/report;
75 +write[wr_]=none,cells,all;
76 +format=fmt:!labels/nolabels/novallabs,
79 tabl:!tables/notables,
82 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
84 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
90 /* Number of chi-square statistics. */
93 /* Number of symmetric statistics. */
96 /* Number of directional statistics. */
97 #define N_DIRECTIONAL 13
99 /* A single table entry for general mode. */
102 int table; /* Flattened table number. */
105 double freq; /* Frequency count. */
106 double *data; /* Crosstabulation table for integer mode. */
109 union value values[1]; /* Values. */
112 /* A crosstabulation. */
115 int nvar; /* Number of variables. */
116 double missing; /* Missing cases count. */
117 int ofs; /* Integer mode: Offset into sorted_tab[]. */
118 const struct variable *vars[2]; /* At least two variables; sorted by
119 larger indices first. */
122 /* Integer mode variable info. */
125 int min; /* Minimum value. */
126 int max; /* Maximum value + 1. */
127 int count; /* max - min. */
130 static inline struct var_range *
131 get_var_range (const struct variable *v)
133 return var_get_aux (v);
136 /* Indexes into crosstab.v. */
143 /* General mode crosstabulation table. */
144 static struct hsh_table *gen_tab; /* Hash table. */
145 static int n_sorted_tab; /* Number of entries in sorted_tab. */
146 static struct table_entry **sorted_tab; /* Sorted table. */
148 /* Variables specifies on VARIABLES. */
149 static const struct variable **variables;
150 static size_t variables_cnt;
153 static struct crosstab **xtab;
156 /* Integer or general mode? */
165 static int num_cells; /* Number of cells requested. */
166 static int cells[8]; /* Cells requested. */
169 static int write_style; /* One of WR_* that specifies the WRITE style. */
171 /* Command parsing info. */
172 static struct cmd_crosstabs cmd;
175 static struct pool *pl_tc; /* For table cells. */
176 static struct pool *pl_col; /* For column data. */
178 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
179 static void precalc (struct casereader *, const struct dataset *);
180 static void calc_general (struct ccase *, const struct dataset *);
181 static void calc_integer (struct ccase *, const struct dataset *);
182 static void postcalc (const struct dataset *);
183 static void submit (struct tab_table *);
185 static void format_short (char *s, const struct fmt_spec *fp,
186 const union value *v);
188 /* Parse and execute CROSSTABS, then clean up. */
190 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
192 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
199 for (i = 0; i < nxtab; i++)
206 /* Parses and executes the CROSSTABS procedure. */
208 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
210 struct casegrouper *grouper;
211 struct casereader *input, *group;
219 pl_tc = pool_create ();
220 pl_col = pool_create ();
222 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
225 mode = variables ? INTEGER : GENERAL;
230 cmd.a_cells[CRS_CL_COUNT] = 1;
236 for (i = 0; i < CRS_CL_count; i++)
241 cmd.a_cells[CRS_CL_COUNT] = 1;
242 cmd.a_cells[CRS_CL_ROW] = 1;
243 cmd.a_cells[CRS_CL_COLUMN] = 1;
244 cmd.a_cells[CRS_CL_TOTAL] = 1;
246 if (cmd.a_cells[CRS_CL_ALL])
248 for (i = 0; i < CRS_CL_count; i++)
250 cmd.a_cells[CRS_CL_ALL] = 0;
252 cmd.a_cells[CRS_CL_NONE] = 0;
254 for (num_cells = i = 0; i < CRS_CL_count; i++)
256 cells[num_cells++] = i;
259 if (cmd.sbc_statistics)
264 for (i = 0; i < CRS_ST_count; i++)
265 if (cmd.a_statistics[i])
268 cmd.a_statistics[CRS_ST_CHISQ] = 1;
269 if (cmd.a_statistics[CRS_ST_ALL])
270 for (i = 0; i < CRS_ST_count; i++)
271 cmd.a_statistics[i] = 1;
275 if (cmd.miss == CRS_REPORT && mode == GENERAL)
277 msg (SE, _("Missing mode REPORT not allowed in general mode. "
278 "Assuming MISSING=TABLE."));
279 cmd.miss = CRS_TABLE;
283 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
284 cmd.a_write[CRS_WR_ALL] = 0;
285 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
287 msg (SE, _("Write mode ALL not allowed in general mode. "
288 "Assuming WRITE=CELLS."));
289 cmd.a_write[CRS_WR_CELLS] = 1;
292 && (cmd.a_write[CRS_WR_NONE]
293 + cmd.a_write[CRS_WR_ALL]
294 + cmd.a_write[CRS_WR_CELLS] == 0))
295 cmd.a_write[CRS_WR_CELLS] = 1;
296 if (cmd.a_write[CRS_WR_CELLS])
297 write_style = CRS_WR_CELLS;
298 else if (cmd.a_write[CRS_WR_ALL])
299 write_style = CRS_WR_ALL;
301 write_style = CRS_WR_NONE;
303 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
305 grouper = casegrouper_create_splits (input, dataset_dict (ds));
306 while (casegrouper_get_next_group (grouper, &group))
312 for (; casereader_read (group, &c); case_destroy (&c))
315 calc_general (&c, ds);
317 calc_integer (&c, ds);
319 casereader_destroy (group);
323 ok = casegrouper_destroy (grouper);
324 ok = proc_commit (ds) && ok;
326 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
329 /* Parses the TABLES subcommand. */
331 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
333 struct const_var_set *var_set;
335 const struct variable ***by = NULL;
336 size_t *by_nvar = NULL;
340 /* Ensure that this is a TABLES subcommand. */
341 if (!lex_match_id (lexer, "TABLES")
342 && (lex_token (lexer) != T_ID ||
343 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
344 && lex_token (lexer) != T_ALL)
346 lex_match (lexer, '=');
348 if (variables != NULL)
349 var_set = const_var_set_create_from_array (variables, variables_cnt);
351 var_set = const_var_set_create_from_dict (dataset_dict (ds));
352 assert (var_set != NULL);
356 by = xnrealloc (by, n_by + 1, sizeof *by);
357 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
358 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
359 PV_NO_DUPLICATE | PV_NO_SCRATCH))
361 if (xalloc_oversized (nx, by_nvar[n_by]))
363 msg (SE, _("Too many cross-tabulation variables or dimensions."));
369 if (!lex_match (lexer, T_BY))
373 lex_error (lexer, _("expecting BY"));
382 int *by_iter = xcalloc (n_by, sizeof *by_iter);
385 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
386 for (i = 0; i < nx; i++)
390 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
397 for (i = 0; i < n_by; i++)
398 x->vars[i] = by[i][by_iter[i]];
404 for (i = n_by - 1; i >= 0; i--)
406 if (++by_iter[i] < by_nvar[i])
419 /* All return paths lead here. */
423 for (i = 0; i < n_by; i++)
429 const_var_set_destroy (var_set);
434 /* Parses the VARIABLES subcommand. */
436 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
440 msg (SE, _("VARIABLES must be specified before TABLES."));
444 lex_match (lexer, '=');
448 size_t orig_nv = variables_cnt;
453 if (!parse_variables_const (lexer, dataset_dict (ds),
454 &variables, &variables_cnt,
455 (PV_APPEND | PV_NUMERIC
456 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
459 if (lex_token (lexer) != '(')
461 lex_error (lexer, "expecting `('");
466 if (!lex_force_int (lexer))
468 min = lex_integer (lexer);
471 lex_match (lexer, ',');
473 if (!lex_force_int (lexer))
475 max = lex_integer (lexer);
478 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
484 if (lex_token (lexer) != ')')
486 lex_error (lexer, "expecting `)'");
491 for (i = orig_nv; i < variables_cnt; i++)
493 struct var_range *vr = xmalloc (sizeof *vr);
496 vr->count = max - min + 1;
497 var_attach_aux (variables[i], vr, var_dtor_free);
500 if (lex_token (lexer) == '/')
512 /* Data file processing. */
514 static int compare_table_entry (const void *, const void *, const void *);
515 static unsigned hash_table_entry (const void *, const void *);
517 /* Set up the crosstabulation tables for processing. */
519 precalc (struct casereader *input, const struct dataset *ds)
523 if (casereader_peek (input, 0, &c))
525 output_split_file_values (ds, &c);
531 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
541 for (i = 0; i < nxtab; i++)
543 struct crosstab *x = xtab[i];
548 x->ofs = n_sorted_tab;
550 for (j = 2; j < x->nvar; j++)
551 count *= get_var_range (x->vars[j - 2])->count;
553 sorted_tab = xnrealloc (sorted_tab,
554 n_sorted_tab + count, sizeof *sorted_tab);
555 v = xmalloca (sizeof *v * x->nvar);
556 for (j = 2; j < x->nvar; j++)
557 v[j] = get_var_range (x->vars[j])->min;
558 for (j = 0; j < count; j++)
560 struct table_entry *te;
563 te = sorted_tab[n_sorted_tab++]
564 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
568 int row_cnt = get_var_range (x->vars[0])->count;
569 int col_cnt = get_var_range (x->vars[1])->count;
570 const int mat_size = row_cnt * col_cnt;
573 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
574 for (m = 0; m < mat_size; m++)
578 for (k = 2; k < x->nvar; k++)
579 te->values[k].f = v[k];
580 for (k = 2; k < x->nvar; k++)
582 struct var_range *vr = get_var_range (x->vars[k]);
583 if (++v[k] >= vr->max)
592 sorted_tab = xnrealloc (sorted_tab,
593 n_sorted_tab + 1, sizeof *sorted_tab);
594 sorted_tab[n_sorted_tab] = NULL;
599 /* Form crosstabulations for general mode. */
601 calc_general (struct ccase *c, const struct dataset *ds)
603 /* Missing values to exclude. */
604 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
605 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
609 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
611 /* Flattened current table index. */
614 for (t = 0; t < nxtab; t++)
616 struct crosstab *x = xtab[t];
617 const size_t entry_size = (sizeof (struct table_entry)
618 + sizeof (union value) * (x->nvar - 1));
619 struct table_entry *te = xmalloca (entry_size);
621 /* Construct table entry for the current record and table. */
627 for (j = 0; j < x->nvar; j++)
629 const union value *v = case_data (c, x->vars[j]);
630 if (var_is_value_missing (x->vars[j], v, exclude))
632 x->missing += weight;
636 if (var_is_numeric (x->vars[j]))
637 te->values[j].f = case_num (c, x->vars[j]);
640 size_t n = var_get_width (x->vars[j]);
641 if (n > MAX_SHORT_STRING)
642 n = MAX_SHORT_STRING;
643 memcpy (te->values[j].s, case_str (c, x->vars[j]), n);
645 /* Necessary in order to simplify comparisons. */
646 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
647 sizeof (union value) - n);
652 /* Add record to hash table. */
654 struct table_entry **tepp
655 = (struct table_entry **) hsh_probe (gen_tab, te);
658 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
661 memcpy (tep, te, entry_size);
666 (*tepp)->u.freq += weight;
675 calc_integer (struct ccase *c, const struct dataset *ds)
677 bool bad_warn = true;
680 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
682 /* Flattened current table index. */
685 for (t = 0; t < nxtab; t++)
687 struct crosstab *x = xtab[t];
692 for (i = 0; i < x->nvar; i++)
694 const struct variable *const v = x->vars[i];
695 struct var_range *vr = get_var_range (v);
696 double value = case_num (c, v);
698 /* Note that the first test also rules out SYSMIS. */
699 if ((value < vr->min || value >= vr->max)
700 || (cmd.miss == CRS_TABLE
701 && var_is_num_missing (v, value, MV_USER)))
703 x->missing += weight;
709 ofs += fact * ((int) value - vr->min);
715 const struct variable *row_var = x->vars[ROW_VAR];
716 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
718 const struct variable *col_var = x->vars[COL_VAR];
719 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
721 const int col_dim = get_var_range (col_var)->count;
723 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
730 /* Compare the table_entry's at A and B and return a strcmp()-type
733 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
735 const struct table_entry *a = a_;
736 const struct table_entry *b = b_;
738 if (a->table > b->table)
740 else if (a->table < b->table)
744 const struct crosstab *x = xtab[a->table];
747 for (i = x->nvar - 1; i >= 0; i--)
748 if (var_is_numeric (x->vars[i]))
750 const double diffnum = a->values[i].f - b->values[i].f;
753 else if (diffnum > 0)
758 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
759 var_get_width (x->vars[i]));
768 /* Calculate a hash value from table_entry A. */
770 hash_table_entry (const void *a_, const void *aux UNUSED)
772 const struct table_entry *a = a_;
777 for (i = 0; i < xtab[a->table]->nvar; i++)
778 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
783 /* Post-data reading calculations. */
785 static struct table_entry **find_pivot_extent (struct table_entry **,
786 int *cnt, int pivot);
787 static void enum_var_values (struct table_entry **entries, int entry_cnt,
789 union value **values, int *value_cnt);
790 static void output_pivot_table (struct table_entry **, struct table_entry **,
791 const struct dictionary *,
792 double **, double **, double **,
793 int *, int *, int *);
794 static void make_summary_table (const struct dictionary *);
797 postcalc (const struct dataset *ds)
801 n_sorted_tab = hsh_count (gen_tab);
802 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
805 make_summary_table (dataset_dict (ds));
807 /* Identify all the individual crosstabulation tables, and deal with
810 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
811 int pc = n_sorted_tab; /* Pivot count. */
813 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
814 int maxrows = 0, maxcols = 0, maxcells = 0;
818 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
822 output_pivot_table (pb, pe, dataset_dict (ds),
823 &mat, &row_tot, &col_tot,
824 &maxrows, &maxcols, &maxcells);
833 hsh_destroy (gen_tab);
837 for (i = 0; i < n_sorted_tab; i++)
839 free (sorted_tab[i]->u.data);
840 free (sorted_tab[i]);
846 static void insert_summary (struct tab_table *, int tab_index,
847 const struct dictionary *,
850 /* Output a table summarizing the cases processed. */
852 make_summary_table (const struct dictionary *dict)
854 struct tab_table *summary;
856 struct table_entry **pb = sorted_tab, **pe;
857 int pc = n_sorted_tab;
860 summary = tab_create (7, 3 + nxtab, 1);
861 tab_title (summary, _("Summary."));
862 tab_headers (summary, 1, 0, 3, 0);
863 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
864 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
865 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
866 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
867 tab_hline (summary, TAL_1, 1, 6, 1);
868 tab_hline (summary, TAL_1, 1, 6, 2);
869 tab_vline (summary, TAL_1, 3, 1, 1);
870 tab_vline (summary, TAL_1, 5, 1, 1);
874 for (i = 0; i < 3; i++)
876 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
877 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
880 tab_offset (summary, 0, 3);
886 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
890 while (cur_tab < (*pb)->table)
891 insert_summary (summary, cur_tab++, dict, 0.);
894 for (valid = 0.; pb < pe; pb++)
895 valid += (*pb)->u.freq;
898 const struct crosstab *const x = xtab[(*pb)->table];
899 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
900 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
901 const int count = n_cols * n_rows;
903 for (valid = 0.; pb < pe; pb++)
905 const double *data = (*pb)->u.data;
908 for (i = 0; i < count; i++)
912 insert_summary (summary, cur_tab++, dict, valid);
917 while (cur_tab < nxtab)
918 insert_summary (summary, cur_tab++, dict, 0.);
923 /* Inserts a line into T describing the crosstabulation at index
924 TAB_INDEX, which has VALID valid observations. */
926 insert_summary (struct tab_table *t, int tab_index,
927 const struct dictionary *dict,
930 struct crosstab *x = xtab[tab_index];
932 const struct variable *wv = dict_get_weight (dict);
933 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
935 tab_hline (t, TAL_1, 0, 6, 0);
937 /* Crosstabulation name. */
939 char *buf = xmalloca (128 * x->nvar);
943 for (i = 0; i < x->nvar; i++)
946 cp = stpcpy (cp, " * ");
948 cp = stpcpy (cp, var_to_string (x->vars[i]));
950 tab_text (t, 0, 0, TAB_LEFT, buf);
955 /* Counts and percentages. */
965 for (i = 0; i < 3; i++)
967 tab_double (t, i * 2 + 1, 0, TAB_RIGHT, n[i], wfmt);
968 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
979 static struct tab_table *table; /* Crosstabulation table. */
980 static struct tab_table *chisq; /* Chi-square table. */
981 static struct tab_table *sym; /* Symmetric measures table. */
982 static struct tab_table *risk; /* Risk estimate table. */
983 static struct tab_table *direct; /* Directional measures table. */
986 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
988 /* Column values, number of columns. */
989 static union value *cols;
992 /* Row values, number of rows. */
993 static union value *rows;
996 /* Number of statistically interesting columns/rows (columns/rows with
998 static int ns_cols, ns_rows;
1000 /* Crosstabulation. */
1001 static const struct crosstab *x;
1003 /* Number of variables from the crosstabulation to consider. This is
1004 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1007 /* Matrix contents. */
1008 static double *mat; /* Matrix proper. */
1009 static double *row_tot; /* Row totals. */
1010 static double *col_tot; /* Column totals. */
1011 static double W; /* Grand total. */
1013 static void display_dimensions (struct tab_table *, int first_difference,
1014 struct table_entry *);
1015 static void display_crosstabulation (void);
1016 static void display_chisq (const struct dictionary *);
1017 static void display_symmetric (const struct dictionary *);
1018 static void display_risk (const struct dictionary *);
1019 static void display_directional (void);
1020 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1021 static void table_value_missing (struct tab_table *table, int c, int r,
1022 unsigned char opt, const union value *v,
1023 const struct variable *var);
1024 static void delete_missing (void);
1026 /* Output pivot table beginning at PB and continuing until PE,
1027 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1028 hold *MAXROWS entries. */
1030 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1031 const struct dictionary *dict,
1032 double **matp, double **row_totp, double **col_totp,
1033 int *maxrows, int *maxcols, int *maxcells)
1036 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1037 int tc = pe - pb; /* Table count. */
1039 /* Table entry for header comparison. */
1040 struct table_entry *cmp = NULL;
1042 x = xtab[(*pb)->table];
1043 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1045 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1047 /* Crosstabulation table initialization. */
1050 table = tab_create (nvar + n_cols,
1051 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1052 tab_headers (table, nvar - 1, 0, 2, 0);
1054 /* First header line. */
1055 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1056 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1058 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1060 /* Second header line. */
1064 for (i = 2; i < nvar; i++)
1065 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1066 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1067 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1068 var_get_name (x->vars[ROW_VAR]));
1069 for (i = 0; i < n_cols; i++)
1070 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1072 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1075 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1076 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1080 char *title = xmalloca (x->nvar * 64 + 128);
1084 if (cmd.pivot == CRS_PIVOT)
1085 for (i = 0; i < nvar; i++)
1088 cp = stpcpy (cp, " by ");
1089 cp = stpcpy (cp, var_get_name (x->vars[i]));
1093 cp = spprintf (cp, "%s by %s for",
1094 var_get_name (x->vars[0]),
1095 var_get_name (x->vars[1]));
1096 for (i = 2; i < nvar; i++)
1098 char buf[64], *bufp;
1103 cp = stpcpy (cp, var_get_name (x->vars[i]));
1105 format_short (buf, var_get_print_format (x->vars[i]),
1107 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1109 cp = stpcpy (cp, bufp);
1113 cp = stpcpy (cp, " [");
1114 for (i = 0; i < num_cells; i++)
1122 static const struct tuple cell_names[] =
1124 {CRS_CL_COUNT, N_("count")},
1125 {CRS_CL_ROW, N_("row %")},
1126 {CRS_CL_COLUMN, N_("column %")},
1127 {CRS_CL_TOTAL, N_("total %")},
1128 {CRS_CL_EXPECTED, N_("expected")},
1129 {CRS_CL_RESIDUAL, N_("residual")},
1130 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1131 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1135 const struct tuple *t;
1137 for (t = cell_names; t->value != cells[i]; t++)
1138 assert (t->value != -1);
1140 cp = stpcpy (cp, ", ");
1141 cp = stpcpy (cp, gettext (t->name));
1145 tab_title (table, "%s", title);
1149 tab_offset (table, 0, 2);
1154 /* Chi-square table initialization. */
1155 if (cmd.a_statistics[CRS_ST_CHISQ])
1157 chisq = tab_create (6 + (nvar - 2),
1158 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1159 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1161 tab_title (chisq, _("Chi-square tests."));
1163 tab_offset (chisq, nvar - 2, 0);
1164 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1165 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1166 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1167 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1168 _("Asymp. Sig. (2-sided)"));
1169 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1170 _("Exact. Sig. (2-sided)"));
1171 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1172 _("Exact. Sig. (1-sided)"));
1174 tab_offset (chisq, 0, 1);
1179 /* Symmetric measures. */
1180 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1181 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1182 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1183 || cmd.a_statistics[CRS_ST_KAPPA])
1185 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1186 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1187 tab_title (sym, _("Symmetric measures."));
1189 tab_offset (sym, nvar - 2, 0);
1190 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1191 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1192 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1193 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1194 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1195 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1196 tab_offset (sym, 0, 1);
1201 /* Risk estimate. */
1202 if (cmd.a_statistics[CRS_ST_RISK])
1204 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1205 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1206 tab_title (risk, _("Risk estimate."));
1208 tab_offset (risk, nvar - 2, 0);
1209 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1210 _("95%% Confidence Interval"));
1211 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1212 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1213 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1214 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1215 tab_hline (risk, TAL_1, 2, 3, 1);
1216 tab_vline (risk, TAL_1, 2, 0, 1);
1217 tab_offset (risk, 0, 2);
1222 /* Directional measures. */
1223 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1224 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1226 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1227 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1228 tab_title (direct, _("Directional measures."));
1230 tab_offset (direct, nvar - 2, 0);
1231 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1232 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1233 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1234 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1235 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1236 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1237 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1238 tab_offset (direct, 0, 1);
1245 /* Find pivot subtable if applicable. */
1246 te = find_pivot_extent (tb, &tc, 0);
1250 /* Find all the row variable values. */
1251 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1253 /* Allocate memory space for the column and row totals. */
1254 if (n_rows > *maxrows)
1256 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1257 row_tot = *row_totp;
1260 if (n_cols > *maxcols)
1262 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1263 col_tot = *col_totp;
1267 /* Allocate table space for the matrix. */
1268 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1269 tab_realloc (table, -1,
1270 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1271 tab_nr (table) * (pe - pb) / (te - tb)));
1273 if (mode == GENERAL)
1275 /* Allocate memory space for the matrix. */
1276 if (n_cols * n_rows > *maxcells)
1278 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1279 *maxcells = n_cols * n_rows;
1284 /* Build the matrix and calculate column totals. */
1286 union value *cur_col = cols;
1287 union value *cur_row = rows;
1289 double *cp = col_tot;
1290 struct table_entry **p;
1293 for (p = &tb[0]; p < te; p++)
1295 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1299 for (; cur_row < &rows[n_rows]; cur_row++)
1305 mp = &mat[cur_col - cols];
1308 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1315 *cp += *mp = (*p)->u.freq;
1320 /* Zero out the rest of the matrix. */
1321 for (; cur_row < &rows[n_rows]; cur_row++)
1327 if (cur_col < &cols[n_cols])
1329 const int rem_cols = n_cols - (cur_col - cols);
1332 for (c = 0; c < rem_cols; c++)
1334 mp = &mat[cur_col - cols];
1335 for (r = 0; r < n_rows; r++)
1337 for (c = 0; c < rem_cols; c++)
1339 mp += n_cols - rem_cols;
1347 double *tp = col_tot;
1349 assert (mode == INTEGER);
1350 mat = (*tb)->u.data;
1353 /* Calculate column totals. */
1354 for (c = 0; c < n_cols; c++)
1357 double *cp = &mat[c];
1359 for (r = 0; r < n_rows; r++)
1360 cum += cp[r * n_cols];
1368 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1369 ns_cols += *cp != 0.;
1372 /* Calculate row totals. */
1375 double *rp = row_tot;
1378 for (ns_rows = 0, r = n_rows; r--; )
1381 for (c = n_cols; c--; )
1389 /* Calculate grand total. */
1395 if (n_rows < n_cols)
1396 tp = row_tot, n = n_rows;
1398 tp = col_tot, n = n_cols;
1404 /* Find the first variable that differs from the last subtable,
1405 then display the values of the dimensioning variables for
1406 each table that needs it. */
1408 int first_difference = nvar - 1;
1411 for (; ; first_difference--)
1413 assert (first_difference >= 2);
1414 if (memcmp (&cmp->values[first_difference],
1415 &(*tb)->values[first_difference],
1416 sizeof *cmp->values))
1422 display_dimensions (table, first_difference, *tb);
1424 display_dimensions (chisq, first_difference, *tb);
1426 display_dimensions (sym, first_difference, *tb);
1428 display_dimensions (risk, first_difference, *tb);
1430 display_dimensions (direct, first_difference, *tb);
1434 display_crosstabulation ();
1435 if (cmd.miss == CRS_REPORT)
1438 display_chisq (dict);
1440 display_symmetric (dict);
1442 display_risk (dict);
1444 display_directional ();
1455 tab_resize (chisq, 4 + (nvar - 2), -1);
1466 /* Delete missing rows and columns for statistical analysis when
1469 delete_missing (void)
1474 for (r = 0; r < n_rows; r++)
1475 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1479 for (c = 0; c < n_cols; c++)
1480 mat[c + r * n_cols] = 0.;
1488 for (c = 0; c < n_cols; c++)
1489 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1493 for (r = 0; r < n_rows; r++)
1494 mat[c + r * n_cols] = 0.;
1500 /* Prepare table T for submission, and submit it. */
1502 submit (struct tab_table *t)
1509 tab_resize (t, -1, 0);
1510 if (tab_nr (t) == tab_t (t))
1515 tab_offset (t, 0, 0);
1517 for (i = 2; i < nvar; i++)
1518 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1519 var_to_string (x->vars[i]));
1520 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1521 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1523 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1525 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1526 tab_dim (t, crosstabs_dim);
1530 /* Sets the widths of all the columns and heights of all the rows in
1531 table T for driver D. */
1533 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1537 /* Width of a numerical column. */
1538 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1539 if (cmd.miss == CRS_REPORT)
1540 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1542 /* Set width for header columns. */
1548 w = d->width - c * (t->nc - t->l);
1549 for (i = 0; i <= t->nc; i++)
1553 if (w < d->prop_em_width * 8)
1554 w = d->prop_em_width * 8;
1556 if (w > d->prop_em_width * 15)
1557 w = d->prop_em_width * 15;
1559 for (i = 0; i < t->l; i++)
1563 for (i = t->l; i < t->nc; i++)
1566 for (i = 0; i < t->nr; i++)
1567 t->h[i] = tab_natural_height (t, d, i);
1570 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1571 int *cnt, int pivot);
1572 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1573 int *cnt, int pivot);
1575 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1577 static struct table_entry **
1578 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1580 return (mode == GENERAL
1581 ? find_pivot_extent_general (tp, cnt, pivot)
1582 : find_pivot_extent_integer (tp, cnt, pivot));
1585 /* Find the extent of a region in TP that contains one table. If
1586 PIVOT != 0 that means a set of table entries with identical table
1587 number; otherwise they also have to have the same values for every
1588 dimension after the row and column dimensions. The table that is
1589 searched starts at TP and has length CNT. Returns the first entry
1590 after the last one in the table; sets *CNT to the number of
1591 remaining values. If there are no entries in TP at all, returns
1592 NULL. A yucky interface, admittedly, but it works. */
1593 static struct table_entry **
1594 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1596 struct table_entry *fp = *tp;
1601 x = xtab[(*tp)->table];
1609 if ((*tp)->table != fp->table)
1614 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1621 /* Integer mode correspondent to find_pivot_extent_general(). This
1622 could be optimized somewhat, but I just don't give a crap about
1623 CROSSTABS performance in integer mode, which is just a
1624 CROSSTABS wart as far as I'm concerned.
1626 That said, feel free to send optimization patches to me. */
1627 static struct table_entry **
1628 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1630 struct table_entry *fp = *tp;
1635 x = xtab[(*tp)->table];
1643 if ((*tp)->table != fp->table)
1648 if (memcmp (&(*tp)->values[2], &fp->values[2],
1649 sizeof (union value) * (x->nvar - 2)))
1656 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1657 result. WIDTH_ points to an int which is either 0 for a
1658 numeric value or a string width for a string value. */
1660 compare_value (const void *a_, const void *b_, const void *width_)
1662 const union value *a = a_;
1663 const union value *b = b_;
1664 const int *pwidth = width_;
1665 const int width = *pwidth;
1668 return (a->f < b->f) ? -1 : (a->f > b->f);
1670 return strncmp (a->s, b->s, width);
1673 /* Given an array of ENTRY_CNT table_entry structures starting at
1674 ENTRIES, creates a sorted list of the values that the variable
1675 with index VAR_IDX takes on. The values are returned as a
1676 malloc()'darray stored in *VALUES, with the number of values
1677 stored in *VALUE_CNT.
1680 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1681 union value **values, int *value_cnt)
1683 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1685 if (mode == GENERAL)
1687 int width = MIN (var_get_width (v), MAX_SHORT_STRING);
1690 *values = xnmalloc (entry_cnt, sizeof **values);
1691 for (i = 0; i < entry_cnt; i++)
1692 (*values)[i] = entries[i]->values[var_idx];
1693 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1694 compare_value, &width);
1698 struct var_range *vr = get_var_range (v);
1701 assert (mode == INTEGER);
1702 *values = xnmalloc (vr->count, sizeof **values);
1703 for (i = 0; i < vr->count; i++)
1704 (*values)[i].f = i + vr->min;
1705 *value_cnt = vr->count;
1709 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1710 from V, displayed with print format spec from variable VAR. When
1711 in REPORT missing-value mode, missing values have an M appended. */
1713 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1714 const union value *v, const struct variable *var)
1717 const struct fmt_spec *print = var_get_print_format (var);
1719 const char *label = var_lookup_value_label (var, v);
1722 tab_text (table, c, r, TAB_LEFT, label);
1726 s.string = tab_alloc (table, print->w);
1727 format_short (s.string, print, v);
1728 s.length = strlen (s.string);
1729 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1730 s.string[s.length++] = 'M';
1731 while (s.length && *s.string == ' ')
1736 tab_raw (table, c, r, opt, &s);
1739 /* Draws a line across TABLE at the current row to indicate the most
1740 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1741 that changed, and puts the values that changed into the table. TB
1742 and X must be the corresponding table_entry and crosstab,
1745 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1747 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1749 for (; first_difference >= 2; first_difference--)
1750 table_value_missing (table, nvar - first_difference - 1, 0,
1751 TAB_RIGHT, &tb->values[first_difference],
1752 x->vars[first_difference]);
1755 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1756 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1757 additionally suffixed with a letter `M'. */
1759 format_cell_entry (struct tab_table *table, int c, int r, double value,
1760 char suffix, bool mark_missing)
1762 const struct fmt_spec f = {FMT_F, 10, 1};
1767 s.string = tab_alloc (table, 16);
1769 data_out (&v, &f, s.string);
1770 while (*s.string == ' ')
1776 s.string[s.length++] = suffix;
1778 s.string[s.length++] = 'M';
1780 tab_raw (table, c, r, TAB_RIGHT, &s);
1783 /* Displays the crosstabulation table. */
1785 display_crosstabulation (void)
1790 for (r = 0; r < n_rows; r++)
1791 table_value_missing (table, nvar - 2, r * num_cells,
1792 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1794 tab_text (table, nvar - 2, n_rows * num_cells,
1795 TAB_LEFT, _("Total"));
1797 /* Put in the actual cells. */
1802 tab_offset (table, nvar - 1, -1);
1803 for (r = 0; r < n_rows; r++)
1806 tab_hline (table, TAL_1, -1, n_cols, 0);
1807 for (c = 0; c < n_cols; c++)
1809 bool mark_missing = false;
1810 double expected_value = row_tot[r] * col_tot[c] / W;
1811 if (cmd.miss == CRS_REPORT
1812 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1813 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1815 mark_missing = true;
1816 for (i = 0; i < num_cells; i++)
1827 v = *mp / row_tot[r] * 100.;
1831 v = *mp / col_tot[c] * 100.;
1838 case CRS_CL_EXPECTED:
1841 case CRS_CL_RESIDUAL:
1842 v = *mp - expected_value;
1844 case CRS_CL_SRESIDUAL:
1845 v = (*mp - expected_value) / sqrt (expected_value);
1847 case CRS_CL_ASRESIDUAL:
1848 v = ((*mp - expected_value)
1849 / sqrt (expected_value
1850 * (1. - row_tot[r] / W)
1851 * (1. - col_tot[c] / W)));
1857 format_cell_entry (table, c, i, v, suffix, mark_missing);
1863 tab_offset (table, -1, tab_row (table) + num_cells);
1871 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1872 for (r = 0; r < n_rows; r++)
1874 bool mark_missing = false;
1876 if (cmd.miss == CRS_REPORT
1877 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1878 mark_missing = true;
1880 for (i = 0; i < num_cells; i++)
1895 v = row_tot[r] / W * 100.;
1899 v = row_tot[r] / W * 100.;
1902 case CRS_CL_EXPECTED:
1903 case CRS_CL_RESIDUAL:
1904 case CRS_CL_SRESIDUAL:
1905 case CRS_CL_ASRESIDUAL:
1912 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1913 tab_next_row (table);
1918 /* Column totals, grand total. */
1924 tab_hline (table, TAL_1, -1, n_cols, 0);
1925 for (c = 0; c <= n_cols; c++)
1927 double ct = c < n_cols ? col_tot[c] : W;
1928 bool mark_missing = false;
1931 if (cmd.miss == CRS_REPORT && c < n_cols
1932 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1933 mark_missing = true;
1935 for (i = 0; i < num_cells; i++)
1957 case CRS_CL_EXPECTED:
1958 case CRS_CL_RESIDUAL:
1959 case CRS_CL_SRESIDUAL:
1960 case CRS_CL_ASRESIDUAL:
1966 format_cell_entry (table, c, i, v, suffix, mark_missing);
1971 tab_offset (table, -1, tab_row (table) + last_row);
1974 tab_offset (table, 0, -1);
1977 static void calc_r (double *X, double *Y, double *, double *, double *);
1978 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1980 /* Display chi-square statistics. */
1982 display_chisq (const struct dictionary *dict)
1984 const struct variable *wv = dict_get_weight (dict);
1985 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
1987 static const char *chisq_stats[N_CHISQ] =
1989 N_("Pearson Chi-Square"),
1990 N_("Likelihood Ratio"),
1991 N_("Fisher's Exact Test"),
1992 N_("Continuity Correction"),
1993 N_("Linear-by-Linear Association"),
1995 double chisq_v[N_CHISQ];
1996 double fisher1, fisher2;
2002 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2004 tab_offset (chisq, nvar - 2, -1);
2006 for (i = 0; i < N_CHISQ; i++)
2008 if ((i != 2 && chisq_v[i] == SYSMIS)
2009 || (i == 2 && fisher1 == SYSMIS))
2013 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2016 tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL);
2017 tab_double (chisq, 2, 0, TAB_RIGHT, df[i], wfmt);
2018 tab_double (chisq, 3, 0, TAB_RIGHT,
2019 gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL);
2024 tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL);
2025 tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL);
2027 tab_next_row (chisq);
2030 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2031 tab_double (chisq, 1, 0, TAB_RIGHT, W, wfmt);
2032 tab_next_row (chisq);
2034 tab_offset (chisq, 0, -1);
2037 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2038 double[N_SYMMETRIC]);
2040 /* Display symmetric measures. */
2042 display_symmetric (const struct dictionary *dict)
2044 const struct variable *wv = dict_get_weight (dict);
2045 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
2047 static const char *categories[] =
2049 N_("Nominal by Nominal"),
2050 N_("Ordinal by Ordinal"),
2051 N_("Interval by Interval"),
2052 N_("Measure of Agreement"),
2055 static const char *stats[N_SYMMETRIC] =
2059 N_("Contingency Coefficient"),
2060 N_("Kendall's tau-b"),
2061 N_("Kendall's tau-c"),
2063 N_("Spearman Correlation"),
2068 static const int stats_categories[N_SYMMETRIC] =
2070 0, 0, 0, 1, 1, 1, 1, 2, 3,
2074 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2077 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2080 tab_offset (sym, nvar - 2, -1);
2082 for (i = 0; i < N_SYMMETRIC; i++)
2084 if (sym_v[i] == SYSMIS)
2087 if (stats_categories[i] != last_cat)
2089 last_cat = stats_categories[i];
2090 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2093 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2094 tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL);
2095 if (sym_ase[i] != SYSMIS)
2096 tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL);
2097 if (sym_t[i] != SYSMIS)
2098 tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL);
2099 /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL);*/
2103 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2104 tab_double (sym, 2, 0, TAB_RIGHT, W, wfmt);
2107 tab_offset (sym, 0, -1);
2110 static int calc_risk (double[], double[], double[], union value *);
2112 /* Display risk estimate. */
2114 display_risk (const struct dictionary *dict)
2116 const struct variable *wv = dict_get_weight (dict);
2117 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
2120 double risk_v[3], lower[3], upper[3];
2124 if (!calc_risk (risk_v, upper, lower, c))
2127 tab_offset (risk, nvar - 2, -1);
2129 for (i = 0; i < 3; i++)
2131 if (risk_v[i] == SYSMIS)
2137 if (var_is_numeric (x->vars[COL_VAR]))
2138 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2139 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2141 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2142 var_get_name (x->vars[COL_VAR]),
2143 var_get_width (x->vars[COL_VAR]), c[0].s,
2144 var_get_width (x->vars[COL_VAR]), c[1].s);
2148 if (var_is_numeric (x->vars[ROW_VAR]))
2149 sprintf (buf, _("For cohort %s = %g"),
2150 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2152 sprintf (buf, _("For cohort %s = %.*s"),
2153 var_get_name (x->vars[ROW_VAR]),
2154 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2158 tab_text (risk, 0, 0, TAB_LEFT, buf);
2159 tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL);
2160 tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL);
2161 tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL);
2162 tab_next_row (risk);
2165 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2166 tab_double (risk, 1, 0, TAB_RIGHT, W, wfmt);
2167 tab_next_row (risk);
2169 tab_offset (risk, 0, -1);
2172 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2173 double[N_DIRECTIONAL]);
2175 /* Display directional measures. */
2177 display_directional (void)
2179 static const char *categories[] =
2181 N_("Nominal by Nominal"),
2182 N_("Ordinal by Ordinal"),
2183 N_("Nominal by Interval"),
2186 static const char *stats[] =
2189 N_("Goodman and Kruskal tau"),
2190 N_("Uncertainty Coefficient"),
2195 static const char *types[] =
2202 static const int stats_categories[N_DIRECTIONAL] =
2204 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2207 static const int stats_stats[N_DIRECTIONAL] =
2209 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2212 static const int stats_types[N_DIRECTIONAL] =
2214 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2217 static const int *stats_lookup[] =
2224 static const char **stats_names[] =
2236 double direct_v[N_DIRECTIONAL];
2237 double direct_ase[N_DIRECTIONAL];
2238 double direct_t[N_DIRECTIONAL];
2242 if (!calc_directional (direct_v, direct_ase, direct_t))
2245 tab_offset (direct, nvar - 2, -1);
2247 for (i = 0; i < N_DIRECTIONAL; i++)
2249 if (direct_v[i] == SYSMIS)
2255 for (j = 0; j < 3; j++)
2256 if (last[j] != stats_lookup[j][i])
2259 tab_hline (direct, TAL_1, j, 6, 0);
2264 int k = last[j] = stats_lookup[j][i];
2269 string = var_get_name (x->vars[0]);
2271 string = var_get_name (x->vars[1]);
2273 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2274 gettext (stats_names[j][k]), string);
2279 tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL);
2280 if (direct_ase[i] != SYSMIS)
2281 tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL);
2282 if (direct_t[i] != SYSMIS)
2283 tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL);
2284 /*tab_double (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), NULL);*/
2285 tab_next_row (direct);
2288 tab_offset (direct, 0, -1);
2291 /* Statistical calculations. */
2293 /* Returns the value of the gamma (factorial) function for an integer
2296 gamma_int (double x)
2301 for (i = 2; i < x; i++)
2306 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2308 static inline double
2309 Pr (int a, int b, int c, int d)
2311 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2312 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2313 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2314 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2315 / gamma_int (a + b + c + d + 1.));
2318 /* Swap the contents of A and B. */
2320 swap (int *a, int *b)
2327 /* Calculate significance for Fisher's exact test as specified in
2328 _SPSS Statistical Algorithms_, Appendix 5. */
2330 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2334 if (MIN (c, d) < MIN (a, b))
2335 swap (&a, &c), swap (&b, &d);
2336 if (MIN (b, d) < MIN (a, c))
2337 swap (&a, &b), swap (&c, &d);
2341 swap (&a, &b), swap (&c, &d);
2343 swap (&a, &c), swap (&b, &d);
2347 for (x = 0; x <= a; x++)
2348 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2350 *fisher2 = *fisher1;
2351 for (x = 1; x <= b; x++)
2352 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2355 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2356 columns with values COLS and N_ROWS rows with values ROWS. Values
2357 in the matrix sum to W. */
2359 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2360 double *fisher1, double *fisher2)
2364 chisq[0] = chisq[1] = 0.;
2365 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2366 *fisher1 = *fisher2 = SYSMIS;
2368 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2370 if (ns_rows <= 1 || ns_cols <= 1)
2372 chisq[0] = chisq[1] = SYSMIS;
2376 for (r = 0; r < n_rows; r++)
2377 for (c = 0; c < n_cols; c++)
2379 const double expected = row_tot[r] * col_tot[c] / W;
2380 const double freq = mat[n_cols * r + c];
2381 const double residual = freq - expected;
2383 chisq[0] += residual * residual / expected;
2385 chisq[1] += freq * log (expected / freq);
2396 /* Calculate Yates and Fisher exact test. */
2397 if (ns_cols == 2 && ns_rows == 2)
2399 double f11, f12, f21, f22;
2405 for (i = j = 0; i < n_cols; i++)
2406 if (col_tot[i] != 0.)
2415 f11 = mat[nz_cols[0]];
2416 f12 = mat[nz_cols[1]];
2417 f21 = mat[nz_cols[0] + n_cols];
2418 f22 = mat[nz_cols[1] + n_cols];
2423 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2426 chisq[3] = (W * x * x
2427 / (f11 + f12) / (f21 + f22)
2428 / (f11 + f21) / (f12 + f22));
2436 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2437 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2440 /* Calculate Mantel-Haenszel. */
2441 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2443 double r, ase_0, ase_1;
2444 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2446 chisq[4] = (W - 1.) * r * r;
2451 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2452 ASE_1, and ase_0 into ASE_0. The row and column values must be
2453 passed in X and Y. */
2455 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2457 double SX, SY, S, T;
2459 double sum_XYf, sum_X2Y2f;
2460 double sum_Xr, sum_X2r;
2461 double sum_Yc, sum_Y2c;
2464 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2465 for (j = 0; j < n_cols; j++)
2467 double fij = mat[j + i * n_cols];
2468 double product = X[i] * Y[j];
2469 double temp = fij * product;
2471 sum_X2Y2f += temp * product;
2474 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2476 sum_Xr += X[i] * row_tot[i];
2477 sum_X2r += X[i] * X[i] * row_tot[i];
2481 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2483 sum_Yc += Y[i] * col_tot[i];
2484 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2488 S = sum_XYf - sum_Xr * sum_Yc / W;
2489 SX = sum_X2r - sum_Xr * sum_Xr / W;
2490 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2493 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2498 for (s = c = 0., i = 0; i < n_rows; i++)
2499 for (j = 0; j < n_cols; j++)
2501 double Xresid, Yresid;
2504 Xresid = X[i] - Xbar;
2505 Yresid = Y[j] - Ybar;
2506 temp = (T * Xresid * Yresid
2508 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2509 y = mat[j + i * n_cols] * temp * temp - c;
2514 *ase_1 = sqrt (s) / (T * T);
2518 static double somers_d_v[3];
2519 static double somers_d_ase[3];
2520 static double somers_d_t[3];
2522 /* Calculate symmetric statistics and their asymptotic standard
2523 errors. Returns 0 if none could be calculated. */
2525 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2526 double t[N_SYMMETRIC])
2528 int q = MIN (ns_rows, ns_cols);
2537 for (i = 0; i < N_SYMMETRIC; i++)
2538 v[i] = ase[i] = t[i] = SYSMIS;
2541 /* Phi, Cramer's V, contingency coefficient. */
2542 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2544 double Xp = 0.; /* Pearson chi-square. */
2549 for (r = 0; r < n_rows; r++)
2550 for (c = 0; c < n_cols; c++)
2552 const double expected = row_tot[r] * col_tot[c] / W;
2553 const double freq = mat[n_cols * r + c];
2554 const double residual = freq - expected;
2556 Xp += residual * residual / expected;
2560 if (cmd.a_statistics[CRS_ST_PHI])
2562 v[0] = sqrt (Xp / W);
2563 v[1] = sqrt (Xp / (W * (q - 1)));
2565 if (cmd.a_statistics[CRS_ST_CC])
2566 v[2] = sqrt (Xp / (Xp + W));
2569 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2570 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2575 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2582 for (r = 0; r < n_rows; r++)
2583 Dr -= row_tot[r] * row_tot[r];
2584 for (c = 0; c < n_cols; c++)
2585 Dc -= col_tot[c] * col_tot[c];
2591 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2592 for (c = 0; c < n_cols; c++)
2596 for (r = 0; r < n_rows; r++)
2597 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2607 for (i = 0; i < n_rows; i++)
2611 for (j = 1; j < n_cols; j++)
2612 Cij += col_tot[j] - cum[j + i * n_cols];
2615 for (j = 1; j < n_cols; j++)
2616 Dij += cum[j + (i - 1) * n_cols];
2620 double fij = mat[j + i * n_cols];
2626 assert (j < n_cols);
2628 Cij -= col_tot[j] - cum[j + i * n_cols];
2629 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2633 Cij += cum[j - 1 + (i - 1) * n_cols];
2634 Dij -= cum[j + (i - 1) * n_cols];
2640 if (cmd.a_statistics[CRS_ST_BTAU])
2641 v[3] = (P - Q) / sqrt (Dr * Dc);
2642 if (cmd.a_statistics[CRS_ST_CTAU])
2643 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2644 if (cmd.a_statistics[CRS_ST_GAMMA])
2645 v[5] = (P - Q) / (P + Q);
2647 /* ASE for tau-b, tau-c, gamma. Calculations could be
2648 eliminated here, at expense of memory. */
2653 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2654 for (i = 0; i < n_rows; i++)
2658 for (j = 1; j < n_cols; j++)
2659 Cij += col_tot[j] - cum[j + i * n_cols];
2662 for (j = 1; j < n_cols; j++)
2663 Dij += cum[j + (i - 1) * n_cols];
2667 double fij = mat[j + i * n_cols];
2669 if (cmd.a_statistics[CRS_ST_BTAU])
2671 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2672 + v[3] * (row_tot[i] * Dc
2673 + col_tot[j] * Dr));
2674 btau_cum += fij * temp * temp;
2678 const double temp = Cij - Dij;
2679 ctau_cum += fij * temp * temp;
2682 if (cmd.a_statistics[CRS_ST_GAMMA])
2684 const double temp = Q * Cij - P * Dij;
2685 gamma_cum += fij * temp * temp;
2688 if (cmd.a_statistics[CRS_ST_D])
2690 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2691 - (P - Q) * (W - row_tot[i]));
2692 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2693 - (Q - P) * (W - col_tot[j]));
2698 assert (j < n_cols);
2700 Cij -= col_tot[j] - cum[j + i * n_cols];
2701 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2705 Cij += cum[j - 1 + (i - 1) * n_cols];
2706 Dij -= cum[j + (i - 1) * n_cols];
2712 btau_var = ((btau_cum
2713 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2715 if (cmd.a_statistics[CRS_ST_BTAU])
2717 ase[3] = sqrt (btau_var);
2718 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2721 if (cmd.a_statistics[CRS_ST_CTAU])
2723 ase[4] = ((2 * q / ((q - 1) * W * W))
2724 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2725 t[4] = v[4] / ase[4];
2727 if (cmd.a_statistics[CRS_ST_GAMMA])
2729 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2730 t[5] = v[5] / (2. / (P + Q)
2731 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2733 if (cmd.a_statistics[CRS_ST_D])
2735 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2736 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2737 somers_d_t[0] = (somers_d_v[0]
2739 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2740 somers_d_v[1] = (P - Q) / Dc;
2741 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2742 somers_d_t[1] = (somers_d_v[1]
2744 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2745 somers_d_v[2] = (P - Q) / Dr;
2746 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2747 somers_d_t[2] = (somers_d_v[2]
2749 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2755 /* Spearman correlation, Pearson's r. */
2756 if (cmd.a_statistics[CRS_ST_CORR])
2758 double *R = xmalloca (sizeof *R * n_rows);
2759 double *C = xmalloca (sizeof *C * n_cols);
2762 double y, t, c = 0., s = 0.;
2767 R[i] = s + (row_tot[i] + 1.) / 2.;
2774 assert (i < n_rows);
2779 double y, t, c = 0., s = 0.;
2784 C[j] = s + (col_tot[j] + 1.) / 2;
2791 assert (j < n_cols);
2795 calc_r (R, C, &v[6], &t[6], &ase[6]);
2801 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2805 /* Cohen's kappa. */
2806 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2808 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2811 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2812 i < ns_rows; i++, j++)
2816 while (col_tot[j] == 0.)
2819 prod = row_tot[i] * col_tot[j];
2820 sum = row_tot[i] + col_tot[j];
2822 sum_fii += mat[j + i * n_cols];
2824 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2825 sum_riciri_ci += prod * sum;
2827 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2828 for (j = 0; j < ns_cols; j++)
2830 double sum = row_tot[i] + col_tot[j];
2831 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2834 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2836 ase[8] = sqrt ((W * W * sum_rici
2837 + sum_rici * sum_rici
2838 - W * sum_riciri_ci)
2839 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2841 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2842 / pow2 (W * W - sum_rici))
2843 + ((2. * (W - sum_fii)
2844 * (2. * sum_fii * sum_rici
2845 - W * sum_fiiri_ci))
2846 / cube (W * W - sum_rici))
2847 + (pow2 (W - sum_fii)
2848 * (W * sum_fijri_ci2 - 4.
2849 * sum_rici * sum_rici)
2850 / pow4 (W * W - sum_rici))));
2852 t[8] = v[8] / ase[8];
2859 /* Calculate risk estimate. */
2861 calc_risk (double *value, double *upper, double *lower, union value *c)
2863 double f11, f12, f21, f22;
2869 for (i = 0; i < 3; i++)
2870 value[i] = upper[i] = lower[i] = SYSMIS;
2873 if (ns_rows != 2 || ns_cols != 2)
2880 for (i = j = 0; i < n_cols; i++)
2881 if (col_tot[i] != 0.)
2890 f11 = mat[nz_cols[0]];
2891 f12 = mat[nz_cols[1]];
2892 f21 = mat[nz_cols[0] + n_cols];
2893 f22 = mat[nz_cols[1] + n_cols];
2895 c[0] = cols[nz_cols[0]];
2896 c[1] = cols[nz_cols[1]];
2899 value[0] = (f11 * f22) / (f12 * f21);
2900 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2901 lower[0] = value[0] * exp (-1.960 * v);
2902 upper[0] = value[0] * exp (1.960 * v);
2904 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2905 v = sqrt ((f12 / (f11 * (f11 + f12)))
2906 + (f22 / (f21 * (f21 + f22))));
2907 lower[1] = value[1] * exp (-1.960 * v);
2908 upper[1] = value[1] * exp (1.960 * v);
2910 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2911 v = sqrt ((f11 / (f12 * (f11 + f12)))
2912 + (f21 / (f22 * (f21 + f22))));
2913 lower[2] = value[2] * exp (-1.960 * v);
2914 upper[2] = value[2] * exp (1.960 * v);
2919 /* Calculate directional measures. */
2921 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2922 double t[N_DIRECTIONAL])
2927 for (i = 0; i < N_DIRECTIONAL; i++)
2928 v[i] = ase[i] = t[i] = SYSMIS;
2932 if (cmd.a_statistics[CRS_ST_LAMBDA])
2934 double *fim = xnmalloc (n_rows, sizeof *fim);
2935 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2936 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2937 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2938 double sum_fim, sum_fmj;
2940 int rm_index, cm_index;
2943 /* Find maximum for each row and their sum. */
2944 for (sum_fim = 0., i = 0; i < n_rows; i++)
2946 double max = mat[i * n_cols];
2949 for (j = 1; j < n_cols; j++)
2950 if (mat[j + i * n_cols] > max)
2952 max = mat[j + i * n_cols];
2956 sum_fim += fim[i] = max;
2957 fim_index[i] = index;
2960 /* Find maximum for each column. */
2961 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2963 double max = mat[j];
2966 for (i = 1; i < n_rows; i++)
2967 if (mat[j + i * n_cols] > max)
2969 max = mat[j + i * n_cols];
2973 sum_fmj += fmj[j] = max;
2974 fmj_index[j] = index;
2977 /* Find maximum row total. */
2980 for (i = 1; i < n_rows; i++)
2981 if (row_tot[i] > rm)
2987 /* Find maximum column total. */
2990 for (j = 1; j < n_cols; j++)
2991 if (col_tot[j] > cm)
2997 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2998 v[1] = (sum_fmj - rm) / (W - rm);
2999 v[2] = (sum_fim - cm) / (W - cm);
3001 /* ASE1 for Y given X. */
3005 for (accum = 0., i = 0; i < n_rows; i++)
3006 for (j = 0; j < n_cols; j++)
3008 const int deltaj = j == cm_index;
3009 accum += (mat[j + i * n_cols]
3010 * pow2 ((j == fim_index[i])
3015 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3018 /* ASE0 for Y given X. */
3022 for (accum = 0., i = 0; i < n_rows; i++)
3023 if (cm_index != fim_index[i])
3024 accum += (mat[i * n_cols + fim_index[i]]
3025 + mat[i * n_cols + cm_index]);
3026 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3029 /* ASE1 for X given Y. */
3033 for (accum = 0., i = 0; i < n_rows; i++)
3034 for (j = 0; j < n_cols; j++)
3036 const int deltaj = i == rm_index;
3037 accum += (mat[j + i * n_cols]
3038 * pow2 ((i == fmj_index[j])
3043 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3046 /* ASE0 for X given Y. */
3050 for (accum = 0., j = 0; j < n_cols; j++)
3051 if (rm_index != fmj_index[j])
3052 accum += (mat[j + n_cols * fmj_index[j]]
3053 + mat[j + n_cols * rm_index]);
3054 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3057 /* Symmetric ASE0 and ASE1. */
3062 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3063 for (j = 0; j < n_cols; j++)
3065 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3066 int temp1 = (i == rm_index) + (j == cm_index);
3067 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3068 accum1 += (mat[j + i * n_cols]
3069 * pow2 (temp0 + (v[0] - 1.) * temp1));
3071 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3072 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3073 / (2. * W - rm - cm));
3082 double sum_fij2_ri, sum_fij2_ci;
3083 double sum_ri2, sum_cj2;
3085 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3086 for (j = 0; j < n_cols; j++)
3088 double temp = pow2 (mat[j + i * n_cols]);
3089 sum_fij2_ri += temp / row_tot[i];
3090 sum_fij2_ci += temp / col_tot[j];
3093 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3094 sum_ri2 += row_tot[i] * row_tot[i];
3096 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3097 sum_cj2 += col_tot[j] * col_tot[j];
3099 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3100 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3104 if (cmd.a_statistics[CRS_ST_UC])
3106 double UX, UY, UXY, P;
3107 double ase1_yx, ase1_xy, ase1_sym;
3110 for (UX = 0., i = 0; i < n_rows; i++)
3111 if (row_tot[i] > 0.)
3112 UX -= row_tot[i] / W * log (row_tot[i] / W);
3114 for (UY = 0., j = 0; j < n_cols; j++)
3115 if (col_tot[j] > 0.)
3116 UY -= col_tot[j] / W * log (col_tot[j] / W);
3118 for (UXY = P = 0., i = 0; i < n_rows; i++)
3119 for (j = 0; j < n_cols; j++)
3121 double entry = mat[j + i * n_cols];
3126 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3127 UXY -= entry / W * log (entry / W);
3130 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3131 for (j = 0; j < n_cols; j++)
3133 double entry = mat[j + i * n_cols];
3138 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3139 + (UX - UXY) * log (col_tot[j] / W));
3140 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3141 + (UY - UXY) * log (row_tot[i] / W));
3142 ase1_sym += entry * pow2 ((UXY
3143 * log (row_tot[i] * col_tot[j] / (W * W)))
3144 - (UX + UY) * log (entry / W));
3147 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3148 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3149 t[5] = v[5] / ((2. / (W * (UX + UY)))
3150 * sqrt (P - pow2 (UX + UY - UXY) / W));
3152 v[6] = (UX + UY - UXY) / UX;
3153 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3154 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3156 v[7] = (UX + UY - UXY) / UY;
3157 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3158 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3162 if (cmd.a_statistics[CRS_ST_D])
3167 calc_symmetric (NULL, NULL, NULL);
3168 for (i = 0; i < 3; i++)
3170 v[8 + i] = somers_d_v[i];
3171 ase[8 + i] = somers_d_ase[i];
3172 t[8 + i] = somers_d_t[i];
3177 if (cmd.a_statistics[CRS_ST_ETA])
3180 double sum_Xr, sum_X2r;
3184 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3186 sum_Xr += rows[i].f * row_tot[i];
3187 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3189 SX = sum_X2r - sum_Xr * sum_Xr / W;
3191 for (SXW = 0., j = 0; j < n_cols; j++)
3195 for (cum = 0., i = 0; i < n_rows; i++)
3197 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3198 cum += rows[i].f * mat[j + i * n_cols];
3201 SXW -= cum * cum / col_tot[j];
3203 v[11] = sqrt (1. - SXW / SX);
3207 double sum_Yc, sum_Y2c;
3211 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3213 sum_Yc += cols[i].f * col_tot[i];
3214 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3216 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3218 for (SYW = 0., i = 0; i < n_rows; i++)
3222 for (cum = 0., j = 0; j < n_cols; j++)
3224 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3225 cum += cols[j].f * mat[j + i * n_cols];
3228 SYW -= cum * cum / row_tot[i];
3230 v[12] = sqrt (1. - SYW / SY);
3237 /* A wrapper around data_out() that limits string output to short
3238 string width and null terminates the result. */
3240 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3242 struct fmt_spec fmt_subst;
3244 /* Limit to short string width. */
3245 if (fmt_is_string (fp->type))
3249 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3250 if (fmt_subst.type == FMT_A)
3251 fmt_subst.w = MIN (8, fmt_subst.w);
3253 fmt_subst.w = MIN (16, fmt_subst.w);
3259 data_out (v, fp, s);
3261 /* Null terminate. */