1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
75 missing=miss:!table/include/report;
76 +write[wr_]=none,cells,all;
77 +format=fmt:!labels/nolabels/novallabs,
80 tabl:!tables/notables,
83 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
85 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
91 /* Number of chi-square statistics. */
94 /* Number of symmetric statistics. */
97 /* Number of directional statistics. */
98 #define N_DIRECTIONAL 13
100 /* A single table entry for general mode. */
103 int table; /* Flattened table number. */
106 double freq; /* Frequency count. */
107 double *data; /* Crosstabulation table for integer mode. */
110 union value values[1]; /* Values. */
113 /* A crosstabulation. */
116 int nvar; /* Number of variables. */
117 double missing; /* Missing cases count. */
118 int ofs; /* Integer mode: Offset into sorted_tab[]. */
119 const struct variable *vars[2]; /* At least two variables; sorted by
120 larger indices first. */
123 /* Integer mode variable info. */
126 int min; /* Minimum value. */
127 int max; /* Maximum value + 1. */
128 int count; /* max - min. */
131 static inline struct var_range *
132 get_var_range (const struct variable *v)
134 return var_get_aux (v);
137 /* Indexes into crosstab.v. */
144 /* General mode crosstabulation table. */
145 static struct hsh_table *gen_tab; /* Hash table. */
146 static int n_sorted_tab; /* Number of entries in sorted_tab. */
147 static struct table_entry **sorted_tab; /* Sorted table. */
149 /* Variables specifies on VARIABLES. */
150 static const struct variable **variables;
151 static size_t variables_cnt;
154 static struct crosstab **xtab;
157 /* Integer or general mode? */
166 static int num_cells; /* Number of cells requested. */
167 static int cells[8]; /* Cells requested. */
170 static int write_style; /* One of WR_* that specifies the WRITE style. */
172 /* Command parsing info. */
173 static struct cmd_crosstabs cmd;
176 static struct pool *pl_tc; /* For table cells. */
177 static struct pool *pl_col; /* For column data. */
179 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
180 static void precalc (struct casereader *, const struct dataset *);
181 static void calc_general (struct ccase *, const struct dataset *);
182 static void calc_integer (struct ccase *, const struct dataset *);
183 static void postcalc (void);
184 static void submit (struct tab_table *);
186 static void format_short (char *s, const struct fmt_spec *fp,
187 const union value *v);
189 /* Parse and execute CROSSTABS, then clean up. */
191 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
193 int result = internal_cmd_crosstabs (lexer, ds);
197 pool_destroy (pl_tc);
198 pool_destroy (pl_col);
200 for (i = 0; i < nxtab; i++)
207 /* Parses and executes the CROSSTABS procedure. */
209 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
211 struct casegrouper *grouper;
212 struct casereader *input, *group;
220 pl_tc = pool_create ();
221 pl_col = pool_create ();
223 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
226 mode = variables ? INTEGER : GENERAL;
231 cmd.a_cells[CRS_CL_COUNT] = 1;
237 for (i = 0; i < CRS_CL_count; i++)
242 cmd.a_cells[CRS_CL_COUNT] = 1;
243 cmd.a_cells[CRS_CL_ROW] = 1;
244 cmd.a_cells[CRS_CL_COLUMN] = 1;
245 cmd.a_cells[CRS_CL_TOTAL] = 1;
247 if (cmd.a_cells[CRS_CL_ALL])
249 for (i = 0; i < CRS_CL_count; i++)
251 cmd.a_cells[CRS_CL_ALL] = 0;
253 cmd.a_cells[CRS_CL_NONE] = 0;
255 for (num_cells = i = 0; i < CRS_CL_count; i++)
257 cells[num_cells++] = i;
260 if (cmd.sbc_statistics)
265 for (i = 0; i < CRS_ST_count; i++)
266 if (cmd.a_statistics[i])
269 cmd.a_statistics[CRS_ST_CHISQ] = 1;
270 if (cmd.a_statistics[CRS_ST_ALL])
271 for (i = 0; i < CRS_ST_count; i++)
272 cmd.a_statistics[i] = 1;
276 if (cmd.miss == CRS_REPORT && mode == GENERAL)
278 msg (SE, _("Missing mode REPORT not allowed in general mode. "
279 "Assuming MISSING=TABLE."));
280 cmd.miss = CRS_TABLE;
284 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
285 cmd.a_write[CRS_WR_ALL] = 0;
286 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
288 msg (SE, _("Write mode ALL not allowed in general mode. "
289 "Assuming WRITE=CELLS."));
290 cmd.a_write[CRS_WR_CELLS] = 1;
293 && (cmd.a_write[CRS_WR_NONE]
294 + cmd.a_write[CRS_WR_ALL]
295 + cmd.a_write[CRS_WR_CELLS] == 0))
296 cmd.a_write[CRS_WR_CELLS] = 1;
297 if (cmd.a_write[CRS_WR_CELLS])
298 write_style = CRS_WR_CELLS;
299 else if (cmd.a_write[CRS_WR_ALL])
300 write_style = CRS_WR_ALL;
302 write_style = CRS_WR_NONE;
304 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
306 grouper = casegrouper_create_splits (input, dataset_dict (ds));
307 while (casegrouper_get_next_group (grouper, &group))
313 for (; casereader_read (group, &c); case_destroy (&c))
316 calc_general (&c, ds);
318 calc_integer (&c, ds);
320 casereader_destroy (group);
324 ok = casegrouper_destroy (grouper);
325 ok = proc_commit (ds) && ok;
327 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
330 /* Parses the TABLES subcommand. */
332 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
334 struct const_var_set *var_set;
336 const struct variable ***by = NULL;
337 size_t *by_nvar = NULL;
341 /* Ensure that this is a TABLES subcommand. */
342 if (!lex_match_id (lexer, "TABLES")
343 && (lex_token (lexer) != T_ID ||
344 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
345 && lex_token (lexer) != T_ALL)
347 lex_match (lexer, '=');
349 if (variables != NULL)
350 var_set = const_var_set_create_from_array (variables, variables_cnt);
352 var_set = const_var_set_create_from_dict (dataset_dict (ds));
353 assert (var_set != NULL);
357 by = xnrealloc (by, n_by + 1, sizeof *by);
358 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
359 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
360 PV_NO_DUPLICATE | PV_NO_SCRATCH))
362 if (xalloc_oversized (nx, by_nvar[n_by]))
364 msg (SE, _("Too many cross-tabulation variables or dimensions."));
370 if (!lex_match (lexer, T_BY))
374 lex_error (lexer, _("expecting BY"));
383 int *by_iter = xcalloc (n_by, sizeof *by_iter);
386 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
387 for (i = 0; i < nx; i++)
391 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
398 for (i = 0; i < n_by; i++)
399 x->vars[i] = by[i][by_iter[i]];
405 for (i = n_by - 1; i >= 0; i--)
407 if (++by_iter[i] < by_nvar[i])
420 /* All return paths lead here. */
424 for (i = 0; i < n_by; i++)
430 const_var_set_destroy (var_set);
435 /* Parses the VARIABLES subcommand. */
437 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
441 msg (SE, _("VARIABLES must be specified before TABLES."));
445 lex_match (lexer, '=');
449 size_t orig_nv = variables_cnt;
454 if (!parse_variables_const (lexer, dataset_dict (ds),
455 &variables, &variables_cnt,
456 (PV_APPEND | PV_NUMERIC
457 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
460 if (lex_token (lexer) != '(')
462 lex_error (lexer, "expecting `('");
467 if (!lex_force_int (lexer))
469 min = lex_integer (lexer);
472 lex_match (lexer, ',');
474 if (!lex_force_int (lexer))
476 max = lex_integer (lexer);
479 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
485 if (lex_token (lexer) != ')')
487 lex_error (lexer, "expecting `)'");
492 for (i = orig_nv; i < variables_cnt; i++)
494 struct var_range *vr = xmalloc (sizeof *vr);
497 vr->count = max - min + 1;
498 var_attach_aux (variables[i], vr, var_dtor_free);
501 if (lex_token (lexer) == '/')
513 /* Data file processing. */
515 static int compare_table_entry (const void *, const void *, const void *);
516 static unsigned hash_table_entry (const void *, const void *);
518 /* Set up the crosstabulation tables for processing. */
520 precalc (struct casereader *input, const struct dataset *ds)
524 if (!casereader_peek (input, 0, &c))
526 output_split_file_values (ds, &c);
531 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
541 for (i = 0; i < nxtab; i++)
543 struct crosstab *x = xtab[i];
548 x->ofs = n_sorted_tab;
550 for (j = 2; j < x->nvar; j++)
551 count *= get_var_range (x->vars[j - 2])->count;
553 sorted_tab = xnrealloc (sorted_tab,
554 n_sorted_tab + count, sizeof *sorted_tab);
555 v = local_alloc (sizeof *v * x->nvar);
556 for (j = 2; j < x->nvar; j++)
557 v[j] = get_var_range (x->vars[j])->min;
558 for (j = 0; j < count; j++)
560 struct table_entry *te;
563 te = sorted_tab[n_sorted_tab++]
564 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
568 int row_cnt = get_var_range (x->vars[0])->count;
569 int col_cnt = get_var_range (x->vars[1])->count;
570 const int mat_size = row_cnt * col_cnt;
573 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
574 for (m = 0; m < mat_size; m++)
578 for (k = 2; k < x->nvar; k++)
579 te->values[k].f = v[k];
580 for (k = 2; k < x->nvar; k++)
582 struct var_range *vr = get_var_range (x->vars[k]);
583 if (++v[k] >= vr->max)
592 sorted_tab = xnrealloc (sorted_tab,
593 n_sorted_tab + 1, sizeof *sorted_tab);
594 sorted_tab[n_sorted_tab] = NULL;
599 /* Form crosstabulations for general mode. */
601 calc_general (struct ccase *c, const struct dataset *ds)
603 /* Missing values to exclude. */
604 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
605 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
609 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
611 /* Flattened current table index. */
614 for (t = 0; t < nxtab; t++)
616 struct crosstab *x = xtab[t];
617 const size_t entry_size = (sizeof (struct table_entry)
618 + sizeof (union value) * (x->nvar - 1));
619 struct table_entry *te = local_alloc (entry_size);
621 /* Construct table entry for the current record and table. */
627 for (j = 0; j < x->nvar; j++)
629 const union value *v = case_data (c, x->vars[j]);
630 if (var_is_value_missing (x->vars[j], v, exclude))
632 x->missing += weight;
636 if (var_is_numeric (x->vars[j]))
637 te->values[j].f = case_num (c, x->vars[j]);
640 memcpy (te->values[j].s, case_str (c, x->vars[j]),
641 var_get_width (x->vars[j]));
643 /* Necessary in order to simplify comparisons. */
644 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
645 sizeof (union value) - var_get_width (x->vars[j]));
650 /* Add record to hash table. */
652 struct table_entry **tepp
653 = (struct table_entry **) hsh_probe (gen_tab, te);
656 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
659 memcpy (tep, te, entry_size);
664 (*tepp)->u.freq += weight;
673 calc_integer (struct ccase *c, const struct dataset *ds)
675 bool bad_warn = true;
678 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
680 /* Flattened current table index. */
683 for (t = 0; t < nxtab; t++)
685 struct crosstab *x = xtab[t];
690 for (i = 0; i < x->nvar; i++)
692 const struct variable *const v = x->vars[i];
693 struct var_range *vr = get_var_range (v);
694 double value = case_num (c, v);
696 /* Note that the first test also rules out SYSMIS. */
697 if ((value < vr->min || value >= vr->max)
698 || (cmd.miss == CRS_TABLE
699 && var_is_num_missing (v, value, MV_USER)))
701 x->missing += weight;
707 ofs += fact * ((int) value - vr->min);
713 const struct variable *row_var = x->vars[ROW_VAR];
714 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
716 const struct variable *col_var = x->vars[COL_VAR];
717 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
719 const int col_dim = get_var_range (col_var)->count;
721 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
728 /* Compare the table_entry's at A and B and return a strcmp()-type
731 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
733 const struct table_entry *a = a_;
734 const struct table_entry *b = b_;
736 if (a->table > b->table)
738 else if (a->table < b->table)
742 const struct crosstab *x = xtab[a->table];
745 for (i = x->nvar - 1; i >= 0; i--)
746 if (var_is_numeric (x->vars[i]))
748 const double diffnum = a->values[i].f - b->values[i].f;
751 else if (diffnum > 0)
756 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
757 var_get_width (x->vars[i]));
766 /* Calculate a hash value from table_entry A. */
768 hash_table_entry (const void *a_, const void *aux UNUSED)
770 const struct table_entry *a = a_;
775 for (i = 0; i < xtab[a->table]->nvar; i++)
776 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
781 /* Post-data reading calculations. */
783 static struct table_entry **find_pivot_extent (struct table_entry **,
784 int *cnt, int pivot);
785 static void enum_var_values (struct table_entry **entries, int entry_cnt,
787 union value **values, int *value_cnt);
788 static void output_pivot_table (struct table_entry **, struct table_entry **,
789 double **, double **, double **,
790 int *, int *, int *);
791 static void make_summary_table (void);
798 n_sorted_tab = hsh_count (gen_tab);
799 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
802 make_summary_table ();
804 /* Identify all the individual crosstabulation tables, and deal with
807 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
808 int pc = n_sorted_tab; /* Pivot count. */
810 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
811 int maxrows = 0, maxcols = 0, maxcells = 0;
815 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
819 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
820 &maxrows, &maxcols, &maxcells);
829 hsh_destroy (gen_tab);
833 for (i = 0; i < n_sorted_tab; i++)
835 free (sorted_tab[i]->u.data);
836 free (sorted_tab[i]);
842 static void insert_summary (struct tab_table *, int tab_index, double valid);
844 /* Output a table summarizing the cases processed. */
846 make_summary_table (void)
848 struct tab_table *summary;
850 struct table_entry **pb = sorted_tab, **pe;
851 int pc = n_sorted_tab;
854 summary = tab_create (7, 3 + nxtab, 1);
855 tab_title (summary, _("Summary."));
856 tab_headers (summary, 1, 0, 3, 0);
857 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
858 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
859 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
860 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
861 tab_hline (summary, TAL_1, 1, 6, 1);
862 tab_hline (summary, TAL_1, 1, 6, 2);
863 tab_vline (summary, TAL_1, 3, 1, 1);
864 tab_vline (summary, TAL_1, 5, 1, 1);
868 for (i = 0; i < 3; i++)
870 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
871 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
874 tab_offset (summary, 0, 3);
880 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
884 while (cur_tab < (*pb)->table)
885 insert_summary (summary, cur_tab++, 0.);
888 for (valid = 0.; pb < pe; pb++)
889 valid += (*pb)->u.freq;
892 const struct crosstab *const x = xtab[(*pb)->table];
893 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
894 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
895 const int count = n_cols * n_rows;
897 for (valid = 0.; pb < pe; pb++)
899 const double *data = (*pb)->u.data;
902 for (i = 0; i < count; i++)
906 insert_summary (summary, cur_tab++, valid);
911 while (cur_tab < nxtab)
912 insert_summary (summary, cur_tab++, 0.);
917 /* Inserts a line into T describing the crosstabulation at index
918 TAB_INDEX, which has VALID valid observations. */
920 insert_summary (struct tab_table *t, int tab_index, double valid)
922 struct crosstab *x = xtab[tab_index];
924 tab_hline (t, TAL_1, 0, 6, 0);
926 /* Crosstabulation name. */
928 char *buf = local_alloc (128 * x->nvar);
932 for (i = 0; i < x->nvar; i++)
935 cp = stpcpy (cp, " * ");
937 cp = stpcpy (cp, var_to_string (x->vars[i]));
939 tab_text (t, 0, 0, TAB_LEFT, buf);
944 /* Counts and percentages. */
954 for (i = 0; i < 3; i++)
956 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
957 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
968 static struct tab_table *table; /* Crosstabulation table. */
969 static struct tab_table *chisq; /* Chi-square table. */
970 static struct tab_table *sym; /* Symmetric measures table. */
971 static struct tab_table *risk; /* Risk estimate table. */
972 static struct tab_table *direct; /* Directional measures table. */
975 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
977 /* Column values, number of columns. */
978 static union value *cols;
981 /* Row values, number of rows. */
982 static union value *rows;
985 /* Number of statistically interesting columns/rows (columns/rows with
987 static int ns_cols, ns_rows;
989 /* Crosstabulation. */
990 static const struct crosstab *x;
992 /* Number of variables from the crosstabulation to consider. This is
993 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
996 /* Matrix contents. */
997 static double *mat; /* Matrix proper. */
998 static double *row_tot; /* Row totals. */
999 static double *col_tot; /* Column totals. */
1000 static double W; /* Grand total. */
1002 static void display_dimensions (struct tab_table *, int first_difference,
1003 struct table_entry *);
1004 static void display_crosstabulation (void);
1005 static void display_chisq (void);
1006 static void display_symmetric (void);
1007 static void display_risk (void);
1008 static void display_directional (void);
1009 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1010 static void table_value_missing (struct tab_table *table, int c, int r,
1011 unsigned char opt, const union value *v,
1012 const struct variable *var);
1013 static void delete_missing (void);
1015 /* Output pivot table beginning at PB and continuing until PE,
1016 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1017 hold *MAXROWS entries. */
1019 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1020 double **matp, double **row_totp, double **col_totp,
1021 int *maxrows, int *maxcols, int *maxcells)
1024 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1025 int tc = pe - pb; /* Table count. */
1027 /* Table entry for header comparison. */
1028 struct table_entry *cmp = NULL;
1030 x = xtab[(*pb)->table];
1031 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1033 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1035 /* Crosstabulation table initialization. */
1038 table = tab_create (nvar + n_cols,
1039 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1040 tab_headers (table, nvar - 1, 0, 2, 0);
1042 /* First header line. */
1043 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1044 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1046 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1048 /* Second header line. */
1052 for (i = 2; i < nvar; i++)
1053 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1054 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1055 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1056 var_get_name (x->vars[ROW_VAR]));
1057 for (i = 0; i < n_cols; i++)
1058 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1060 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1063 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1064 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1068 char *title = local_alloc (x->nvar * 64 + 128);
1072 if (cmd.pivot == CRS_PIVOT)
1073 for (i = 0; i < nvar; i++)
1076 cp = stpcpy (cp, " by ");
1077 cp = stpcpy (cp, var_get_name (x->vars[i]));
1081 cp = spprintf (cp, "%s by %s for",
1082 var_get_name (x->vars[0]),
1083 var_get_name (x->vars[1]));
1084 for (i = 2; i < nvar; i++)
1086 char buf[64], *bufp;
1091 cp = stpcpy (cp, var_get_name (x->vars[i]));
1093 format_short (buf, var_get_print_format (x->vars[i]),
1095 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1097 cp = stpcpy (cp, bufp);
1101 cp = stpcpy (cp, " [");
1102 for (i = 0; i < num_cells; i++)
1110 static const struct tuple cell_names[] =
1112 {CRS_CL_COUNT, N_("count")},
1113 {CRS_CL_ROW, N_("row %")},
1114 {CRS_CL_COLUMN, N_("column %")},
1115 {CRS_CL_TOTAL, N_("total %")},
1116 {CRS_CL_EXPECTED, N_("expected")},
1117 {CRS_CL_RESIDUAL, N_("residual")},
1118 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1119 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1123 const struct tuple *t;
1125 for (t = cell_names; t->value != cells[i]; t++)
1126 assert (t->value != -1);
1128 cp = stpcpy (cp, ", ");
1129 cp = stpcpy (cp, gettext (t->name));
1133 tab_title (table, "%s", title);
1137 tab_offset (table, 0, 2);
1142 /* Chi-square table initialization. */
1143 if (cmd.a_statistics[CRS_ST_CHISQ])
1145 chisq = tab_create (6 + (nvar - 2),
1146 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1147 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1149 tab_title (chisq, _("Chi-square tests."));
1151 tab_offset (chisq, nvar - 2, 0);
1152 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1153 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1154 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1155 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1156 _("Asymp. Sig. (2-sided)"));
1157 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1158 _("Exact. Sig. (2-sided)"));
1159 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1160 _("Exact. Sig. (1-sided)"));
1162 tab_offset (chisq, 0, 1);
1167 /* Symmetric measures. */
1168 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1169 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1170 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1171 || cmd.a_statistics[CRS_ST_KAPPA])
1173 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1174 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1175 tab_title (sym, _("Symmetric measures."));
1177 tab_offset (sym, nvar - 2, 0);
1178 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1179 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1180 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1181 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1182 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1183 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1184 tab_offset (sym, 0, 1);
1189 /* Risk estimate. */
1190 if (cmd.a_statistics[CRS_ST_RISK])
1192 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1193 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1194 tab_title (risk, _("Risk estimate."));
1196 tab_offset (risk, nvar - 2, 0);
1197 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1198 _("95%% Confidence Interval"));
1199 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1200 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1201 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1202 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1203 tab_hline (risk, TAL_1, 2, 3, 1);
1204 tab_vline (risk, TAL_1, 2, 0, 1);
1205 tab_offset (risk, 0, 2);
1210 /* Directional measures. */
1211 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1212 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1214 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1215 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1216 tab_title (direct, _("Directional measures."));
1218 tab_offset (direct, nvar - 2, 0);
1219 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1220 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1221 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1222 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1223 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1224 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1225 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1226 tab_offset (direct, 0, 1);
1233 /* Find pivot subtable if applicable. */
1234 te = find_pivot_extent (tb, &tc, 0);
1238 /* Find all the row variable values. */
1239 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1241 /* Allocate memory space for the column and row totals. */
1242 if (n_rows > *maxrows)
1244 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1245 row_tot = *row_totp;
1248 if (n_cols > *maxcols)
1250 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1251 col_tot = *col_totp;
1255 /* Allocate table space for the matrix. */
1256 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1257 tab_realloc (table, -1,
1258 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1259 tab_nr (table) * (pe - pb) / (te - tb)));
1261 if (mode == GENERAL)
1263 /* Allocate memory space for the matrix. */
1264 if (n_cols * n_rows > *maxcells)
1266 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1267 *maxcells = n_cols * n_rows;
1272 /* Build the matrix and calculate column totals. */
1274 union value *cur_col = cols;
1275 union value *cur_row = rows;
1277 double *cp = col_tot;
1278 struct table_entry **p;
1281 for (p = &tb[0]; p < te; p++)
1283 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1287 for (; cur_row < &rows[n_rows]; cur_row++)
1293 mp = &mat[cur_col - cols];
1296 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1303 *cp += *mp = (*p)->u.freq;
1308 /* Zero out the rest of the matrix. */
1309 for (; cur_row < &rows[n_rows]; cur_row++)
1315 if (cur_col < &cols[n_cols])
1317 const int rem_cols = n_cols - (cur_col - cols);
1320 for (c = 0; c < rem_cols; c++)
1322 mp = &mat[cur_col - cols];
1323 for (r = 0; r < n_rows; r++)
1325 for (c = 0; c < rem_cols; c++)
1327 mp += n_cols - rem_cols;
1335 double *tp = col_tot;
1337 assert (mode == INTEGER);
1338 mat = (*tb)->u.data;
1341 /* Calculate column totals. */
1342 for (c = 0; c < n_cols; c++)
1345 double *cp = &mat[c];
1347 for (r = 0; r < n_rows; r++)
1348 cum += cp[r * n_cols];
1356 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1357 ns_cols += *cp != 0.;
1360 /* Calculate row totals. */
1363 double *rp = row_tot;
1366 for (ns_rows = 0, r = n_rows; r--; )
1369 for (c = n_cols; c--; )
1377 /* Calculate grand total. */
1383 if (n_rows < n_cols)
1384 tp = row_tot, n = n_rows;
1386 tp = col_tot, n = n_cols;
1392 /* Find the first variable that differs from the last subtable,
1393 then display the values of the dimensioning variables for
1394 each table that needs it. */
1396 int first_difference = nvar - 1;
1399 for (; ; first_difference--)
1401 assert (first_difference >= 2);
1402 if (memcmp (&cmp->values[first_difference],
1403 &(*tb)->values[first_difference],
1404 sizeof *cmp->values))
1410 display_dimensions (table, first_difference, *tb);
1412 display_dimensions (chisq, first_difference, *tb);
1414 display_dimensions (sym, first_difference, *tb);
1416 display_dimensions (risk, first_difference, *tb);
1418 display_dimensions (direct, first_difference, *tb);
1422 display_crosstabulation ();
1423 if (cmd.miss == CRS_REPORT)
1428 display_symmetric ();
1432 display_directional ();
1443 tab_resize (chisq, 4 + (nvar - 2), -1);
1454 /* Delete missing rows and columns for statistical analysis when
1457 delete_missing (void)
1462 for (r = 0; r < n_rows; r++)
1463 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1467 for (c = 0; c < n_cols; c++)
1468 mat[c + r * n_cols] = 0.;
1476 for (c = 0; c < n_cols; c++)
1477 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1481 for (r = 0; r < n_rows; r++)
1482 mat[c + r * n_cols] = 0.;
1488 /* Prepare table T for submission, and submit it. */
1490 submit (struct tab_table *t)
1497 tab_resize (t, -1, 0);
1498 if (tab_nr (t) == tab_t (t))
1503 tab_offset (t, 0, 0);
1505 for (i = 2; i < nvar; i++)
1506 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1507 var_to_string (x->vars[i]));
1508 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1509 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1511 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1513 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1514 tab_dim (t, crosstabs_dim);
1518 /* Sets the widths of all the columns and heights of all the rows in
1519 table T for driver D. */
1521 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1525 /* Width of a numerical column. */
1526 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1527 if (cmd.miss == CRS_REPORT)
1528 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1530 /* Set width for header columns. */
1536 w = d->width - c * (t->nc - t->l);
1537 for (i = 0; i <= t->nc; i++)
1541 if (w < d->prop_em_width * 8)
1542 w = d->prop_em_width * 8;
1544 if (w > d->prop_em_width * 15)
1545 w = d->prop_em_width * 15;
1547 for (i = 0; i < t->l; i++)
1551 for (i = t->l; i < t->nc; i++)
1554 for (i = 0; i < t->nr; i++)
1555 t->h[i] = tab_natural_height (t, d, i);
1558 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1559 int *cnt, int pivot);
1560 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1561 int *cnt, int pivot);
1563 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1565 static struct table_entry **
1566 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1568 return (mode == GENERAL
1569 ? find_pivot_extent_general (tp, cnt, pivot)
1570 : find_pivot_extent_integer (tp, cnt, pivot));
1573 /* Find the extent of a region in TP that contains one table. If
1574 PIVOT != 0 that means a set of table entries with identical table
1575 number; otherwise they also have to have the same values for every
1576 dimension after the row and column dimensions. The table that is
1577 searched starts at TP and has length CNT. Returns the first entry
1578 after the last one in the table; sets *CNT to the number of
1579 remaining values. If there are no entries in TP at all, returns
1580 NULL. A yucky interface, admittedly, but it works. */
1581 static struct table_entry **
1582 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1584 struct table_entry *fp = *tp;
1589 x = xtab[(*tp)->table];
1597 if ((*tp)->table != fp->table)
1602 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1609 /* Integer mode correspondent to find_pivot_extent_general(). This
1610 could be optimized somewhat, but I just don't give a crap about
1611 CROSSTABS performance in integer mode, which is just a
1612 CROSSTABS wart as far as I'm concerned.
1614 That said, feel free to send optimization patches to me. */
1615 static struct table_entry **
1616 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1618 struct table_entry *fp = *tp;
1623 x = xtab[(*tp)->table];
1631 if ((*tp)->table != fp->table)
1636 if (memcmp (&(*tp)->values[2], &fp->values[2],
1637 sizeof (union value) * (x->nvar - 2)))
1644 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1645 result. WIDTH_ points to an int which is either 0 for a
1646 numeric value or a string width for a string value. */
1648 compare_value (const void *a_, const void *b_, const void *width_)
1650 const union value *a = a_;
1651 const union value *b = b_;
1652 const int *pwidth = width_;
1653 const int width = *pwidth;
1656 return (a->f < b->f) ? -1 : (a->f > b->f);
1658 return strncmp (a->s, b->s, width);
1661 /* Given an array of ENTRY_CNT table_entry structures starting at
1662 ENTRIES, creates a sorted list of the values that the variable
1663 with index VAR_IDX takes on. The values are returned as a
1664 malloc()'darray stored in *VALUES, with the number of values
1665 stored in *VALUE_CNT.
1668 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1669 union value **values, int *value_cnt)
1671 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1673 if (mode == GENERAL)
1675 int width = var_get_width (v);
1678 *values = xnmalloc (entry_cnt, sizeof **values);
1679 for (i = 0; i < entry_cnt; i++)
1680 (*values)[i] = entries[i]->values[var_idx];
1681 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1682 compare_value, &width);
1686 struct var_range *vr = get_var_range (v);
1689 assert (mode == INTEGER);
1690 *values = xnmalloc (vr->count, sizeof **values);
1691 for (i = 0; i < vr->count; i++)
1692 (*values)[i].f = i + vr->min;
1693 *value_cnt = vr->count;
1697 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1698 from V, displayed with print format spec from variable VAR. When
1699 in REPORT missing-value mode, missing values have an M appended. */
1701 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1702 const union value *v, const struct variable *var)
1705 const struct fmt_spec *print = var_get_print_format (var);
1707 const char *label = var_lookup_value_label (var, v);
1710 tab_text (table, c, r, TAB_LEFT, label);
1714 s.string = tab_alloc (table, print->w);
1715 format_short (s.string, print, v);
1716 s.length = strlen (s.string);
1717 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1718 s.string[s.length++] = 'M';
1719 while (s.length && *s.string == ' ')
1724 tab_raw (table, c, r, opt, &s);
1727 /* Draws a line across TABLE at the current row to indicate the most
1728 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1729 that changed, and puts the values that changed into the table. TB
1730 and X must be the corresponding table_entry and crosstab,
1733 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1735 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1737 for (; first_difference >= 2; first_difference--)
1738 table_value_missing (table, nvar - first_difference - 1, 0,
1739 TAB_RIGHT, &tb->values[first_difference],
1740 x->vars[first_difference]);
1743 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1744 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1745 additionally suffixed with a letter `M'. */
1747 format_cell_entry (struct tab_table *table, int c, int r, double value,
1748 char suffix, bool mark_missing)
1750 const struct fmt_spec f = {FMT_F, 10, 1};
1755 s.string = tab_alloc (table, 16);
1757 data_out (&v, &f, s.string);
1758 while (*s.string == ' ')
1764 s.string[s.length++] = suffix;
1766 s.string[s.length++] = 'M';
1768 tab_raw (table, c, r, TAB_RIGHT, &s);
1771 /* Displays the crosstabulation table. */
1773 display_crosstabulation (void)
1778 for (r = 0; r < n_rows; r++)
1779 table_value_missing (table, nvar - 2, r * num_cells,
1780 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1782 tab_text (table, nvar - 2, n_rows * num_cells,
1783 TAB_LEFT, _("Total"));
1785 /* Put in the actual cells. */
1790 tab_offset (table, nvar - 1, -1);
1791 for (r = 0; r < n_rows; r++)
1794 tab_hline (table, TAL_1, -1, n_cols, 0);
1795 for (c = 0; c < n_cols; c++)
1797 bool mark_missing = false;
1798 double expected_value = row_tot[r] * col_tot[c] / W;
1799 if (cmd.miss == CRS_REPORT
1800 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1801 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1803 mark_missing = true;
1804 for (i = 0; i < num_cells; i++)
1815 v = *mp / row_tot[r] * 100.;
1819 v = *mp / col_tot[c] * 100.;
1826 case CRS_CL_EXPECTED:
1829 case CRS_CL_RESIDUAL:
1830 v = *mp - expected_value;
1832 case CRS_CL_SRESIDUAL:
1833 v = (*mp - expected_value) / sqrt (expected_value);
1835 case CRS_CL_ASRESIDUAL:
1836 v = ((*mp - expected_value)
1837 / sqrt (expected_value
1838 * (1. - row_tot[r] / W)
1839 * (1. - col_tot[c] / W)));
1845 format_cell_entry (table, c, i, v, suffix, mark_missing);
1851 tab_offset (table, -1, tab_row (table) + num_cells);
1859 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1860 for (r = 0; r < n_rows; r++)
1863 bool mark_missing = false;
1865 if (cmd.miss == CRS_REPORT
1866 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1867 mark_missing = true;
1869 for (i = 0; i < num_cells; i++)
1883 v = row_tot[r] / W * 100.;
1887 v = row_tot[r] / W * 100.;
1890 case CRS_CL_EXPECTED:
1891 case CRS_CL_RESIDUAL:
1892 case CRS_CL_SRESIDUAL:
1893 case CRS_CL_ASRESIDUAL:
1900 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1901 tab_next_row (table);
1906 /* Column totals, grand total. */
1912 tab_hline (table, TAL_1, -1, n_cols, 0);
1913 for (c = 0; c <= n_cols; c++)
1915 double ct = c < n_cols ? col_tot[c] : W;
1916 bool mark_missing = false;
1920 if (cmd.miss == CRS_REPORT && c < n_cols
1921 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1922 mark_missing = true;
1924 for (i = 0; i < num_cells; i++)
1946 case CRS_CL_EXPECTED:
1947 case CRS_CL_RESIDUAL:
1948 case CRS_CL_SRESIDUAL:
1949 case CRS_CL_ASRESIDUAL:
1955 format_cell_entry (table, c, i, v, suffix, mark_missing);
1960 tab_offset (table, -1, tab_row (table) + last_row);
1963 tab_offset (table, 0, -1);
1966 static void calc_r (double *X, double *Y, double *, double *, double *);
1967 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1969 /* Display chi-square statistics. */
1971 display_chisq (void)
1973 static const char *chisq_stats[N_CHISQ] =
1975 N_("Pearson Chi-Square"),
1976 N_("Likelihood Ratio"),
1977 N_("Fisher's Exact Test"),
1978 N_("Continuity Correction"),
1979 N_("Linear-by-Linear Association"),
1981 double chisq_v[N_CHISQ];
1982 double fisher1, fisher2;
1988 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1990 tab_offset (chisq, nvar - 2, -1);
1992 for (i = 0; i < N_CHISQ; i++)
1994 if ((i != 2 && chisq_v[i] == SYSMIS)
1995 || (i == 2 && fisher1 == SYSMIS))
1999 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2002 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2003 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2004 tab_float (chisq, 3, 0, TAB_RIGHT,
2005 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
2010 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2011 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2013 tab_next_row (chisq);
2016 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2017 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2018 tab_next_row (chisq);
2020 tab_offset (chisq, 0, -1);
2023 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2024 double[N_SYMMETRIC]);
2026 /* Display symmetric measures. */
2028 display_symmetric (void)
2030 static const char *categories[] =
2032 N_("Nominal by Nominal"),
2033 N_("Ordinal by Ordinal"),
2034 N_("Interval by Interval"),
2035 N_("Measure of Agreement"),
2038 static const char *stats[N_SYMMETRIC] =
2042 N_("Contingency Coefficient"),
2043 N_("Kendall's tau-b"),
2044 N_("Kendall's tau-c"),
2046 N_("Spearman Correlation"),
2051 static const int stats_categories[N_SYMMETRIC] =
2053 0, 0, 0, 1, 1, 1, 1, 2, 3,
2057 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2060 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2063 tab_offset (sym, nvar - 2, -1);
2065 for (i = 0; i < N_SYMMETRIC; i++)
2067 if (sym_v[i] == SYSMIS)
2070 if (stats_categories[i] != last_cat)
2072 last_cat = stats_categories[i];
2073 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2076 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2077 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2078 if (sym_ase[i] != SYSMIS)
2079 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2080 if (sym_t[i] != SYSMIS)
2081 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2082 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2086 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2087 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2090 tab_offset (sym, 0, -1);
2093 static int calc_risk (double[], double[], double[], union value *);
2095 /* Display risk estimate. */
2100 double risk_v[3], lower[3], upper[3];
2104 if (!calc_risk (risk_v, upper, lower, c))
2107 tab_offset (risk, nvar - 2, -1);
2109 for (i = 0; i < 3; i++)
2111 if (risk_v[i] == SYSMIS)
2117 if (var_is_numeric (x->vars[COL_VAR]))
2118 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2119 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2121 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2122 var_get_name (x->vars[COL_VAR]),
2123 var_get_width (x->vars[COL_VAR]), c[0].s,
2124 var_get_width (x->vars[COL_VAR]), c[1].s);
2128 if (var_is_numeric (x->vars[ROW_VAR]))
2129 sprintf (buf, _("For cohort %s = %g"),
2130 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2132 sprintf (buf, _("For cohort %s = %.*s"),
2133 var_get_name (x->vars[ROW_VAR]),
2134 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2138 tab_text (risk, 0, 0, TAB_LEFT, buf);
2139 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2140 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2141 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2142 tab_next_row (risk);
2145 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2146 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2147 tab_next_row (risk);
2149 tab_offset (risk, 0, -1);
2152 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2153 double[N_DIRECTIONAL]);
2155 /* Display directional measures. */
2157 display_directional (void)
2159 static const char *categories[] =
2161 N_("Nominal by Nominal"),
2162 N_("Ordinal by Ordinal"),
2163 N_("Nominal by Interval"),
2166 static const char *stats[] =
2169 N_("Goodman and Kruskal tau"),
2170 N_("Uncertainty Coefficient"),
2175 static const char *types[] =
2182 static const int stats_categories[N_DIRECTIONAL] =
2184 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2187 static const int stats_stats[N_DIRECTIONAL] =
2189 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2192 static const int stats_types[N_DIRECTIONAL] =
2194 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2197 static const int *stats_lookup[] =
2204 static const char **stats_names[] =
2216 double direct_v[N_DIRECTIONAL];
2217 double direct_ase[N_DIRECTIONAL];
2218 double direct_t[N_DIRECTIONAL];
2222 if (!calc_directional (direct_v, direct_ase, direct_t))
2225 tab_offset (direct, nvar - 2, -1);
2227 for (i = 0; i < N_DIRECTIONAL; i++)
2229 if (direct_v[i] == SYSMIS)
2235 for (j = 0; j < 3; j++)
2236 if (last[j] != stats_lookup[j][i])
2239 tab_hline (direct, TAL_1, j, 6, 0);
2244 int k = last[j] = stats_lookup[j][i];
2249 string = var_get_name (x->vars[0]);
2251 string = var_get_name (x->vars[1]);
2253 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2254 gettext (stats_names[j][k]), string);
2259 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2260 if (direct_ase[i] != SYSMIS)
2261 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2262 if (direct_t[i] != SYSMIS)
2263 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2264 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2265 tab_next_row (direct);
2268 tab_offset (direct, 0, -1);
2271 /* Statistical calculations. */
2273 /* Returns the value of the gamma (factorial) function for an integer
2276 gamma_int (double x)
2281 for (i = 2; i < x; i++)
2286 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2288 static inline double
2289 Pr (int a, int b, int c, int d)
2291 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2292 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2293 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2294 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2295 / gamma_int (a + b + c + d + 1.));
2298 /* Swap the contents of A and B. */
2300 swap (int *a, int *b)
2307 /* Calculate significance for Fisher's exact test as specified in
2308 _SPSS Statistical Algorithms_, Appendix 5. */
2310 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2314 if (MIN (c, d) < MIN (a, b))
2315 swap (&a, &c), swap (&b, &d);
2316 if (MIN (b, d) < MIN (a, c))
2317 swap (&a, &b), swap (&c, &d);
2321 swap (&a, &b), swap (&c, &d);
2323 swap (&a, &c), swap (&b, &d);
2327 for (x = 0; x <= a; x++)
2328 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2330 *fisher2 = *fisher1;
2331 for (x = 1; x <= b; x++)
2332 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2335 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2336 columns with values COLS and N_ROWS rows with values ROWS. Values
2337 in the matrix sum to W. */
2339 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2340 double *fisher1, double *fisher2)
2344 chisq[0] = chisq[1] = 0.;
2345 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2346 *fisher1 = *fisher2 = SYSMIS;
2348 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2350 if (ns_rows <= 1 || ns_cols <= 1)
2352 chisq[0] = chisq[1] = SYSMIS;
2356 for (r = 0; r < n_rows; r++)
2357 for (c = 0; c < n_cols; c++)
2359 const double expected = row_tot[r] * col_tot[c] / W;
2360 const double freq = mat[n_cols * r + c];
2361 const double residual = freq - expected;
2363 chisq[0] += residual * residual / expected;
2365 chisq[1] += freq * log (expected / freq);
2376 /* Calculate Yates and Fisher exact test. */
2377 if (ns_cols == 2 && ns_rows == 2)
2379 double f11, f12, f21, f22;
2385 for (i = j = 0; i < n_cols; i++)
2386 if (col_tot[i] != 0.)
2395 f11 = mat[nz_cols[0]];
2396 f12 = mat[nz_cols[1]];
2397 f21 = mat[nz_cols[0] + n_cols];
2398 f22 = mat[nz_cols[1] + n_cols];
2403 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2406 chisq[3] = (W * x * x
2407 / (f11 + f12) / (f21 + f22)
2408 / (f11 + f21) / (f12 + f22));
2416 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2417 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2420 /* Calculate Mantel-Haenszel. */
2421 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2423 double r, ase_0, ase_1;
2424 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2426 chisq[4] = (W - 1.) * r * r;
2431 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2432 ASE_1, and ase_0 into ASE_0. The row and column values must be
2433 passed in X and Y. */
2435 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2437 double SX, SY, S, T;
2439 double sum_XYf, sum_X2Y2f;
2440 double sum_Xr, sum_X2r;
2441 double sum_Yc, sum_Y2c;
2444 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2445 for (j = 0; j < n_cols; j++)
2447 double fij = mat[j + i * n_cols];
2448 double product = X[i] * Y[j];
2449 double temp = fij * product;
2451 sum_X2Y2f += temp * product;
2454 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2456 sum_Xr += X[i] * row_tot[i];
2457 sum_X2r += X[i] * X[i] * row_tot[i];
2461 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2463 sum_Yc += Y[i] * col_tot[i];
2464 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2468 S = sum_XYf - sum_Xr * sum_Yc / W;
2469 SX = sum_X2r - sum_Xr * sum_Xr / W;
2470 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2473 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2478 for (s = c = 0., i = 0; i < n_rows; i++)
2479 for (j = 0; j < n_cols; j++)
2481 double Xresid, Yresid;
2484 Xresid = X[i] - Xbar;
2485 Yresid = Y[j] - Ybar;
2486 temp = (T * Xresid * Yresid
2488 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2489 y = mat[j + i * n_cols] * temp * temp - c;
2494 *ase_1 = sqrt (s) / (T * T);
2498 static double somers_d_v[3];
2499 static double somers_d_ase[3];
2500 static double somers_d_t[3];
2502 /* Calculate symmetric statistics and their asymptotic standard
2503 errors. Returns 0 if none could be calculated. */
2505 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2506 double t[N_SYMMETRIC])
2508 int q = MIN (ns_rows, ns_cols);
2517 for (i = 0; i < N_SYMMETRIC; i++)
2518 v[i] = ase[i] = t[i] = SYSMIS;
2521 /* Phi, Cramer's V, contingency coefficient. */
2522 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2524 double Xp = 0.; /* Pearson chi-square. */
2529 for (r = 0; r < n_rows; r++)
2530 for (c = 0; c < n_cols; c++)
2532 const double expected = row_tot[r] * col_tot[c] / W;
2533 const double freq = mat[n_cols * r + c];
2534 const double residual = freq - expected;
2536 Xp += residual * residual / expected;
2540 if (cmd.a_statistics[CRS_ST_PHI])
2542 v[0] = sqrt (Xp / W);
2543 v[1] = sqrt (Xp / (W * (q - 1)));
2545 if (cmd.a_statistics[CRS_ST_CC])
2546 v[2] = sqrt (Xp / (Xp + W));
2549 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2550 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2555 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2562 for (r = 0; r < n_rows; r++)
2563 Dr -= row_tot[r] * row_tot[r];
2564 for (c = 0; c < n_cols; c++)
2565 Dc -= col_tot[c] * col_tot[c];
2571 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2572 for (c = 0; c < n_cols; c++)
2576 for (r = 0; r < n_rows; r++)
2577 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2587 for (i = 0; i < n_rows; i++)
2591 for (j = 1; j < n_cols; j++)
2592 Cij += col_tot[j] - cum[j + i * n_cols];
2595 for (j = 1; j < n_cols; j++)
2596 Dij += cum[j + (i - 1) * n_cols];
2600 double fij = mat[j + i * n_cols];
2606 assert (j < n_cols);
2608 Cij -= col_tot[j] - cum[j + i * n_cols];
2609 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2613 Cij += cum[j - 1 + (i - 1) * n_cols];
2614 Dij -= cum[j + (i - 1) * n_cols];
2620 if (cmd.a_statistics[CRS_ST_BTAU])
2621 v[3] = (P - Q) / sqrt (Dr * Dc);
2622 if (cmd.a_statistics[CRS_ST_CTAU])
2623 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2624 if (cmd.a_statistics[CRS_ST_GAMMA])
2625 v[5] = (P - Q) / (P + Q);
2627 /* ASE for tau-b, tau-c, gamma. Calculations could be
2628 eliminated here, at expense of memory. */
2633 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2634 for (i = 0; i < n_rows; i++)
2638 for (j = 1; j < n_cols; j++)
2639 Cij += col_tot[j] - cum[j + i * n_cols];
2642 for (j = 1; j < n_cols; j++)
2643 Dij += cum[j + (i - 1) * n_cols];
2647 double fij = mat[j + i * n_cols];
2649 if (cmd.a_statistics[CRS_ST_BTAU])
2651 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2652 + v[3] * (row_tot[i] * Dc
2653 + col_tot[j] * Dr));
2654 btau_cum += fij * temp * temp;
2658 const double temp = Cij - Dij;
2659 ctau_cum += fij * temp * temp;
2662 if (cmd.a_statistics[CRS_ST_GAMMA])
2664 const double temp = Q * Cij - P * Dij;
2665 gamma_cum += fij * temp * temp;
2668 if (cmd.a_statistics[CRS_ST_D])
2670 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2671 - (P - Q) * (W - row_tot[i]));
2672 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2673 - (Q - P) * (W - col_tot[j]));
2678 assert (j < n_cols);
2680 Cij -= col_tot[j] - cum[j + i * n_cols];
2681 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2685 Cij += cum[j - 1 + (i - 1) * n_cols];
2686 Dij -= cum[j + (i - 1) * n_cols];
2692 btau_var = ((btau_cum
2693 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2695 if (cmd.a_statistics[CRS_ST_BTAU])
2697 ase[3] = sqrt (btau_var);
2698 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2701 if (cmd.a_statistics[CRS_ST_CTAU])
2703 ase[4] = ((2 * q / ((q - 1) * W * W))
2704 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2705 t[4] = v[4] / ase[4];
2707 if (cmd.a_statistics[CRS_ST_GAMMA])
2709 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2710 t[5] = v[5] / (2. / (P + Q)
2711 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2713 if (cmd.a_statistics[CRS_ST_D])
2715 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2716 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2717 somers_d_t[0] = (somers_d_v[0]
2719 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2720 somers_d_v[1] = (P - Q) / Dc;
2721 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2722 somers_d_t[1] = (somers_d_v[1]
2724 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2725 somers_d_v[2] = (P - Q) / Dr;
2726 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2727 somers_d_t[2] = (somers_d_v[2]
2729 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2735 /* Spearman correlation, Pearson's r. */
2736 if (cmd.a_statistics[CRS_ST_CORR])
2738 double *R = local_alloc (sizeof *R * n_rows);
2739 double *C = local_alloc (sizeof *C * n_cols);
2742 double y, t, c = 0., s = 0.;
2747 R[i] = s + (row_tot[i] + 1.) / 2.;
2754 assert (i < n_rows);
2759 double y, t, c = 0., s = 0.;
2764 C[j] = s + (col_tot[j] + 1.) / 2;
2771 assert (j < n_cols);
2775 calc_r (R, C, &v[6], &t[6], &ase[6]);
2781 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2785 /* Cohen's kappa. */
2786 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2788 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2791 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2792 i < ns_rows; i++, j++)
2796 while (col_tot[j] == 0.)
2799 prod = row_tot[i] * col_tot[j];
2800 sum = row_tot[i] + col_tot[j];
2802 sum_fii += mat[j + i * n_cols];
2804 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2805 sum_riciri_ci += prod * sum;
2807 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2808 for (j = 0; j < ns_cols; j++)
2810 double sum = row_tot[i] + col_tot[j];
2811 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2814 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2816 ase[8] = sqrt ((W * W * sum_rici
2817 + sum_rici * sum_rici
2818 - W * sum_riciri_ci)
2819 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2821 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2822 / pow2 (W * W - sum_rici))
2823 + ((2. * (W - sum_fii)
2824 * (2. * sum_fii * sum_rici
2825 - W * sum_fiiri_ci))
2826 / cube (W * W - sum_rici))
2827 + (pow2 (W - sum_fii)
2828 * (W * sum_fijri_ci2 - 4.
2829 * sum_rici * sum_rici)
2830 / pow4 (W * W - sum_rici))));
2832 t[8] = v[8] / ase[8];
2839 /* Calculate risk estimate. */
2841 calc_risk (double *value, double *upper, double *lower, union value *c)
2843 double f11, f12, f21, f22;
2849 for (i = 0; i < 3; i++)
2850 value[i] = upper[i] = lower[i] = SYSMIS;
2853 if (ns_rows != 2 || ns_cols != 2)
2860 for (i = j = 0; i < n_cols; i++)
2861 if (col_tot[i] != 0.)
2870 f11 = mat[nz_cols[0]];
2871 f12 = mat[nz_cols[1]];
2872 f21 = mat[nz_cols[0] + n_cols];
2873 f22 = mat[nz_cols[1] + n_cols];
2875 c[0] = cols[nz_cols[0]];
2876 c[1] = cols[nz_cols[1]];
2879 value[0] = (f11 * f22) / (f12 * f21);
2880 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2881 lower[0] = value[0] * exp (-1.960 * v);
2882 upper[0] = value[0] * exp (1.960 * v);
2884 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2885 v = sqrt ((f12 / (f11 * (f11 + f12)))
2886 + (f22 / (f21 * (f21 + f22))));
2887 lower[1] = value[1] * exp (-1.960 * v);
2888 upper[1] = value[1] * exp (1.960 * v);
2890 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2891 v = sqrt ((f11 / (f12 * (f11 + f12)))
2892 + (f21 / (f22 * (f21 + f22))));
2893 lower[2] = value[2] * exp (-1.960 * v);
2894 upper[2] = value[2] * exp (1.960 * v);
2899 /* Calculate directional measures. */
2901 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2902 double t[N_DIRECTIONAL])
2907 for (i = 0; i < N_DIRECTIONAL; i++)
2908 v[i] = ase[i] = t[i] = SYSMIS;
2912 if (cmd.a_statistics[CRS_ST_LAMBDA])
2914 double *fim = xnmalloc (n_rows, sizeof *fim);
2915 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2916 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2917 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2918 double sum_fim, sum_fmj;
2920 int rm_index, cm_index;
2923 /* Find maximum for each row and their sum. */
2924 for (sum_fim = 0., i = 0; i < n_rows; i++)
2926 double max = mat[i * n_cols];
2929 for (j = 1; j < n_cols; j++)
2930 if (mat[j + i * n_cols] > max)
2932 max = mat[j + i * n_cols];
2936 sum_fim += fim[i] = max;
2937 fim_index[i] = index;
2940 /* Find maximum for each column. */
2941 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2943 double max = mat[j];
2946 for (i = 1; i < n_rows; i++)
2947 if (mat[j + i * n_cols] > max)
2949 max = mat[j + i * n_cols];
2953 sum_fmj += fmj[j] = max;
2954 fmj_index[j] = index;
2957 /* Find maximum row total. */
2960 for (i = 1; i < n_rows; i++)
2961 if (row_tot[i] > rm)
2967 /* Find maximum column total. */
2970 for (j = 1; j < n_cols; j++)
2971 if (col_tot[j] > cm)
2977 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2978 v[1] = (sum_fmj - rm) / (W - rm);
2979 v[2] = (sum_fim - cm) / (W - cm);
2981 /* ASE1 for Y given X. */
2985 for (accum = 0., i = 0; i < n_rows; i++)
2986 for (j = 0; j < n_cols; j++)
2988 const int deltaj = j == cm_index;
2989 accum += (mat[j + i * n_cols]
2990 * pow2 ((j == fim_index[i])
2995 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2998 /* ASE0 for Y given X. */
3002 for (accum = 0., i = 0; i < n_rows; i++)
3003 if (cm_index != fim_index[i])
3004 accum += (mat[i * n_cols + fim_index[i]]
3005 + mat[i * n_cols + cm_index]);
3006 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3009 /* ASE1 for X given Y. */
3013 for (accum = 0., i = 0; i < n_rows; i++)
3014 for (j = 0; j < n_cols; j++)
3016 const int deltaj = i == rm_index;
3017 accum += (mat[j + i * n_cols]
3018 * pow2 ((i == fmj_index[j])
3023 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3026 /* ASE0 for X given Y. */
3030 for (accum = 0., j = 0; j < n_cols; j++)
3031 if (rm_index != fmj_index[j])
3032 accum += (mat[j + n_cols * fmj_index[j]]
3033 + mat[j + n_cols * rm_index]);
3034 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3037 /* Symmetric ASE0 and ASE1. */
3042 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3043 for (j = 0; j < n_cols; j++)
3045 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3046 int temp1 = (i == rm_index) + (j == cm_index);
3047 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3048 accum1 += (mat[j + i * n_cols]
3049 * pow2 (temp0 + (v[0] - 1.) * temp1));
3051 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3052 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3053 / (2. * W - rm - cm));
3062 double sum_fij2_ri, sum_fij2_ci;
3063 double sum_ri2, sum_cj2;
3065 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3066 for (j = 0; j < n_cols; j++)
3068 double temp = pow2 (mat[j + i * n_cols]);
3069 sum_fij2_ri += temp / row_tot[i];
3070 sum_fij2_ci += temp / col_tot[j];
3073 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3074 sum_ri2 += row_tot[i] * row_tot[i];
3076 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3077 sum_cj2 += col_tot[j] * col_tot[j];
3079 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3080 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3084 if (cmd.a_statistics[CRS_ST_UC])
3086 double UX, UY, UXY, P;
3087 double ase1_yx, ase1_xy, ase1_sym;
3090 for (UX = 0., i = 0; i < n_rows; i++)
3091 if (row_tot[i] > 0.)
3092 UX -= row_tot[i] / W * log (row_tot[i] / W);
3094 for (UY = 0., j = 0; j < n_cols; j++)
3095 if (col_tot[j] > 0.)
3096 UY -= col_tot[j] / W * log (col_tot[j] / W);
3098 for (UXY = P = 0., i = 0; i < n_rows; i++)
3099 for (j = 0; j < n_cols; j++)
3101 double entry = mat[j + i * n_cols];
3106 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3107 UXY -= entry / W * log (entry / W);
3110 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3111 for (j = 0; j < n_cols; j++)
3113 double entry = mat[j + i * n_cols];
3118 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3119 + (UX - UXY) * log (col_tot[j] / W));
3120 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3121 + (UY - UXY) * log (row_tot[i] / W));
3122 ase1_sym += entry * pow2 ((UXY
3123 * log (row_tot[i] * col_tot[j] / (W * W)))
3124 - (UX + UY) * log (entry / W));
3127 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3128 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3129 t[5] = v[5] / ((2. / (W * (UX + UY)))
3130 * sqrt (P - pow2 (UX + UY - UXY) / W));
3132 v[6] = (UX + UY - UXY) / UX;
3133 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3134 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3136 v[7] = (UX + UY - UXY) / UY;
3137 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3138 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3142 if (cmd.a_statistics[CRS_ST_D])
3147 calc_symmetric (NULL, NULL, NULL);
3148 for (i = 0; i < 3; i++)
3150 v[8 + i] = somers_d_v[i];
3151 ase[8 + i] = somers_d_ase[i];
3152 t[8 + i] = somers_d_t[i];
3157 if (cmd.a_statistics[CRS_ST_ETA])
3160 double sum_Xr, sum_X2r;
3164 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3166 sum_Xr += rows[i].f * row_tot[i];
3167 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3169 SX = sum_X2r - sum_Xr * sum_Xr / W;
3171 for (SXW = 0., j = 0; j < n_cols; j++)
3175 for (cum = 0., i = 0; i < n_rows; i++)
3177 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3178 cum += rows[i].f * mat[j + i * n_cols];
3181 SXW -= cum * cum / col_tot[j];
3183 v[11] = sqrt (1. - SXW / SX);
3187 double sum_Yc, sum_Y2c;
3191 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3193 sum_Yc += cols[i].f * col_tot[i];
3194 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3196 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3198 for (SYW = 0., i = 0; i < n_rows; i++)
3202 for (cum = 0., j = 0; j < n_cols; j++)
3204 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3205 cum += cols[j].f * mat[j + i * n_cols];
3208 SYW -= cum * cum / row_tot[i];
3210 v[12] = sqrt (1. - SYW / SY);
3217 /* A wrapper around data_out() that limits string output to short
3218 string width and null terminates the result. */
3220 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3222 struct fmt_spec fmt_subst;
3224 /* Limit to short string width. */
3225 if (fmt_is_string (fp->type))
3229 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3230 if (fmt_subst.type == FMT_A)
3231 fmt_subst.w = MIN (8, fmt_subst.w);
3233 fmt_subst.w = MIN (16, fmt_subst.w);
3239 data_out (v, fp, s);
3241 /* Null terminate. */