1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/message.h>
54 #include <libpspp/misc.h>
55 #include <libpspp/pool.h>
56 #include <libpspp/str.h>
57 #include <output/output.h>
58 #include <output/table.h>
65 #define _(msgid) gettext (msgid)
66 #define N_(msgid) msgid
74 missing=miss:!table/include/report;
75 +write[wr_]=none,cells,all;
76 +format=fmt:!labels/nolabels/novallabs,
79 tabl:!tables/notables,
82 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
84 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
90 /* Number of chi-square statistics. */
93 /* Number of symmetric statistics. */
96 /* Number of directional statistics. */
97 #define N_DIRECTIONAL 13
99 /* A single table entry for general mode. */
102 int table; /* Flattened table number. */
105 double freq; /* Frequency count. */
106 double *data; /* Crosstabulation table for integer mode. */
109 union value values[1]; /* Values. */
112 /* A crosstabulation. */
115 int nvar; /* Number of variables. */
116 double missing; /* Missing cases count. */
117 int ofs; /* Integer mode: Offset into sorted_tab[]. */
118 const struct variable *vars[2]; /* At least two variables; sorted by
119 larger indices first. */
122 /* Integer mode variable info. */
125 int min; /* Minimum value. */
126 int max; /* Maximum value + 1. */
127 int count; /* max - min. */
130 static inline struct var_range *
131 get_var_range (const struct variable *v)
133 return var_get_aux (v);
136 /* Indexes into crosstab.v. */
143 /* General mode crosstabulation table. */
144 static struct hsh_table *gen_tab; /* Hash table. */
145 static int n_sorted_tab; /* Number of entries in sorted_tab. */
146 static struct table_entry **sorted_tab; /* Sorted table. */
148 /* Variables specifies on VARIABLES. */
149 static const struct variable **variables;
150 static size_t variables_cnt;
153 static struct crosstab **xtab;
156 /* Integer or general mode? */
165 static int num_cells; /* Number of cells requested. */
166 static int cells[8]; /* Cells requested. */
169 static int write_style; /* One of WR_* that specifies the WRITE style. */
171 /* Command parsing info. */
172 static struct cmd_crosstabs cmd;
175 static struct pool *pl_tc; /* For table cells. */
176 static struct pool *pl_col; /* For column data. */
178 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
179 static void precalc (struct casereader *, const struct dataset *);
180 static void calc_general (const struct ccase *, const struct dataset *);
181 static void calc_integer (const struct ccase *, const struct dataset *);
182 static void postcalc (void);
183 static void submit (struct tab_table *);
185 static void format_short (char *s, const struct fmt_spec *fp,
186 const union value *v);
188 /* Parse and execute CROSSTABS, then clean up. */
190 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
192 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
199 for (i = 0; i < nxtab; i++)
206 /* Parses and executes the CROSSTABS procedure. */
208 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
210 struct casegrouper *grouper;
211 struct casereader *input, *group;
219 pl_tc = pool_create ();
220 pl_col = pool_create ();
222 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
225 mode = variables ? INTEGER : GENERAL;
230 cmd.a_cells[CRS_CL_COUNT] = 1;
236 for (i = 0; i < CRS_CL_count; i++)
241 cmd.a_cells[CRS_CL_COUNT] = 1;
242 cmd.a_cells[CRS_CL_ROW] = 1;
243 cmd.a_cells[CRS_CL_COLUMN] = 1;
244 cmd.a_cells[CRS_CL_TOTAL] = 1;
246 if (cmd.a_cells[CRS_CL_ALL])
248 for (i = 0; i < CRS_CL_count; i++)
250 cmd.a_cells[CRS_CL_ALL] = 0;
252 cmd.a_cells[CRS_CL_NONE] = 0;
254 for (num_cells = i = 0; i < CRS_CL_count; i++)
256 cells[num_cells++] = i;
259 if (cmd.sbc_statistics)
264 for (i = 0; i < CRS_ST_count; i++)
265 if (cmd.a_statistics[i])
268 cmd.a_statistics[CRS_ST_CHISQ] = 1;
269 if (cmd.a_statistics[CRS_ST_ALL])
270 for (i = 0; i < CRS_ST_count; i++)
271 cmd.a_statistics[i] = 1;
275 if (cmd.miss == CRS_REPORT && mode == GENERAL)
277 msg (SE, _("Missing mode REPORT not allowed in general mode. "
278 "Assuming MISSING=TABLE."));
279 cmd.miss = CRS_TABLE;
283 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
284 cmd.a_write[CRS_WR_ALL] = 0;
285 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
287 msg (SE, _("Write mode ALL not allowed in general mode. "
288 "Assuming WRITE=CELLS."));
289 cmd.a_write[CRS_WR_CELLS] = 1;
292 && (cmd.a_write[CRS_WR_NONE]
293 + cmd.a_write[CRS_WR_ALL]
294 + cmd.a_write[CRS_WR_CELLS] == 0))
295 cmd.a_write[CRS_WR_CELLS] = 1;
296 if (cmd.a_write[CRS_WR_CELLS])
297 write_style = CRS_WR_CELLS;
298 else if (cmd.a_write[CRS_WR_ALL])
299 write_style = CRS_WR_ALL;
301 write_style = CRS_WR_NONE;
303 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
305 grouper = casegrouper_create_splits (input, dataset_dict (ds));
306 while (casegrouper_get_next_group (grouper, &group))
312 for (; (c = casereader_read (group)) != NULL; case_unref (c))
315 calc_general (c, ds);
317 calc_integer (c, ds);
319 casereader_destroy (group);
323 ok = casegrouper_destroy (grouper);
324 ok = proc_commit (ds) && ok;
326 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
329 /* Parses the TABLES subcommand. */
331 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
333 struct const_var_set *var_set;
335 const struct variable ***by = NULL;
336 size_t *by_nvar = NULL;
340 /* Ensure that this is a TABLES subcommand. */
341 if (!lex_match_id (lexer, "TABLES")
342 && (lex_token (lexer) != T_ID ||
343 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
344 && lex_token (lexer) != T_ALL)
346 lex_match (lexer, '=');
348 if (variables != NULL)
349 var_set = const_var_set_create_from_array (variables, variables_cnt);
351 var_set = const_var_set_create_from_dict (dataset_dict (ds));
352 assert (var_set != NULL);
356 by = xnrealloc (by, n_by + 1, sizeof *by);
357 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
358 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
359 PV_NO_DUPLICATE | PV_NO_SCRATCH))
361 if (xalloc_oversized (nx, by_nvar[n_by]))
363 msg (SE, _("Too many cross-tabulation variables or dimensions."));
369 if (!lex_match (lexer, T_BY))
373 lex_error (lexer, _("expecting BY"));
382 int *by_iter = xcalloc (n_by, sizeof *by_iter);
385 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
386 for (i = 0; i < nx; i++)
390 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
397 for (i = 0; i < n_by; i++)
398 x->vars[i] = by[i][by_iter[i]];
404 for (i = n_by - 1; i >= 0; i--)
406 if (++by_iter[i] < by_nvar[i])
419 /* All return paths lead here. */
423 for (i = 0; i < n_by; i++)
429 const_var_set_destroy (var_set);
434 /* Parses the VARIABLES subcommand. */
436 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
440 msg (SE, _("VARIABLES must be specified before TABLES."));
444 lex_match (lexer, '=');
448 size_t orig_nv = variables_cnt;
453 if (!parse_variables_const (lexer, dataset_dict (ds),
454 &variables, &variables_cnt,
455 (PV_APPEND | PV_NUMERIC
456 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
459 if (lex_token (lexer) != '(')
461 lex_error (lexer, "expecting `('");
466 if (!lex_force_int (lexer))
468 min = lex_integer (lexer);
471 lex_match (lexer, ',');
473 if (!lex_force_int (lexer))
475 max = lex_integer (lexer);
478 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
484 if (lex_token (lexer) != ')')
486 lex_error (lexer, "expecting `)'");
491 for (i = orig_nv; i < variables_cnt; i++)
493 struct var_range *vr = xmalloc (sizeof *vr);
496 vr->count = max - min + 1;
497 var_attach_aux (variables[i], vr, var_dtor_free);
500 if (lex_token (lexer) == '/')
512 /* Data file processing. */
514 static int compare_table_entry (const void *, const void *, const void *);
515 static unsigned hash_table_entry (const void *, const void *);
517 /* Set up the crosstabulation tables for processing. */
519 precalc (struct casereader *input, const struct dataset *ds)
523 c = casereader_peek (input, 0);
526 output_split_file_values (ds, c);
532 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
542 for (i = 0; i < nxtab; i++)
544 struct crosstab *x = xtab[i];
549 x->ofs = n_sorted_tab;
551 for (j = 2; j < x->nvar; j++)
552 count *= get_var_range (x->vars[j - 2])->count;
554 sorted_tab = xnrealloc (sorted_tab,
555 n_sorted_tab + count, sizeof *sorted_tab);
556 v = xmalloca (sizeof *v * x->nvar);
557 for (j = 2; j < x->nvar; j++)
558 v[j] = get_var_range (x->vars[j])->min;
559 for (j = 0; j < count; j++)
561 struct table_entry *te;
564 te = sorted_tab[n_sorted_tab++]
565 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
569 int row_cnt = get_var_range (x->vars[0])->count;
570 int col_cnt = get_var_range (x->vars[1])->count;
571 const int mat_size = row_cnt * col_cnt;
574 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
575 for (m = 0; m < mat_size; m++)
579 for (k = 2; k < x->nvar; k++)
580 te->values[k].f = v[k];
581 for (k = 2; k < x->nvar; k++)
583 struct var_range *vr = get_var_range (x->vars[k]);
584 if (++v[k] >= vr->max)
593 sorted_tab = xnrealloc (sorted_tab,
594 n_sorted_tab + 1, sizeof *sorted_tab);
595 sorted_tab[n_sorted_tab] = NULL;
600 /* Form crosstabulations for general mode. */
602 calc_general (const struct ccase *c, const struct dataset *ds)
604 /* Missing values to exclude. */
605 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
606 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
610 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
612 /* Flattened current table index. */
615 for (t = 0; t < nxtab; t++)
617 struct crosstab *x = xtab[t];
618 const size_t entry_size = (sizeof (struct table_entry)
619 + sizeof (union value) * (x->nvar - 1));
620 struct table_entry *te = xmalloca (entry_size);
622 /* Construct table entry for the current record and table. */
628 for (j = 0; j < x->nvar; j++)
630 const union value *v = case_data (c, x->vars[j]);
631 if (var_is_value_missing (x->vars[j], v, exclude))
633 x->missing += weight;
637 if (var_is_numeric (x->vars[j]))
638 te->values[j].f = case_num (c, x->vars[j]);
641 size_t n = var_get_width (x->vars[j]);
642 if (n > MAX_SHORT_STRING)
643 n = MAX_SHORT_STRING;
644 memcpy (te->values[j].s, case_str (c, x->vars[j]), n);
646 /* Necessary in order to simplify comparisons. */
647 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
648 sizeof (union value) - n);
653 /* Add record to hash table. */
655 struct table_entry **tepp
656 = (struct table_entry **) hsh_probe (gen_tab, te);
659 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
662 memcpy (tep, te, entry_size);
667 (*tepp)->u.freq += weight;
676 calc_integer (const struct ccase *c, const struct dataset *ds)
678 bool bad_warn = true;
681 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
683 /* Flattened current table index. */
686 for (t = 0; t < nxtab; t++)
688 struct crosstab *x = xtab[t];
693 for (i = 0; i < x->nvar; i++)
695 const struct variable *const v = x->vars[i];
696 struct var_range *vr = get_var_range (v);
697 double value = case_num (c, v);
699 /* Note that the first test also rules out SYSMIS. */
700 if ((value < vr->min || value >= vr->max)
701 || (cmd.miss == CRS_TABLE
702 && var_is_num_missing (v, value, MV_USER)))
704 x->missing += weight;
710 ofs += fact * ((int) value - vr->min);
716 const struct variable *row_var = x->vars[ROW_VAR];
717 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
719 const struct variable *col_var = x->vars[COL_VAR];
720 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
722 const int col_dim = get_var_range (col_var)->count;
724 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
731 /* Compare the table_entry's at A and B and return a strcmp()-type
734 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
736 const struct table_entry *a = a_;
737 const struct table_entry *b = b_;
739 if (a->table > b->table)
741 else if (a->table < b->table)
745 const struct crosstab *x = xtab[a->table];
748 for (i = x->nvar - 1; i >= 0; i--)
749 if (var_is_numeric (x->vars[i]))
751 const double diffnum = a->values[i].f - b->values[i].f;
754 else if (diffnum > 0)
759 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
760 var_get_width (x->vars[i]));
769 /* Calculate a hash value from table_entry A. */
771 hash_table_entry (const void *a_, const void *aux UNUSED)
773 const struct table_entry *a = a_;
778 for (i = 0; i < xtab[a->table]->nvar; i++)
779 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
784 /* Post-data reading calculations. */
786 static struct table_entry **find_pivot_extent (struct table_entry **,
787 int *cnt, int pivot);
788 static void enum_var_values (struct table_entry **entries, int entry_cnt,
790 union value **values, int *value_cnt);
791 static void output_pivot_table (struct table_entry **, struct table_entry **,
792 double **, double **, double **,
793 int *, int *, int *);
794 static void make_summary_table (void);
801 n_sorted_tab = hsh_count (gen_tab);
802 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
805 make_summary_table ();
807 /* Identify all the individual crosstabulation tables, and deal with
810 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
811 int pc = n_sorted_tab; /* Pivot count. */
813 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
814 int maxrows = 0, maxcols = 0, maxcells = 0;
818 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
822 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
823 &maxrows, &maxcols, &maxcells);
832 hsh_destroy (gen_tab);
836 for (i = 0; i < n_sorted_tab; i++)
838 free (sorted_tab[i]->u.data);
839 free (sorted_tab[i]);
845 static void insert_summary (struct tab_table *, int tab_index, double valid);
847 /* Output a table summarizing the cases processed. */
849 make_summary_table (void)
851 struct tab_table *summary;
853 struct table_entry **pb = sorted_tab, **pe;
854 int pc = n_sorted_tab;
857 summary = tab_create (7, 3 + nxtab, 1);
858 tab_title (summary, _("Summary."));
859 tab_headers (summary, 1, 0, 3, 0);
860 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
861 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
862 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
863 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
864 tab_hline (summary, TAL_1, 1, 6, 1);
865 tab_hline (summary, TAL_1, 1, 6, 2);
866 tab_vline (summary, TAL_1, 3, 1, 1);
867 tab_vline (summary, TAL_1, 5, 1, 1);
871 for (i = 0; i < 3; i++)
873 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
874 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
877 tab_offset (summary, 0, 3);
883 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
887 while (cur_tab < (*pb)->table)
888 insert_summary (summary, cur_tab++, 0.);
891 for (valid = 0.; pb < pe; pb++)
892 valid += (*pb)->u.freq;
895 const struct crosstab *const x = xtab[(*pb)->table];
896 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
897 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
898 const int count = n_cols * n_rows;
900 for (valid = 0.; pb < pe; pb++)
902 const double *data = (*pb)->u.data;
905 for (i = 0; i < count; i++)
909 insert_summary (summary, cur_tab++, valid);
914 while (cur_tab < nxtab)
915 insert_summary (summary, cur_tab++, 0.);
920 /* Inserts a line into T describing the crosstabulation at index
921 TAB_INDEX, which has VALID valid observations. */
923 insert_summary (struct tab_table *t, int tab_index, double valid)
925 struct crosstab *x = xtab[tab_index];
927 tab_hline (t, TAL_1, 0, 6, 0);
929 /* Crosstabulation name. */
931 char *buf = xmalloca (128 * x->nvar);
935 for (i = 0; i < x->nvar; i++)
938 cp = stpcpy (cp, " * ");
940 cp = stpcpy (cp, var_to_string (x->vars[i]));
942 tab_text (t, 0, 0, TAB_LEFT, buf);
947 /* Counts and percentages. */
957 for (i = 0; i < 3; i++)
959 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
960 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
971 static struct tab_table *table; /* Crosstabulation table. */
972 static struct tab_table *chisq; /* Chi-square table. */
973 static struct tab_table *sym; /* Symmetric measures table. */
974 static struct tab_table *risk; /* Risk estimate table. */
975 static struct tab_table *direct; /* Directional measures table. */
978 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
980 /* Column values, number of columns. */
981 static union value *cols;
984 /* Row values, number of rows. */
985 static union value *rows;
988 /* Number of statistically interesting columns/rows (columns/rows with
990 static int ns_cols, ns_rows;
992 /* Crosstabulation. */
993 static const struct crosstab *x;
995 /* Number of variables from the crosstabulation to consider. This is
996 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
999 /* Matrix contents. */
1000 static double *mat; /* Matrix proper. */
1001 static double *row_tot; /* Row totals. */
1002 static double *col_tot; /* Column totals. */
1003 static double W; /* Grand total. */
1005 static void display_dimensions (struct tab_table *, int first_difference,
1006 struct table_entry *);
1007 static void display_crosstabulation (void);
1008 static void display_chisq (void);
1009 static void display_symmetric (void);
1010 static void display_risk (void);
1011 static void display_directional (void);
1012 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1013 static void table_value_missing (struct tab_table *table, int c, int r,
1014 unsigned char opt, const union value *v,
1015 const struct variable *var);
1016 static void delete_missing (void);
1018 /* Output pivot table beginning at PB and continuing until PE,
1019 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1020 hold *MAXROWS entries. */
1022 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1023 double **matp, double **row_totp, double **col_totp,
1024 int *maxrows, int *maxcols, int *maxcells)
1027 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1028 int tc = pe - pb; /* Table count. */
1030 /* Table entry for header comparison. */
1031 struct table_entry *cmp = NULL;
1033 x = xtab[(*pb)->table];
1034 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1036 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1038 /* Crosstabulation table initialization. */
1041 table = tab_create (nvar + n_cols,
1042 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1043 tab_headers (table, nvar - 1, 0, 2, 0);
1045 /* First header line. */
1046 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1047 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1049 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1051 /* Second header line. */
1055 for (i = 2; i < nvar; i++)
1056 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1057 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1058 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1059 var_get_name (x->vars[ROW_VAR]));
1060 for (i = 0; i < n_cols; i++)
1061 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1063 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1066 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1067 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1071 char *title = xmalloca (x->nvar * 64 + 128);
1075 if (cmd.pivot == CRS_PIVOT)
1076 for (i = 0; i < nvar; i++)
1079 cp = stpcpy (cp, " by ");
1080 cp = stpcpy (cp, var_get_name (x->vars[i]));
1084 cp = spprintf (cp, "%s by %s for",
1085 var_get_name (x->vars[0]),
1086 var_get_name (x->vars[1]));
1087 for (i = 2; i < nvar; i++)
1089 char buf[64], *bufp;
1094 cp = stpcpy (cp, var_get_name (x->vars[i]));
1096 format_short (buf, var_get_print_format (x->vars[i]),
1098 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1100 cp = stpcpy (cp, bufp);
1104 cp = stpcpy (cp, " [");
1105 for (i = 0; i < num_cells; i++)
1113 static const struct tuple cell_names[] =
1115 {CRS_CL_COUNT, N_("count")},
1116 {CRS_CL_ROW, N_("row %")},
1117 {CRS_CL_COLUMN, N_("column %")},
1118 {CRS_CL_TOTAL, N_("total %")},
1119 {CRS_CL_EXPECTED, N_("expected")},
1120 {CRS_CL_RESIDUAL, N_("residual")},
1121 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1122 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1126 const struct tuple *t;
1128 for (t = cell_names; t->value != cells[i]; t++)
1129 assert (t->value != -1);
1131 cp = stpcpy (cp, ", ");
1132 cp = stpcpy (cp, gettext (t->name));
1136 tab_title (table, "%s", title);
1140 tab_offset (table, 0, 2);
1145 /* Chi-square table initialization. */
1146 if (cmd.a_statistics[CRS_ST_CHISQ])
1148 chisq = tab_create (6 + (nvar - 2),
1149 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1150 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1152 tab_title (chisq, _("Chi-square tests."));
1154 tab_offset (chisq, nvar - 2, 0);
1155 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1156 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1157 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1158 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1159 _("Asymp. Sig. (2-sided)"));
1160 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1161 _("Exact. Sig. (2-sided)"));
1162 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1163 _("Exact. Sig. (1-sided)"));
1165 tab_offset (chisq, 0, 1);
1170 /* Symmetric measures. */
1171 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1172 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1173 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1174 || cmd.a_statistics[CRS_ST_KAPPA])
1176 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1177 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1178 tab_title (sym, _("Symmetric measures."));
1180 tab_offset (sym, nvar - 2, 0);
1181 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1182 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1183 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1184 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1185 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1186 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1187 tab_offset (sym, 0, 1);
1192 /* Risk estimate. */
1193 if (cmd.a_statistics[CRS_ST_RISK])
1195 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1196 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1197 tab_title (risk, _("Risk estimate."));
1199 tab_offset (risk, nvar - 2, 0);
1200 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1201 _("95%% Confidence Interval"));
1202 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1203 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1204 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1205 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1206 tab_hline (risk, TAL_1, 2, 3, 1);
1207 tab_vline (risk, TAL_1, 2, 0, 1);
1208 tab_offset (risk, 0, 2);
1213 /* Directional measures. */
1214 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1215 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1217 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1218 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1219 tab_title (direct, _("Directional measures."));
1221 tab_offset (direct, nvar - 2, 0);
1222 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1223 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1224 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1225 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1226 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1227 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1228 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1229 tab_offset (direct, 0, 1);
1236 /* Find pivot subtable if applicable. */
1237 te = find_pivot_extent (tb, &tc, 0);
1241 /* Find all the row variable values. */
1242 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1244 /* Allocate memory space for the column and row totals. */
1245 if (n_rows > *maxrows)
1247 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1248 row_tot = *row_totp;
1251 if (n_cols > *maxcols)
1253 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1254 col_tot = *col_totp;
1258 /* Allocate table space for the matrix. */
1259 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1260 tab_realloc (table, -1,
1261 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1262 tab_nr (table) * (pe - pb) / (te - tb)));
1264 if (mode == GENERAL)
1266 /* Allocate memory space for the matrix. */
1267 if (n_cols * n_rows > *maxcells)
1269 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1270 *maxcells = n_cols * n_rows;
1275 /* Build the matrix and calculate column totals. */
1277 union value *cur_col = cols;
1278 union value *cur_row = rows;
1280 double *cp = col_tot;
1281 struct table_entry **p;
1284 for (p = &tb[0]; p < te; p++)
1286 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1290 for (; cur_row < &rows[n_rows]; cur_row++)
1296 mp = &mat[cur_col - cols];
1299 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1306 *cp += *mp = (*p)->u.freq;
1311 /* Zero out the rest of the matrix. */
1312 for (; cur_row < &rows[n_rows]; cur_row++)
1318 if (cur_col < &cols[n_cols])
1320 const int rem_cols = n_cols - (cur_col - cols);
1323 for (c = 0; c < rem_cols; c++)
1325 mp = &mat[cur_col - cols];
1326 for (r = 0; r < n_rows; r++)
1328 for (c = 0; c < rem_cols; c++)
1330 mp += n_cols - rem_cols;
1338 double *tp = col_tot;
1340 assert (mode == INTEGER);
1341 mat = (*tb)->u.data;
1344 /* Calculate column totals. */
1345 for (c = 0; c < n_cols; c++)
1348 double *cp = &mat[c];
1350 for (r = 0; r < n_rows; r++)
1351 cum += cp[r * n_cols];
1359 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1360 ns_cols += *cp != 0.;
1363 /* Calculate row totals. */
1366 double *rp = row_tot;
1369 for (ns_rows = 0, r = n_rows; r--; )
1372 for (c = n_cols; c--; )
1380 /* Calculate grand total. */
1386 if (n_rows < n_cols)
1387 tp = row_tot, n = n_rows;
1389 tp = col_tot, n = n_cols;
1395 /* Find the first variable that differs from the last subtable,
1396 then display the values of the dimensioning variables for
1397 each table that needs it. */
1399 int first_difference = nvar - 1;
1402 for (; ; first_difference--)
1404 assert (first_difference >= 2);
1405 if (memcmp (&cmp->values[first_difference],
1406 &(*tb)->values[first_difference],
1407 sizeof *cmp->values))
1413 display_dimensions (table, first_difference, *tb);
1415 display_dimensions (chisq, first_difference, *tb);
1417 display_dimensions (sym, first_difference, *tb);
1419 display_dimensions (risk, first_difference, *tb);
1421 display_dimensions (direct, first_difference, *tb);
1425 display_crosstabulation ();
1426 if (cmd.miss == CRS_REPORT)
1431 display_symmetric ();
1435 display_directional ();
1446 tab_resize (chisq, 4 + (nvar - 2), -1);
1457 /* Delete missing rows and columns for statistical analysis when
1460 delete_missing (void)
1465 for (r = 0; r < n_rows; r++)
1466 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1470 for (c = 0; c < n_cols; c++)
1471 mat[c + r * n_cols] = 0.;
1479 for (c = 0; c < n_cols; c++)
1480 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1484 for (r = 0; r < n_rows; r++)
1485 mat[c + r * n_cols] = 0.;
1491 /* Prepare table T for submission, and submit it. */
1493 submit (struct tab_table *t)
1500 tab_resize (t, -1, 0);
1501 if (tab_nr (t) == tab_t (t))
1506 tab_offset (t, 0, 0);
1508 for (i = 2; i < nvar; i++)
1509 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1510 var_to_string (x->vars[i]));
1511 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1512 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1514 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1516 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1517 tab_dim (t, crosstabs_dim);
1521 /* Sets the widths of all the columns and heights of all the rows in
1522 table T for driver D. */
1524 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1528 /* Width of a numerical column. */
1529 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1530 if (cmd.miss == CRS_REPORT)
1531 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1533 /* Set width for header columns. */
1539 w = d->width - c * (t->nc - t->l);
1540 for (i = 0; i <= t->nc; i++)
1544 if (w < d->prop_em_width * 8)
1545 w = d->prop_em_width * 8;
1547 if (w > d->prop_em_width * 15)
1548 w = d->prop_em_width * 15;
1550 for (i = 0; i < t->l; i++)
1554 for (i = t->l; i < t->nc; i++)
1557 for (i = 0; i < t->nr; i++)
1558 t->h[i] = tab_natural_height (t, d, i);
1561 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1562 int *cnt, int pivot);
1563 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1564 int *cnt, int pivot);
1566 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1568 static struct table_entry **
1569 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1571 return (mode == GENERAL
1572 ? find_pivot_extent_general (tp, cnt, pivot)
1573 : find_pivot_extent_integer (tp, cnt, pivot));
1576 /* Find the extent of a region in TP that contains one table. If
1577 PIVOT != 0 that means a set of table entries with identical table
1578 number; otherwise they also have to have the same values for every
1579 dimension after the row and column dimensions. The table that is
1580 searched starts at TP and has length CNT. Returns the first entry
1581 after the last one in the table; sets *CNT to the number of
1582 remaining values. If there are no entries in TP at all, returns
1583 NULL. A yucky interface, admittedly, but it works. */
1584 static struct table_entry **
1585 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1587 struct table_entry *fp = *tp;
1592 x = xtab[(*tp)->table];
1600 if ((*tp)->table != fp->table)
1605 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1612 /* Integer mode correspondent to find_pivot_extent_general(). This
1613 could be optimized somewhat, but I just don't give a crap about
1614 CROSSTABS performance in integer mode, which is just a
1615 CROSSTABS wart as far as I'm concerned.
1617 That said, feel free to send optimization patches to me. */
1618 static struct table_entry **
1619 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1621 struct table_entry *fp = *tp;
1626 x = xtab[(*tp)->table];
1634 if ((*tp)->table != fp->table)
1639 if (memcmp (&(*tp)->values[2], &fp->values[2],
1640 sizeof (union value) * (x->nvar - 2)))
1647 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1648 result. WIDTH_ points to an int which is either 0 for a
1649 numeric value or a string width for a string value. */
1651 compare_value (const void *a_, const void *b_, const void *width_)
1653 const union value *a = a_;
1654 const union value *b = b_;
1655 const int *pwidth = width_;
1656 const int width = *pwidth;
1659 return (a->f < b->f) ? -1 : (a->f > b->f);
1661 return strncmp (a->s, b->s, width);
1664 /* Given an array of ENTRY_CNT table_entry structures starting at
1665 ENTRIES, creates a sorted list of the values that the variable
1666 with index VAR_IDX takes on. The values are returned as a
1667 malloc()'darray stored in *VALUES, with the number of values
1668 stored in *VALUE_CNT.
1671 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1672 union value **values, int *value_cnt)
1674 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1676 if (mode == GENERAL)
1678 int width = MIN (var_get_width (v), MAX_SHORT_STRING);
1681 *values = xnmalloc (entry_cnt, sizeof **values);
1682 for (i = 0; i < entry_cnt; i++)
1683 (*values)[i] = entries[i]->values[var_idx];
1684 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1685 compare_value, &width);
1689 struct var_range *vr = get_var_range (v);
1692 assert (mode == INTEGER);
1693 *values = xnmalloc (vr->count, sizeof **values);
1694 for (i = 0; i < vr->count; i++)
1695 (*values)[i].f = i + vr->min;
1696 *value_cnt = vr->count;
1700 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1701 from V, displayed with print format spec from variable VAR. When
1702 in REPORT missing-value mode, missing values have an M appended. */
1704 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1705 const union value *v, const struct variable *var)
1708 const struct fmt_spec *print = var_get_print_format (var);
1710 const char *label = var_lookup_value_label (var, v);
1713 tab_text (table, c, r, TAB_LEFT, label);
1717 s.string = tab_alloc (table, print->w);
1718 format_short (s.string, print, v);
1719 s.length = strlen (s.string);
1720 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1721 s.string[s.length++] = 'M';
1722 while (s.length && *s.string == ' ')
1727 tab_raw (table, c, r, opt, &s);
1730 /* Draws a line across TABLE at the current row to indicate the most
1731 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1732 that changed, and puts the values that changed into the table. TB
1733 and X must be the corresponding table_entry and crosstab,
1736 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1738 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1740 for (; first_difference >= 2; first_difference--)
1741 table_value_missing (table, nvar - first_difference - 1, 0,
1742 TAB_RIGHT, &tb->values[first_difference],
1743 x->vars[first_difference]);
1746 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1747 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1748 additionally suffixed with a letter `M'. */
1750 format_cell_entry (struct tab_table *table, int c, int r, double value,
1751 char suffix, bool mark_missing)
1753 const struct fmt_spec f = {FMT_F, 10, 1};
1758 s.string = tab_alloc (table, 16);
1760 data_out (&v, &f, s.string);
1761 while (*s.string == ' ')
1767 s.string[s.length++] = suffix;
1769 s.string[s.length++] = 'M';
1771 tab_raw (table, c, r, TAB_RIGHT, &s);
1774 /* Displays the crosstabulation table. */
1776 display_crosstabulation (void)
1781 for (r = 0; r < n_rows; r++)
1782 table_value_missing (table, nvar - 2, r * num_cells,
1783 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1785 tab_text (table, nvar - 2, n_rows * num_cells,
1786 TAB_LEFT, _("Total"));
1788 /* Put in the actual cells. */
1793 tab_offset (table, nvar - 1, -1);
1794 for (r = 0; r < n_rows; r++)
1797 tab_hline (table, TAL_1, -1, n_cols, 0);
1798 for (c = 0; c < n_cols; c++)
1800 bool mark_missing = false;
1801 double expected_value = row_tot[r] * col_tot[c] / W;
1802 if (cmd.miss == CRS_REPORT
1803 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1804 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1806 mark_missing = true;
1807 for (i = 0; i < num_cells; i++)
1818 v = *mp / row_tot[r] * 100.;
1822 v = *mp / col_tot[c] * 100.;
1829 case CRS_CL_EXPECTED:
1832 case CRS_CL_RESIDUAL:
1833 v = *mp - expected_value;
1835 case CRS_CL_SRESIDUAL:
1836 v = (*mp - expected_value) / sqrt (expected_value);
1838 case CRS_CL_ASRESIDUAL:
1839 v = ((*mp - expected_value)
1840 / sqrt (expected_value
1841 * (1. - row_tot[r] / W)
1842 * (1. - col_tot[c] / W)));
1848 format_cell_entry (table, c, i, v, suffix, mark_missing);
1854 tab_offset (table, -1, tab_row (table) + num_cells);
1862 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1863 for (r = 0; r < n_rows; r++)
1865 bool mark_missing = false;
1867 if (cmd.miss == CRS_REPORT
1868 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1869 mark_missing = true;
1871 for (i = 0; i < num_cells; i++)
1886 v = row_tot[r] / W * 100.;
1890 v = row_tot[r] / W * 100.;
1893 case CRS_CL_EXPECTED:
1894 case CRS_CL_RESIDUAL:
1895 case CRS_CL_SRESIDUAL:
1896 case CRS_CL_ASRESIDUAL:
1903 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1904 tab_next_row (table);
1909 /* Column totals, grand total. */
1915 tab_hline (table, TAL_1, -1, n_cols, 0);
1916 for (c = 0; c <= n_cols; c++)
1918 double ct = c < n_cols ? col_tot[c] : W;
1919 bool mark_missing = false;
1922 if (cmd.miss == CRS_REPORT && c < n_cols
1923 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1924 mark_missing = true;
1926 for (i = 0; i < num_cells; i++)
1948 case CRS_CL_EXPECTED:
1949 case CRS_CL_RESIDUAL:
1950 case CRS_CL_SRESIDUAL:
1951 case CRS_CL_ASRESIDUAL:
1957 format_cell_entry (table, c, i, v, suffix, mark_missing);
1962 tab_offset (table, -1, tab_row (table) + last_row);
1965 tab_offset (table, 0, -1);
1968 static void calc_r (double *X, double *Y, double *, double *, double *);
1969 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1971 /* Display chi-square statistics. */
1973 display_chisq (void)
1975 static const char *chisq_stats[N_CHISQ] =
1977 N_("Pearson Chi-Square"),
1978 N_("Likelihood Ratio"),
1979 N_("Fisher's Exact Test"),
1980 N_("Continuity Correction"),
1981 N_("Linear-by-Linear Association"),
1983 double chisq_v[N_CHISQ];
1984 double fisher1, fisher2;
1990 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1992 tab_offset (chisq, nvar - 2, -1);
1994 for (i = 0; i < N_CHISQ; i++)
1996 if ((i != 2 && chisq_v[i] == SYSMIS)
1997 || (i == 2 && fisher1 == SYSMIS))
2001 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2004 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2005 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2006 tab_float (chisq, 3, 0, TAB_RIGHT,
2007 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
2012 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2013 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2015 tab_next_row (chisq);
2018 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2019 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2020 tab_next_row (chisq);
2022 tab_offset (chisq, 0, -1);
2025 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2026 double[N_SYMMETRIC]);
2028 /* Display symmetric measures. */
2030 display_symmetric (void)
2032 static const char *categories[] =
2034 N_("Nominal by Nominal"),
2035 N_("Ordinal by Ordinal"),
2036 N_("Interval by Interval"),
2037 N_("Measure of Agreement"),
2040 static const char *stats[N_SYMMETRIC] =
2044 N_("Contingency Coefficient"),
2045 N_("Kendall's tau-b"),
2046 N_("Kendall's tau-c"),
2048 N_("Spearman Correlation"),
2053 static const int stats_categories[N_SYMMETRIC] =
2055 0, 0, 0, 1, 1, 1, 1, 2, 3,
2059 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2062 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2065 tab_offset (sym, nvar - 2, -1);
2067 for (i = 0; i < N_SYMMETRIC; i++)
2069 if (sym_v[i] == SYSMIS)
2072 if (stats_categories[i] != last_cat)
2074 last_cat = stats_categories[i];
2075 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2078 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2079 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2080 if (sym_ase[i] != SYSMIS)
2081 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2082 if (sym_t[i] != SYSMIS)
2083 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2084 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2088 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2089 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2092 tab_offset (sym, 0, -1);
2095 static int calc_risk (double[], double[], double[], union value *);
2097 /* Display risk estimate. */
2102 double risk_v[3], lower[3], upper[3];
2106 if (!calc_risk (risk_v, upper, lower, c))
2109 tab_offset (risk, nvar - 2, -1);
2111 for (i = 0; i < 3; i++)
2113 if (risk_v[i] == SYSMIS)
2119 if (var_is_numeric (x->vars[COL_VAR]))
2120 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2121 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2123 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2124 var_get_name (x->vars[COL_VAR]),
2125 var_get_width (x->vars[COL_VAR]), c[0].s,
2126 var_get_width (x->vars[COL_VAR]), c[1].s);
2130 if (var_is_numeric (x->vars[ROW_VAR]))
2131 sprintf (buf, _("For cohort %s = %g"),
2132 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2134 sprintf (buf, _("For cohort %s = %.*s"),
2135 var_get_name (x->vars[ROW_VAR]),
2136 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2140 tab_text (risk, 0, 0, TAB_LEFT, buf);
2141 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2142 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2143 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2144 tab_next_row (risk);
2147 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2148 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2149 tab_next_row (risk);
2151 tab_offset (risk, 0, -1);
2154 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2155 double[N_DIRECTIONAL]);
2157 /* Display directional measures. */
2159 display_directional (void)
2161 static const char *categories[] =
2163 N_("Nominal by Nominal"),
2164 N_("Ordinal by Ordinal"),
2165 N_("Nominal by Interval"),
2168 static const char *stats[] =
2171 N_("Goodman and Kruskal tau"),
2172 N_("Uncertainty Coefficient"),
2177 static const char *types[] =
2184 static const int stats_categories[N_DIRECTIONAL] =
2186 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2189 static const int stats_stats[N_DIRECTIONAL] =
2191 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2194 static const int stats_types[N_DIRECTIONAL] =
2196 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2199 static const int *stats_lookup[] =
2206 static const char **stats_names[] =
2218 double direct_v[N_DIRECTIONAL];
2219 double direct_ase[N_DIRECTIONAL];
2220 double direct_t[N_DIRECTIONAL];
2224 if (!calc_directional (direct_v, direct_ase, direct_t))
2227 tab_offset (direct, nvar - 2, -1);
2229 for (i = 0; i < N_DIRECTIONAL; i++)
2231 if (direct_v[i] == SYSMIS)
2237 for (j = 0; j < 3; j++)
2238 if (last[j] != stats_lookup[j][i])
2241 tab_hline (direct, TAL_1, j, 6, 0);
2246 int k = last[j] = stats_lookup[j][i];
2251 string = var_get_name (x->vars[0]);
2253 string = var_get_name (x->vars[1]);
2255 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2256 gettext (stats_names[j][k]), string);
2261 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2262 if (direct_ase[i] != SYSMIS)
2263 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2264 if (direct_t[i] != SYSMIS)
2265 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2266 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2267 tab_next_row (direct);
2270 tab_offset (direct, 0, -1);
2273 /* Statistical calculations. */
2275 /* Returns the value of the gamma (factorial) function for an integer
2278 gamma_int (double x)
2283 for (i = 2; i < x; i++)
2288 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2290 static inline double
2291 Pr (int a, int b, int c, int d)
2293 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2294 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2295 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2296 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2297 / gamma_int (a + b + c + d + 1.));
2300 /* Swap the contents of A and B. */
2302 swap (int *a, int *b)
2309 /* Calculate significance for Fisher's exact test as specified in
2310 _SPSS Statistical Algorithms_, Appendix 5. */
2312 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2316 if (MIN (c, d) < MIN (a, b))
2317 swap (&a, &c), swap (&b, &d);
2318 if (MIN (b, d) < MIN (a, c))
2319 swap (&a, &b), swap (&c, &d);
2323 swap (&a, &b), swap (&c, &d);
2325 swap (&a, &c), swap (&b, &d);
2329 for (x = 0; x <= a; x++)
2330 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2332 *fisher2 = *fisher1;
2333 for (x = 1; x <= b; x++)
2334 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2337 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2338 columns with values COLS and N_ROWS rows with values ROWS. Values
2339 in the matrix sum to W. */
2341 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2342 double *fisher1, double *fisher2)
2346 chisq[0] = chisq[1] = 0.;
2347 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2348 *fisher1 = *fisher2 = SYSMIS;
2350 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2352 if (ns_rows <= 1 || ns_cols <= 1)
2354 chisq[0] = chisq[1] = SYSMIS;
2358 for (r = 0; r < n_rows; r++)
2359 for (c = 0; c < n_cols; c++)
2361 const double expected = row_tot[r] * col_tot[c] / W;
2362 const double freq = mat[n_cols * r + c];
2363 const double residual = freq - expected;
2365 chisq[0] += residual * residual / expected;
2367 chisq[1] += freq * log (expected / freq);
2378 /* Calculate Yates and Fisher exact test. */
2379 if (ns_cols == 2 && ns_rows == 2)
2381 double f11, f12, f21, f22;
2387 for (i = j = 0; i < n_cols; i++)
2388 if (col_tot[i] != 0.)
2397 f11 = mat[nz_cols[0]];
2398 f12 = mat[nz_cols[1]];
2399 f21 = mat[nz_cols[0] + n_cols];
2400 f22 = mat[nz_cols[1] + n_cols];
2405 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2408 chisq[3] = (W * x * x
2409 / (f11 + f12) / (f21 + f22)
2410 / (f11 + f21) / (f12 + f22));
2418 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2419 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2422 /* Calculate Mantel-Haenszel. */
2423 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2425 double r, ase_0, ase_1;
2426 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2428 chisq[4] = (W - 1.) * r * r;
2433 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2434 ASE_1, and ase_0 into ASE_0. The row and column values must be
2435 passed in X and Y. */
2437 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2439 double SX, SY, S, T;
2441 double sum_XYf, sum_X2Y2f;
2442 double sum_Xr, sum_X2r;
2443 double sum_Yc, sum_Y2c;
2446 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2447 for (j = 0; j < n_cols; j++)
2449 double fij = mat[j + i * n_cols];
2450 double product = X[i] * Y[j];
2451 double temp = fij * product;
2453 sum_X2Y2f += temp * product;
2456 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2458 sum_Xr += X[i] * row_tot[i];
2459 sum_X2r += pow2 (X[i]) * row_tot[i];
2463 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2465 sum_Yc += Y[i] * col_tot[i];
2466 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2470 S = sum_XYf - sum_Xr * sum_Yc / W;
2471 SX = sum_X2r - pow2 (sum_Xr) / W;
2472 SY = sum_Y2c - pow2 (sum_Yc) / W;
2475 *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / W) / (sum_X2r * sum_Y2c));
2480 for (s = c = 0., i = 0; i < n_rows; i++)
2481 for (j = 0; j < n_cols; j++)
2483 double Xresid, Yresid;
2486 Xresid = X[i] - Xbar;
2487 Yresid = Y[j] - Ybar;
2488 temp = (T * Xresid * Yresid
2490 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2491 y = mat[j + i * n_cols] * temp * temp - c;
2496 *ase_1 = sqrt (s) / (T * T);
2500 static double somers_d_v[3];
2501 static double somers_d_ase[3];
2502 static double somers_d_t[3];
2504 /* Calculate symmetric statistics and their asymptotic standard
2505 errors. Returns 0 if none could be calculated. */
2507 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2508 double t[N_SYMMETRIC])
2510 int q = MIN (ns_rows, ns_cols);
2519 for (i = 0; i < N_SYMMETRIC; i++)
2520 v[i] = ase[i] = t[i] = SYSMIS;
2523 /* Phi, Cramer's V, contingency coefficient. */
2524 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2526 double Xp = 0.; /* Pearson chi-square. */
2531 for (r = 0; r < n_rows; r++)
2532 for (c = 0; c < n_cols; c++)
2534 const double expected = row_tot[r] * col_tot[c] / W;
2535 const double freq = mat[n_cols * r + c];
2536 const double residual = freq - expected;
2538 Xp += residual * residual / expected;
2542 if (cmd.a_statistics[CRS_ST_PHI])
2544 v[0] = sqrt (Xp / W);
2545 v[1] = sqrt (Xp / (W * (q - 1)));
2547 if (cmd.a_statistics[CRS_ST_CC])
2548 v[2] = sqrt (Xp / (Xp + W));
2551 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2552 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2557 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2564 for (r = 0; r < n_rows; r++)
2565 Dr -= pow2 (row_tot[r]);
2566 for (c = 0; c < n_cols; c++)
2567 Dc -= pow2 (col_tot[c]);
2573 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2574 for (c = 0; c < n_cols; c++)
2578 for (r = 0; r < n_rows; r++)
2579 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2589 for (i = 0; i < n_rows; i++)
2593 for (j = 1; j < n_cols; j++)
2594 Cij += col_tot[j] - cum[j + i * n_cols];
2597 for (j = 1; j < n_cols; j++)
2598 Dij += cum[j + (i - 1) * n_cols];
2602 double fij = mat[j + i * n_cols];
2608 assert (j < n_cols);
2610 Cij -= col_tot[j] - cum[j + i * n_cols];
2611 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2615 Cij += cum[j - 1 + (i - 1) * n_cols];
2616 Dij -= cum[j + (i - 1) * n_cols];
2622 if (cmd.a_statistics[CRS_ST_BTAU])
2623 v[3] = (P - Q) / sqrt (Dr * Dc);
2624 if (cmd.a_statistics[CRS_ST_CTAU])
2625 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2626 if (cmd.a_statistics[CRS_ST_GAMMA])
2627 v[5] = (P - Q) / (P + Q);
2629 /* ASE for tau-b, tau-c, gamma. Calculations could be
2630 eliminated here, at expense of memory. */
2635 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2636 for (i = 0; i < n_rows; i++)
2640 for (j = 1; j < n_cols; j++)
2641 Cij += col_tot[j] - cum[j + i * n_cols];
2644 for (j = 1; j < n_cols; j++)
2645 Dij += cum[j + (i - 1) * n_cols];
2649 double fij = mat[j + i * n_cols];
2651 if (cmd.a_statistics[CRS_ST_BTAU])
2653 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2654 + v[3] * (row_tot[i] * Dc
2655 + col_tot[j] * Dr));
2656 btau_cum += fij * temp * temp;
2660 const double temp = Cij - Dij;
2661 ctau_cum += fij * temp * temp;
2664 if (cmd.a_statistics[CRS_ST_GAMMA])
2666 const double temp = Q * Cij - P * Dij;
2667 gamma_cum += fij * temp * temp;
2670 if (cmd.a_statistics[CRS_ST_D])
2672 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2673 - (P - Q) * (W - row_tot[i]));
2674 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2675 - (Q - P) * (W - col_tot[j]));
2680 assert (j < n_cols);
2682 Cij -= col_tot[j] - cum[j + i * n_cols];
2683 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2687 Cij += cum[j - 1 + (i - 1) * n_cols];
2688 Dij -= cum[j + (i - 1) * n_cols];
2694 btau_var = ((btau_cum
2695 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2697 if (cmd.a_statistics[CRS_ST_BTAU])
2699 ase[3] = sqrt (btau_var);
2700 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2703 if (cmd.a_statistics[CRS_ST_CTAU])
2705 ase[4] = ((2 * q / ((q - 1) * W * W))
2706 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2707 t[4] = v[4] / ase[4];
2709 if (cmd.a_statistics[CRS_ST_GAMMA])
2711 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2712 t[5] = v[5] / (2. / (P + Q)
2713 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2715 if (cmd.a_statistics[CRS_ST_D])
2717 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2718 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2719 somers_d_t[0] = (somers_d_v[0]
2721 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2722 somers_d_v[1] = (P - Q) / Dc;
2723 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2724 somers_d_t[1] = (somers_d_v[1]
2726 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2727 somers_d_v[2] = (P - Q) / Dr;
2728 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2729 somers_d_t[2] = (somers_d_v[2]
2731 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2737 /* Spearman correlation, Pearson's r. */
2738 if (cmd.a_statistics[CRS_ST_CORR])
2740 double *R = xmalloca (sizeof *R * n_rows);
2741 double *C = xmalloca (sizeof *C * n_cols);
2744 double y, t, c = 0., s = 0.;
2749 R[i] = s + (row_tot[i] + 1.) / 2.;
2756 assert (i < n_rows);
2761 double y, t, c = 0., s = 0.;
2766 C[j] = s + (col_tot[j] + 1.) / 2;
2773 assert (j < n_cols);
2777 calc_r (R, C, &v[6], &t[6], &ase[6]);
2783 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2787 /* Cohen's kappa. */
2788 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2790 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2793 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2794 i < ns_rows; i++, j++)
2798 while (col_tot[j] == 0.)
2801 prod = row_tot[i] * col_tot[j];
2802 sum = row_tot[i] + col_tot[j];
2804 sum_fii += mat[j + i * n_cols];
2806 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2807 sum_riciri_ci += prod * sum;
2809 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2810 for (j = 0; j < ns_cols; j++)
2812 double sum = row_tot[i] + col_tot[j];
2813 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2816 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2818 ase[8] = sqrt ((W * W * sum_rici
2819 + sum_rici * sum_rici
2820 - W * sum_riciri_ci)
2821 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2823 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2824 / pow2 (W * W - sum_rici))
2825 + ((2. * (W - sum_fii)
2826 * (2. * sum_fii * sum_rici
2827 - W * sum_fiiri_ci))
2828 / cube (W * W - sum_rici))
2829 + (pow2 (W - sum_fii)
2830 * (W * sum_fijri_ci2 - 4.
2831 * sum_rici * sum_rici)
2832 / pow4 (W * W - sum_rici))));
2834 t[8] = v[8] / ase[8];
2841 /* Calculate risk estimate. */
2843 calc_risk (double *value, double *upper, double *lower, union value *c)
2845 double f11, f12, f21, f22;
2851 for (i = 0; i < 3; i++)
2852 value[i] = upper[i] = lower[i] = SYSMIS;
2855 if (ns_rows != 2 || ns_cols != 2)
2862 for (i = j = 0; i < n_cols; i++)
2863 if (col_tot[i] != 0.)
2872 f11 = mat[nz_cols[0]];
2873 f12 = mat[nz_cols[1]];
2874 f21 = mat[nz_cols[0] + n_cols];
2875 f22 = mat[nz_cols[1] + n_cols];
2877 c[0] = cols[nz_cols[0]];
2878 c[1] = cols[nz_cols[1]];
2881 value[0] = (f11 * f22) / (f12 * f21);
2882 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2883 lower[0] = value[0] * exp (-1.960 * v);
2884 upper[0] = value[0] * exp (1.960 * v);
2886 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2887 v = sqrt ((f12 / (f11 * (f11 + f12)))
2888 + (f22 / (f21 * (f21 + f22))));
2889 lower[1] = value[1] * exp (-1.960 * v);
2890 upper[1] = value[1] * exp (1.960 * v);
2892 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2893 v = sqrt ((f11 / (f12 * (f11 + f12)))
2894 + (f21 / (f22 * (f21 + f22))));
2895 lower[2] = value[2] * exp (-1.960 * v);
2896 upper[2] = value[2] * exp (1.960 * v);
2901 /* Calculate directional measures. */
2903 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2904 double t[N_DIRECTIONAL])
2909 for (i = 0; i < N_DIRECTIONAL; i++)
2910 v[i] = ase[i] = t[i] = SYSMIS;
2914 if (cmd.a_statistics[CRS_ST_LAMBDA])
2916 double *fim = xnmalloc (n_rows, sizeof *fim);
2917 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2918 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2919 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2920 double sum_fim, sum_fmj;
2922 int rm_index, cm_index;
2925 /* Find maximum for each row and their sum. */
2926 for (sum_fim = 0., i = 0; i < n_rows; i++)
2928 double max = mat[i * n_cols];
2931 for (j = 1; j < n_cols; j++)
2932 if (mat[j + i * n_cols] > max)
2934 max = mat[j + i * n_cols];
2938 sum_fim += fim[i] = max;
2939 fim_index[i] = index;
2942 /* Find maximum for each column. */
2943 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2945 double max = mat[j];
2948 for (i = 1; i < n_rows; i++)
2949 if (mat[j + i * n_cols] > max)
2951 max = mat[j + i * n_cols];
2955 sum_fmj += fmj[j] = max;
2956 fmj_index[j] = index;
2959 /* Find maximum row total. */
2962 for (i = 1; i < n_rows; i++)
2963 if (row_tot[i] > rm)
2969 /* Find maximum column total. */
2972 for (j = 1; j < n_cols; j++)
2973 if (col_tot[j] > cm)
2979 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2980 v[1] = (sum_fmj - rm) / (W - rm);
2981 v[2] = (sum_fim - cm) / (W - cm);
2983 /* ASE1 for Y given X. */
2987 for (accum = 0., i = 0; i < n_rows; i++)
2988 for (j = 0; j < n_cols; j++)
2990 const int deltaj = j == cm_index;
2991 accum += (mat[j + i * n_cols]
2992 * pow2 ((j == fim_index[i])
2997 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3000 /* ASE0 for Y given X. */
3004 for (accum = 0., i = 0; i < n_rows; i++)
3005 if (cm_index != fim_index[i])
3006 accum += (mat[i * n_cols + fim_index[i]]
3007 + mat[i * n_cols + cm_index]);
3008 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3011 /* ASE1 for X given Y. */
3015 for (accum = 0., i = 0; i < n_rows; i++)
3016 for (j = 0; j < n_cols; j++)
3018 const int deltaj = i == rm_index;
3019 accum += (mat[j + i * n_cols]
3020 * pow2 ((i == fmj_index[j])
3025 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3028 /* ASE0 for X given Y. */
3032 for (accum = 0., j = 0; j < n_cols; j++)
3033 if (rm_index != fmj_index[j])
3034 accum += (mat[j + n_cols * fmj_index[j]]
3035 + mat[j + n_cols * rm_index]);
3036 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3039 /* Symmetric ASE0 and ASE1. */
3044 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3045 for (j = 0; j < n_cols; j++)
3047 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3048 int temp1 = (i == rm_index) + (j == cm_index);
3049 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3050 accum1 += (mat[j + i * n_cols]
3051 * pow2 (temp0 + (v[0] - 1.) * temp1));
3053 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3054 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3055 / (2. * W - rm - cm));
3064 double sum_fij2_ri, sum_fij2_ci;
3065 double sum_ri2, sum_cj2;
3067 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3068 for (j = 0; j < n_cols; j++)
3070 double temp = pow2 (mat[j + i * n_cols]);
3071 sum_fij2_ri += temp / row_tot[i];
3072 sum_fij2_ci += temp / col_tot[j];
3075 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3076 sum_ri2 += pow2 (row_tot[i]);
3078 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3079 sum_cj2 += pow2 (col_tot[j]);
3081 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3082 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3086 if (cmd.a_statistics[CRS_ST_UC])
3088 double UX, UY, UXY, P;
3089 double ase1_yx, ase1_xy, ase1_sym;
3092 for (UX = 0., i = 0; i < n_rows; i++)
3093 if (row_tot[i] > 0.)
3094 UX -= row_tot[i] / W * log (row_tot[i] / W);
3096 for (UY = 0., j = 0; j < n_cols; j++)
3097 if (col_tot[j] > 0.)
3098 UY -= col_tot[j] / W * log (col_tot[j] / W);
3100 for (UXY = P = 0., i = 0; i < n_rows; i++)
3101 for (j = 0; j < n_cols; j++)
3103 double entry = mat[j + i * n_cols];
3108 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3109 UXY -= entry / W * log (entry / W);
3112 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3113 for (j = 0; j < n_cols; j++)
3115 double entry = mat[j + i * n_cols];
3120 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3121 + (UX - UXY) * log (col_tot[j] / W));
3122 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3123 + (UY - UXY) * log (row_tot[i] / W));
3124 ase1_sym += entry * pow2 ((UXY
3125 * log (row_tot[i] * col_tot[j] / (W * W)))
3126 - (UX + UY) * log (entry / W));
3129 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3130 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3131 t[5] = v[5] / ((2. / (W * (UX + UY)))
3132 * sqrt (P - pow2 (UX + UY - UXY) / W));
3134 v[6] = (UX + UY - UXY) / UX;
3135 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3136 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3138 v[7] = (UX + UY - UXY) / UY;
3139 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3140 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3144 if (cmd.a_statistics[CRS_ST_D])
3149 calc_symmetric (NULL, NULL, NULL);
3150 for (i = 0; i < 3; i++)
3152 v[8 + i] = somers_d_v[i];
3153 ase[8 + i] = somers_d_ase[i];
3154 t[8 + i] = somers_d_t[i];
3159 if (cmd.a_statistics[CRS_ST_ETA])
3162 double sum_Xr, sum_X2r;
3166 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3168 sum_Xr += rows[i].f * row_tot[i];
3169 sum_X2r += pow2 (rows[i].f) * row_tot[i];
3171 SX = sum_X2r - pow2 (sum_Xr) / W;
3173 for (SXW = 0., j = 0; j < n_cols; j++)
3177 for (cum = 0., i = 0; i < n_rows; i++)
3179 SXW += pow2 (rows[i].f) * mat[j + i * n_cols];
3180 cum += rows[i].f * mat[j + i * n_cols];
3183 SXW -= cum * cum / col_tot[j];
3185 v[11] = sqrt (1. - SXW / SX);
3189 double sum_Yc, sum_Y2c;
3193 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3195 sum_Yc += cols[i].f * col_tot[i];
3196 sum_Y2c += pow2 (cols[i].f) * col_tot[i];
3198 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3200 for (SYW = 0., i = 0; i < n_rows; i++)
3204 for (cum = 0., j = 0; j < n_cols; j++)
3206 SYW += pow2 (cols[j].f) * mat[j + i * n_cols];
3207 cum += cols[j].f * mat[j + i * n_cols];
3210 SYW -= cum * cum / row_tot[i];
3212 v[12] = sqrt (1. - SYW / SY);
3219 /* A wrapper around data_out() that limits string output to short
3220 string width and null terminates the result. */
3222 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3224 struct fmt_spec fmt_subst;
3226 /* Limit to short string width. */
3227 if (fmt_is_string (fp->type))
3231 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3232 if (fmt_subst.type == FMT_A)
3233 fmt_subst.w = MIN (8, fmt_subst.w);
3235 fmt_subst.w = MIN (16, fmt_subst.w);
3241 data_out (v, fp, s);
3243 /* Null terminate. */