1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 - Pearson's R (but not Spearman!) is off a little.
22 - T values for Spearman's R and Pearson's R are wrong.
23 - How to calculate significance of symmetric and directional measures?
24 - Asymmetric ASEs and T values for lambda are wrong.
25 - ASE of Goodman and Kruskal's tau is not calculated.
26 - ASE of symmetric somers' d is wrong.
27 - Approx. T of uncertainty coefficient is wrong.
34 #include <gsl/gsl_cdf.h>
38 #include <data/case.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
75 missing=miss:!table/include/report;
76 +write[wr_]=none,cells,all;
77 +format=fmt:!labels/nolabels/novallabs,
80 tabl:!tables/notables,
83 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
85 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
91 /* Number of chi-square statistics. */
94 /* Number of symmetric statistics. */
97 /* Number of directional statistics. */
98 #define N_DIRECTIONAL 13
100 /* A single table entry for general mode. */
103 int table; /* Flattened table number. */
106 double freq; /* Frequency count. */
107 double *data; /* Crosstabulation table for integer mode. */
110 union value values[1]; /* Values. */
113 /* A crosstabulation. */
116 int nvar; /* Number of variables. */
117 double missing; /* Missing cases count. */
118 int ofs; /* Integer mode: Offset into sorted_tab[]. */
119 const struct variable *vars[2]; /* At least two variables; sorted by
120 larger indices first. */
123 /* Integer mode variable info. */
126 int min; /* Minimum value. */
127 int max; /* Maximum value + 1. */
128 int count; /* max - min. */
131 static inline struct var_range *
132 get_var_range (const struct variable *v)
134 return var_get_aux (v);
137 /* Indexes into crosstab.v. */
144 /* General mode crosstabulation table. */
145 static struct hsh_table *gen_tab; /* Hash table. */
146 static int n_sorted_tab; /* Number of entries in sorted_tab. */
147 static struct table_entry **sorted_tab; /* Sorted table. */
149 /* Variables specifies on VARIABLES. */
150 static const struct variable **variables;
151 static size_t variables_cnt;
154 static struct crosstab **xtab;
157 /* Integer or general mode? */
166 static int num_cells; /* Number of cells requested. */
167 static int cells[8]; /* Cells requested. */
170 static int write_style; /* One of WR_* that specifies the WRITE style. */
172 /* Command parsing info. */
173 static struct cmd_crosstabs cmd;
176 static struct pool *pl_tc; /* For table cells. */
177 static struct pool *pl_col; /* For column data. */
179 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
180 static void precalc (const struct ccase *, void *, const struct dataset *);
181 static bool calc_general (const struct ccase *, void *, const struct dataset *);
182 static bool calc_integer (const struct ccase *, void *, const struct dataset *);
183 static bool postcalc (void *, const struct dataset *);
184 static void submit (struct tab_table *);
186 static void format_short (char *s, const struct fmt_spec *fp,
187 const union value *v);
189 /* Parse and execute CROSSTABS, then clean up. */
191 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
193 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
202 /* Parses and executes the CROSSTABS procedure. */
204 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
213 pl_tc = pool_create ();
214 pl_col = pool_create ();
216 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
219 mode = variables ? INTEGER : GENERAL;
224 cmd.a_cells[CRS_CL_COUNT] = 1;
230 for (i = 0; i < CRS_CL_count; i++)
235 cmd.a_cells[CRS_CL_COUNT] = 1;
236 cmd.a_cells[CRS_CL_ROW] = 1;
237 cmd.a_cells[CRS_CL_COLUMN] = 1;
238 cmd.a_cells[CRS_CL_TOTAL] = 1;
240 if (cmd.a_cells[CRS_CL_ALL])
242 for (i = 0; i < CRS_CL_count; i++)
244 cmd.a_cells[CRS_CL_ALL] = 0;
246 cmd.a_cells[CRS_CL_NONE] = 0;
248 for (num_cells = i = 0; i < CRS_CL_count; i++)
250 cells[num_cells++] = i;
253 if (cmd.sbc_statistics)
258 for (i = 0; i < CRS_ST_count; i++)
259 if (cmd.a_statistics[i])
262 cmd.a_statistics[CRS_ST_CHISQ] = 1;
263 if (cmd.a_statistics[CRS_ST_ALL])
264 for (i = 0; i < CRS_ST_count; i++)
265 cmd.a_statistics[i] = 1;
269 if (cmd.miss == CRS_REPORT && mode == GENERAL)
271 msg (SE, _("Missing mode REPORT not allowed in general mode. "
272 "Assuming MISSING=TABLE."));
273 cmd.miss = CRS_TABLE;
277 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
278 cmd.a_write[CRS_WR_ALL] = 0;
279 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
281 msg (SE, _("Write mode ALL not allowed in general mode. "
282 "Assuming WRITE=CELLS."));
283 cmd.a_write[CRS_WR_CELLS] = 1;
286 && (cmd.a_write[CRS_WR_NONE]
287 + cmd.a_write[CRS_WR_ALL]
288 + cmd.a_write[CRS_WR_CELLS] == 0))
289 cmd.a_write[CRS_WR_CELLS] = 1;
290 if (cmd.a_write[CRS_WR_CELLS])
291 write_style = CRS_WR_CELLS;
292 else if (cmd.a_write[CRS_WR_ALL])
293 write_style = CRS_WR_ALL;
295 write_style = CRS_WR_NONE;
297 ok = procedure_with_splits (ds, precalc,
298 mode == GENERAL ? calc_general : calc_integer,
301 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
304 /* Parses the TABLES subcommand. */
306 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
308 struct const_var_set *var_set;
310 const struct variable ***by = NULL;
311 size_t *by_nvar = NULL;
315 /* Ensure that this is a TABLES subcommand. */
316 if (!lex_match_id (lexer, "TABLES")
317 && (lex_token (lexer) != T_ID ||
318 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
319 && lex_token (lexer) != T_ALL)
321 lex_match (lexer, '=');
323 if (variables != NULL)
324 var_set = const_var_set_create_from_array (variables, variables_cnt);
326 var_set = const_var_set_create_from_dict (dataset_dict (ds));
327 assert (var_set != NULL);
331 by = xnrealloc (by, n_by + 1, sizeof *by);
332 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
333 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
334 PV_NO_DUPLICATE | PV_NO_SCRATCH))
336 if (xalloc_oversized (nx, by_nvar[n_by]))
338 msg (SE, _("Too many crosstabulation variables or dimensions."));
344 if (!lex_match (lexer, T_BY))
348 lex_error (lexer, _("expecting BY"));
357 int *by_iter = xcalloc (n_by, sizeof *by_iter);
360 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
361 for (i = 0; i < nx; i++)
365 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
372 for (i = 0; i < n_by; i++)
373 x->vars[i] = by[i][by_iter[i]];
379 for (i = n_by - 1; i >= 0; i--)
381 if (++by_iter[i] < by_nvar[i])
394 /* All return paths lead here. */
398 for (i = 0; i < n_by; i++)
404 const_var_set_destroy (var_set);
409 /* Parses the VARIABLES subcommand. */
411 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
415 msg (SE, _("VARIABLES must be specified before TABLES."));
419 lex_match (lexer, '=');
423 size_t orig_nv = variables_cnt;
428 if (!parse_variables_const (lexer, dataset_dict (ds),
429 &variables, &variables_cnt,
430 (PV_APPEND | PV_NUMERIC
431 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
434 if (lex_token (lexer) != '(')
436 lex_error (lexer, "expecting `('");
441 if (!lex_force_int (lexer))
443 min = lex_integer (lexer);
446 lex_match (lexer, ',');
448 if (!lex_force_int (lexer))
450 max = lex_integer (lexer);
453 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
459 if (lex_token (lexer) != ')')
461 lex_error (lexer, "expecting `)'");
466 for (i = orig_nv; i < variables_cnt; i++)
468 struct var_range *vr = xmalloc (sizeof *vr);
471 vr->count = max - min + 1;
472 var_attach_aux (variables[i], vr, var_dtor_free);
475 if (lex_token (lexer) == '/')
487 /* Data file processing. */
489 static int compare_table_entry (const void *, const void *, const void *);
490 static unsigned hash_table_entry (const void *, const void *);
492 /* Set up the crosstabulation tables for processing. */
494 precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
496 output_split_file_values (ds, first);
499 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
509 for (i = 0; i < nxtab; i++)
511 struct crosstab *x = xtab[i];
516 x->ofs = n_sorted_tab;
518 for (j = 2; j < x->nvar; j++)
519 count *= get_var_range (x->vars[j - 2])->count;
521 sorted_tab = xnrealloc (sorted_tab,
522 n_sorted_tab + count, sizeof *sorted_tab);
523 v = local_alloc (sizeof *v * x->nvar);
524 for (j = 2; j < x->nvar; j++)
525 v[j] = get_var_range (x->vars[j])->min;
526 for (j = 0; j < count; j++)
528 struct table_entry *te;
531 te = sorted_tab[n_sorted_tab++]
532 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
536 int row_cnt = get_var_range (x->vars[0])->count;
537 int col_cnt = get_var_range (x->vars[1])->count;
538 const int mat_size = row_cnt * col_cnt;
541 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
542 for (m = 0; m < mat_size; m++)
546 for (k = 2; k < x->nvar; k++)
547 te->values[k].f = v[k];
548 for (k = 2; k < x->nvar; k++)
550 struct var_range *vr = get_var_range (x->vars[k]);
551 if (++v[k] >= vr->max)
560 sorted_tab = xnrealloc (sorted_tab,
561 n_sorted_tab + 1, sizeof *sorted_tab);
562 sorted_tab[n_sorted_tab] = NULL;
567 /* Form crosstabulations for general mode. */
569 calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
571 bool bad_warn = true;
573 /* Missing values to exclude. */
574 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
575 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
579 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
581 /* Flattened current table index. */
584 for (t = 0; t < nxtab; t++)
586 struct crosstab *x = xtab[t];
587 const size_t entry_size = (sizeof (struct table_entry)
588 + sizeof (union value) * (x->nvar - 1));
589 struct table_entry *te = local_alloc (entry_size);
591 /* Construct table entry for the current record and table. */
597 for (j = 0; j < x->nvar; j++)
599 const union value *v = case_data (c, x->vars[j]);
600 if (var_is_value_missing (x->vars[j], v, exclude))
602 x->missing += weight;
606 if (var_is_numeric (x->vars[j]))
607 te->values[j].f = case_num (c, x->vars[j]);
610 memcpy (te->values[j].s, case_str (c, x->vars[j]),
611 var_get_width (x->vars[j]));
613 /* Necessary in order to simplify comparisons. */
614 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
615 sizeof (union value) - var_get_width (x->vars[j]));
620 /* Add record to hash table. */
622 struct table_entry **tepp
623 = (struct table_entry **) hsh_probe (gen_tab, te);
626 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
629 memcpy (tep, te, entry_size);
634 (*tepp)->u.freq += weight;
645 calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
647 bool bad_warn = true;
650 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
652 /* Flattened current table index. */
655 for (t = 0; t < nxtab; t++)
657 struct crosstab *x = xtab[t];
662 for (i = 0; i < x->nvar; i++)
664 const struct variable *const v = x->vars[i];
665 struct var_range *vr = get_var_range (v);
666 double value = case_num (c, v);
668 /* Note that the first test also rules out SYSMIS. */
669 if ((value < vr->min || value >= vr->max)
670 || (cmd.miss == CRS_TABLE
671 && var_is_num_missing (v, value, MV_USER)))
673 x->missing += weight;
679 ofs += fact * ((int) value - vr->min);
685 const struct variable *row_var = x->vars[ROW_VAR];
686 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
688 const struct variable *col_var = x->vars[COL_VAR];
689 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
691 const int col_dim = get_var_range (col_var)->count;
693 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
702 /* Compare the table_entry's at A and B and return a strcmp()-type
705 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
707 const struct table_entry *a = a_;
708 const struct table_entry *b = b_;
710 if (a->table > b->table)
712 else if (a->table < b->table)
716 const struct crosstab *x = xtab[a->table];
719 for (i = x->nvar - 1; i >= 0; i--)
720 if (var_is_numeric (x->vars[i]))
722 const double diffnum = a->values[i].f - b->values[i].f;
725 else if (diffnum > 0)
730 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
731 var_get_width (x->vars[i]));
740 /* Calculate a hash value from table_entry A. */
742 hash_table_entry (const void *a_, const void *aux UNUSED)
744 const struct table_entry *a = a_;
749 for (i = 0; i < xtab[a->table]->nvar; i++)
750 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
755 /* Post-data reading calculations. */
757 static struct table_entry **find_pivot_extent (struct table_entry **,
758 int *cnt, int pivot);
759 static void enum_var_values (struct table_entry **entries, int entry_cnt,
761 union value **values, int *value_cnt);
762 static void output_pivot_table (struct table_entry **, struct table_entry **,
763 double **, double **, double **,
764 int *, int *, int *);
765 static void make_summary_table (void);
768 postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
772 n_sorted_tab = hsh_count (gen_tab);
773 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
776 make_summary_table ();
778 /* Identify all the individual crosstabulation tables, and deal with
781 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
782 int pc = n_sorted_tab; /* Pivot count. */
784 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
785 int maxrows = 0, maxcols = 0, maxcells = 0;
789 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
793 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
794 &maxrows, &maxcols, &maxcells);
803 hsh_destroy (gen_tab);
808 static void insert_summary (struct tab_table *, int tab_index, double valid);
810 /* Output a table summarizing the cases processed. */
812 make_summary_table (void)
814 struct tab_table *summary;
816 struct table_entry **pb = sorted_tab, **pe;
817 int pc = n_sorted_tab;
820 summary = tab_create (7, 3 + nxtab, 1);
821 tab_title (summary, _("Summary."));
822 tab_headers (summary, 1, 0, 3, 0);
823 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
824 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
825 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
826 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
827 tab_hline (summary, TAL_1, 1, 6, 1);
828 tab_hline (summary, TAL_1, 1, 6, 2);
829 tab_vline (summary, TAL_1, 3, 1, 1);
830 tab_vline (summary, TAL_1, 5, 1, 1);
834 for (i = 0; i < 3; i++)
836 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
837 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
840 tab_offset (summary, 0, 3);
846 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
850 while (cur_tab < (*pb)->table)
851 insert_summary (summary, cur_tab++, 0.);
854 for (valid = 0.; pb < pe; pb++)
855 valid += (*pb)->u.freq;
858 const struct crosstab *const x = xtab[(*pb)->table];
859 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
860 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
861 const int count = n_cols * n_rows;
863 for (valid = 0.; pb < pe; pb++)
865 const double *data = (*pb)->u.data;
868 for (i = 0; i < count; i++)
872 insert_summary (summary, cur_tab++, valid);
877 while (cur_tab < nxtab)
878 insert_summary (summary, cur_tab++, 0.);
883 /* Inserts a line into T describing the crosstabulation at index
884 TAB_INDEX, which has VALID valid observations. */
886 insert_summary (struct tab_table *t, int tab_index, double valid)
888 struct crosstab *x = xtab[tab_index];
890 tab_hline (t, TAL_1, 0, 6, 0);
892 /* Crosstabulation name. */
894 char *buf = local_alloc (128 * x->nvar);
898 for (i = 0; i < x->nvar; i++)
901 cp = stpcpy (cp, " * ");
903 cp = stpcpy (cp, var_to_string (x->vars[i]));
905 tab_text (t, 0, 0, TAB_LEFT, buf);
910 /* Counts and percentages. */
920 for (i = 0; i < 3; i++)
922 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
923 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
934 static struct tab_table *table; /* Crosstabulation table. */
935 static struct tab_table *chisq; /* Chi-square table. */
936 static struct tab_table *sym; /* Symmetric measures table. */
937 static struct tab_table *risk; /* Risk estimate table. */
938 static struct tab_table *direct; /* Directional measures table. */
941 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
943 /* Column values, number of columns. */
944 static union value *cols;
947 /* Row values, number of rows. */
948 static union value *rows;
951 /* Number of statistically interesting columns/rows (columns/rows with
953 static int ns_cols, ns_rows;
955 /* Crosstabulation. */
956 static const struct crosstab *x;
958 /* Number of variables from the crosstabulation to consider. This is
959 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
962 /* Matrix contents. */
963 static double *mat; /* Matrix proper. */
964 static double *row_tot; /* Row totals. */
965 static double *col_tot; /* Column totals. */
966 static double W; /* Grand total. */
968 static void display_dimensions (struct tab_table *, int first_difference,
969 struct table_entry *);
970 static void display_crosstabulation (void);
971 static void display_chisq (void);
972 static void display_symmetric (void);
973 static void display_risk (void);
974 static void display_directional (void);
975 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
976 static void table_value_missing (struct tab_table *table, int c, int r,
977 unsigned char opt, const union value *v,
978 const struct variable *var);
979 static void delete_missing (void);
981 /* Output pivot table beginning at PB and continuing until PE,
982 exclusive. For efficiency, *MATP is a pointer to a matrix that can
983 hold *MAXROWS entries. */
985 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
986 double **matp, double **row_totp, double **col_totp,
987 int *maxrows, int *maxcols, int *maxcells)
990 struct table_entry **tb = pb, **te; /* Table begin, table end. */
991 int tc = pe - pb; /* Table count. */
993 /* Table entry for header comparison. */
994 struct table_entry *cmp = NULL;
996 x = xtab[(*pb)->table];
997 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
999 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1001 /* Crosstabulation table initialization. */
1004 table = tab_create (nvar + n_cols,
1005 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1006 tab_headers (table, nvar - 1, 0, 2, 0);
1008 /* First header line. */
1009 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1010 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1012 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1014 /* Second header line. */
1018 for (i = 2; i < nvar; i++)
1019 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1020 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1021 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1022 var_get_name (x->vars[ROW_VAR]));
1023 for (i = 0; i < n_cols; i++)
1024 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1026 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1029 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1030 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1034 char *title = local_alloc (x->nvar * 64 + 128);
1038 if (cmd.pivot == CRS_PIVOT)
1039 for (i = 0; i < nvar; i++)
1042 cp = stpcpy (cp, " by ");
1043 cp = stpcpy (cp, var_get_name (x->vars[i]));
1047 cp = spprintf (cp, "%s by %s for",
1048 var_get_name (x->vars[0]),
1049 var_get_name (x->vars[1]));
1050 for (i = 2; i < nvar; i++)
1052 char buf[64], *bufp;
1057 cp = stpcpy (cp, var_get_name (x->vars[i]));
1059 format_short (buf, var_get_print_format (x->vars[i]),
1061 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1063 cp = stpcpy (cp, bufp);
1067 cp = stpcpy (cp, " [");
1068 for (i = 0; i < num_cells; i++)
1076 static const struct tuple cell_names[] =
1078 {CRS_CL_COUNT, N_("count")},
1079 {CRS_CL_ROW, N_("row %")},
1080 {CRS_CL_COLUMN, N_("column %")},
1081 {CRS_CL_TOTAL, N_("total %")},
1082 {CRS_CL_EXPECTED, N_("expected")},
1083 {CRS_CL_RESIDUAL, N_("residual")},
1084 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1085 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1089 const struct tuple *t;
1091 for (t = cell_names; t->value != cells[i]; t++)
1092 assert (t->value != -1);
1094 cp = stpcpy (cp, ", ");
1095 cp = stpcpy (cp, gettext (t->name));
1099 tab_title (table, "%s", title);
1103 tab_offset (table, 0, 2);
1108 /* Chi-square table initialization. */
1109 if (cmd.a_statistics[CRS_ST_CHISQ])
1111 chisq = tab_create (6 + (nvar - 2),
1112 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1113 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1115 tab_title (chisq, _("Chi-square tests."));
1117 tab_offset (chisq, nvar - 2, 0);
1118 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1119 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1120 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1121 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1122 _("Asymp. Sig. (2-sided)"));
1123 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1124 _("Exact. Sig. (2-sided)"));
1125 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1126 _("Exact. Sig. (1-sided)"));
1128 tab_offset (chisq, 0, 1);
1133 /* Symmetric measures. */
1134 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1135 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1136 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1137 || cmd.a_statistics[CRS_ST_KAPPA])
1139 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1140 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1141 tab_title (sym, _("Symmetric measures."));
1143 tab_offset (sym, nvar - 2, 0);
1144 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1145 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1146 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1147 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1148 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1149 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1150 tab_offset (sym, 0, 1);
1155 /* Risk estimate. */
1156 if (cmd.a_statistics[CRS_ST_RISK])
1158 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1159 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1160 tab_title (risk, _("Risk estimate."));
1162 tab_offset (risk, nvar - 2, 0);
1163 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1164 _("95%% Confidence Interval"));
1165 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1166 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1167 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1168 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1169 tab_hline (risk, TAL_1, 2, 3, 1);
1170 tab_vline (risk, TAL_1, 2, 0, 1);
1171 tab_offset (risk, 0, 2);
1176 /* Directional measures. */
1177 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1178 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1180 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1181 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1182 tab_title (direct, _("Directional measures."));
1184 tab_offset (direct, nvar - 2, 0);
1185 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1186 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1187 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1188 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1189 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1190 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1191 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1192 tab_offset (direct, 0, 1);
1199 /* Find pivot subtable if applicable. */
1200 te = find_pivot_extent (tb, &tc, 0);
1204 /* Find all the row variable values. */
1205 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1207 /* Allocate memory space for the column and row totals. */
1208 if (n_rows > *maxrows)
1210 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1211 row_tot = *row_totp;
1214 if (n_cols > *maxcols)
1216 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1217 col_tot = *col_totp;
1221 /* Allocate table space for the matrix. */
1222 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1223 tab_realloc (table, -1,
1224 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1225 tab_nr (table) * (pe - pb) / (te - tb)));
1227 if (mode == GENERAL)
1229 /* Allocate memory space for the matrix. */
1230 if (n_cols * n_rows > *maxcells)
1232 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1233 *maxcells = n_cols * n_rows;
1238 /* Build the matrix and calculate column totals. */
1240 union value *cur_col = cols;
1241 union value *cur_row = rows;
1243 double *cp = col_tot;
1244 struct table_entry **p;
1247 for (p = &tb[0]; p < te; p++)
1249 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1253 for (; cur_row < &rows[n_rows]; cur_row++)
1259 mp = &mat[cur_col - cols];
1262 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1269 *cp += *mp = (*p)->u.freq;
1274 /* Zero out the rest of the matrix. */
1275 for (; cur_row < &rows[n_rows]; cur_row++)
1281 if (cur_col < &cols[n_cols])
1283 const int rem_cols = n_cols - (cur_col - cols);
1286 for (c = 0; c < rem_cols; c++)
1288 mp = &mat[cur_col - cols];
1289 for (r = 0; r < n_rows; r++)
1291 for (c = 0; c < rem_cols; c++)
1293 mp += n_cols - rem_cols;
1301 double *tp = col_tot;
1303 assert (mode == INTEGER);
1304 mat = (*tb)->u.data;
1307 /* Calculate column totals. */
1308 for (c = 0; c < n_cols; c++)
1311 double *cp = &mat[c];
1313 for (r = 0; r < n_rows; r++)
1314 cum += cp[r * n_cols];
1322 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1323 ns_cols += *cp != 0.;
1326 /* Calculate row totals. */
1329 double *rp = row_tot;
1332 for (ns_rows = 0, r = n_rows; r--; )
1335 for (c = n_cols; c--; )
1343 /* Calculate grand total. */
1349 if (n_rows < n_cols)
1350 tp = row_tot, n = n_rows;
1352 tp = col_tot, n = n_cols;
1358 /* Find the first variable that differs from the last subtable,
1359 then display the values of the dimensioning variables for
1360 each table that needs it. */
1362 int first_difference = nvar - 1;
1365 for (; ; first_difference--)
1367 assert (first_difference >= 2);
1368 if (memcmp (&cmp->values[first_difference],
1369 &(*tb)->values[first_difference],
1370 sizeof *cmp->values))
1376 display_dimensions (table, first_difference, *tb);
1378 display_dimensions (chisq, first_difference, *tb);
1380 display_dimensions (sym, first_difference, *tb);
1382 display_dimensions (risk, first_difference, *tb);
1384 display_dimensions (direct, first_difference, *tb);
1388 display_crosstabulation ();
1389 if (cmd.miss == CRS_REPORT)
1394 display_symmetric ();
1398 display_directional ();
1409 tab_resize (chisq, 4 + (nvar - 2), -1);
1420 /* Delete missing rows and columns for statistical analysis when
1423 delete_missing (void)
1428 for (r = 0; r < n_rows; r++)
1429 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1433 for (c = 0; c < n_cols; c++)
1434 mat[c + r * n_cols] = 0.;
1442 for (c = 0; c < n_cols; c++)
1443 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1447 for (r = 0; r < n_rows; r++)
1448 mat[c + r * n_cols] = 0.;
1454 /* Prepare table T for submission, and submit it. */
1456 submit (struct tab_table *t)
1463 tab_resize (t, -1, 0);
1464 if (tab_nr (t) == tab_t (t))
1469 tab_offset (t, 0, 0);
1471 for (i = 2; i < nvar; i++)
1472 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1473 var_to_string (x->vars[i]));
1474 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1475 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1477 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1479 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1480 tab_dim (t, crosstabs_dim);
1484 /* Sets the widths of all the columns and heights of all the rows in
1485 table T for driver D. */
1487 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1491 /* Width of a numerical column. */
1492 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1493 if (cmd.miss == CRS_REPORT)
1494 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1496 /* Set width for header columns. */
1502 w = d->width - c * (t->nc - t->l);
1503 for (i = 0; i <= t->nc; i++)
1507 if (w < d->prop_em_width * 8)
1508 w = d->prop_em_width * 8;
1510 if (w > d->prop_em_width * 15)
1511 w = d->prop_em_width * 15;
1513 for (i = 0; i < t->l; i++)
1517 for (i = t->l; i < t->nc; i++)
1520 for (i = 0; i < t->nr; i++)
1521 t->h[i] = tab_natural_height (t, d, i);
1524 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1525 int *cnt, int pivot);
1526 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1527 int *cnt, int pivot);
1529 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1531 static struct table_entry **
1532 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1534 return (mode == GENERAL
1535 ? find_pivot_extent_general (tp, cnt, pivot)
1536 : find_pivot_extent_integer (tp, cnt, pivot));
1539 /* Find the extent of a region in TP that contains one table. If
1540 PIVOT != 0 that means a set of table entries with identical table
1541 number; otherwise they also have to have the same values for every
1542 dimension after the row and column dimensions. The table that is
1543 searched starts at TP and has length CNT. Returns the first entry
1544 after the last one in the table; sets *CNT to the number of
1545 remaining values. If there are no entries in TP at all, returns
1546 NULL. A yucky interface, admittedly, but it works. */
1547 static struct table_entry **
1548 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1550 struct table_entry *fp = *tp;
1555 x = xtab[(*tp)->table];
1563 if ((*tp)->table != fp->table)
1568 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1575 /* Integer mode correspondent to find_pivot_extent_general(). This
1576 could be optimized somewhat, but I just don't give a crap about
1577 CROSSTABS performance in integer mode, which is just a
1578 CROSSTABS wart as far as I'm concerned.
1580 That said, feel free to send optimization patches to me. */
1581 static struct table_entry **
1582 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1584 struct table_entry *fp = *tp;
1589 x = xtab[(*tp)->table];
1597 if ((*tp)->table != fp->table)
1602 if (memcmp (&(*tp)->values[2], &fp->values[2],
1603 sizeof (union value) * (x->nvar - 2)))
1610 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1611 result. WIDTH_ points to an int which is either 0 for a
1612 numeric value or a string width for a string value. */
1614 compare_value (const void *a_, const void *b_, const void *width_)
1616 const union value *a = a_;
1617 const union value *b = b_;
1618 const int *pwidth = width_;
1619 const int width = *pwidth;
1622 return (a->f < b->f) ? -1 : (a->f > b->f);
1624 return strncmp (a->s, b->s, width);
1627 /* Given an array of ENTRY_CNT table_entry structures starting at
1628 ENTRIES, creates a sorted list of the values that the variable
1629 with index VAR_IDX takes on. The values are returned as a
1630 malloc()'darray stored in *VALUES, with the number of values
1631 stored in *VALUE_CNT.
1634 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1635 union value **values, int *value_cnt)
1637 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1639 if (mode == GENERAL)
1641 int width = var_get_width (v);
1644 *values = xnmalloc (entry_cnt, sizeof **values);
1645 for (i = 0; i < entry_cnt; i++)
1646 (*values)[i] = entries[i]->values[var_idx];
1647 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1648 compare_value, &width);
1652 struct var_range *vr = get_var_range (v);
1655 assert (mode == INTEGER);
1656 *values = xnmalloc (vr->count, sizeof **values);
1657 for (i = 0; i < vr->count; i++)
1658 (*values)[i].f = i + vr->min;
1659 *value_cnt = vr->count;
1663 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1664 from V, displayed with print format spec from variable VAR. When
1665 in REPORT missing-value mode, missing values have an M appended. */
1667 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1668 const union value *v, const struct variable *var)
1671 const struct fmt_spec *print = var_get_print_format (var);
1673 const char *label = var_lookup_value_label (var, v);
1676 tab_text (table, c, r, TAB_LEFT, label);
1680 s.string = tab_alloc (table, print->w);
1681 format_short (s.string, print, v);
1682 s.length = strlen (s.string);
1683 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1684 s.string[s.length++] = 'M';
1685 while (s.length && *s.string == ' ')
1690 tab_raw (table, c, r, opt, &s);
1693 /* Draws a line across TABLE at the current row to indicate the most
1694 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1695 that changed, and puts the values that changed into the table. TB
1696 and X must be the corresponding table_entry and crosstab,
1699 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1701 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1703 for (; first_difference >= 2; first_difference--)
1704 table_value_missing (table, nvar - first_difference - 1, 0,
1705 TAB_RIGHT, &tb->values[first_difference],
1706 x->vars[first_difference]);
1709 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1710 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1711 additionally suffixed with a letter `M'. */
1713 format_cell_entry (struct tab_table *table, int c, int r, double value,
1714 char suffix, bool mark_missing)
1716 const struct fmt_spec f = {FMT_F, 10, 1};
1721 s.string = tab_alloc (table, 16);
1723 data_out (&v, &f, s.string);
1724 while (*s.string == ' ')
1730 s.string[s.length++] = suffix;
1732 s.string[s.length++] = 'M';
1734 tab_raw (table, c, r, TAB_RIGHT, &s);
1737 /* Displays the crosstabulation table. */
1739 display_crosstabulation (void)
1744 for (r = 0; r < n_rows; r++)
1745 table_value_missing (table, nvar - 2, r * num_cells,
1746 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1748 tab_text (table, nvar - 2, n_rows * num_cells,
1749 TAB_LEFT, _("Total"));
1751 /* Put in the actual cells. */
1756 tab_offset (table, nvar - 1, -1);
1757 for (r = 0; r < n_rows; r++)
1760 tab_hline (table, TAL_1, -1, n_cols, 0);
1761 for (c = 0; c < n_cols; c++)
1763 bool mark_missing = false;
1764 double expected_value = row_tot[r] * col_tot[c] / W;
1765 if (cmd.miss == CRS_REPORT
1766 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1767 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1769 mark_missing = true;
1770 for (i = 0; i < num_cells; i++)
1781 v = *mp / row_tot[r] * 100.;
1785 v = *mp / col_tot[c] * 100.;
1792 case CRS_CL_EXPECTED:
1795 case CRS_CL_RESIDUAL:
1796 v = *mp - expected_value;
1798 case CRS_CL_SRESIDUAL:
1799 v = (*mp - expected_value) / sqrt (expected_value);
1801 case CRS_CL_ASRESIDUAL:
1802 v = ((*mp - expected_value)
1803 / sqrt (expected_value
1804 * (1. - row_tot[r] / W)
1805 * (1. - col_tot[c] / W)));
1811 format_cell_entry (table, c, i, v, suffix, mark_missing);
1817 tab_offset (table, -1, tab_row (table) + num_cells);
1825 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1826 for (r = 0; r < n_rows; r++)
1829 bool mark_missing = false;
1831 if (cmd.miss == CRS_REPORT
1832 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1833 mark_missing = true;
1835 for (i = 0; i < num_cells; i++)
1849 v = row_tot[r] / W * 100.;
1853 v = row_tot[r] / W * 100.;
1856 case CRS_CL_EXPECTED:
1857 case CRS_CL_RESIDUAL:
1858 case CRS_CL_SRESIDUAL:
1859 case CRS_CL_ASRESIDUAL:
1866 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1867 tab_next_row (table);
1872 /* Column totals, grand total. */
1878 tab_hline (table, TAL_1, -1, n_cols, 0);
1879 for (c = 0; c <= n_cols; c++)
1881 double ct = c < n_cols ? col_tot[c] : W;
1882 bool mark_missing = false;
1886 if (cmd.miss == CRS_REPORT && c < n_cols
1887 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1888 mark_missing = true;
1890 for (i = 0; i < num_cells; i++)
1912 case CRS_CL_EXPECTED:
1913 case CRS_CL_RESIDUAL:
1914 case CRS_CL_SRESIDUAL:
1915 case CRS_CL_ASRESIDUAL:
1921 format_cell_entry (table, c, i, v, suffix, mark_missing);
1926 tab_offset (table, -1, tab_row (table) + last_row);
1929 tab_offset (table, 0, -1);
1932 static void calc_r (double *X, double *Y, double *, double *, double *);
1933 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1935 /* Display chi-square statistics. */
1937 display_chisq (void)
1939 static const char *chisq_stats[N_CHISQ] =
1941 N_("Pearson Chi-Square"),
1942 N_("Likelihood Ratio"),
1943 N_("Fisher's Exact Test"),
1944 N_("Continuity Correction"),
1945 N_("Linear-by-Linear Association"),
1947 double chisq_v[N_CHISQ];
1948 double fisher1, fisher2;
1954 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1956 tab_offset (chisq, nvar - 2, -1);
1958 for (i = 0; i < N_CHISQ; i++)
1960 if ((i != 2 && chisq_v[i] == SYSMIS)
1961 || (i == 2 && fisher1 == SYSMIS))
1965 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1968 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1969 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1970 tab_float (chisq, 3, 0, TAB_RIGHT,
1971 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1976 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1977 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1979 tab_next_row (chisq);
1982 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1983 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1984 tab_next_row (chisq);
1986 tab_offset (chisq, 0, -1);
1989 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1990 double[N_SYMMETRIC]);
1992 /* Display symmetric measures. */
1994 display_symmetric (void)
1996 static const char *categories[] =
1998 N_("Nominal by Nominal"),
1999 N_("Ordinal by Ordinal"),
2000 N_("Interval by Interval"),
2001 N_("Measure of Agreement"),
2004 static const char *stats[N_SYMMETRIC] =
2008 N_("Contingency Coefficient"),
2009 N_("Kendall's tau-b"),
2010 N_("Kendall's tau-c"),
2012 N_("Spearman Correlation"),
2017 static const int stats_categories[N_SYMMETRIC] =
2019 0, 0, 0, 1, 1, 1, 1, 2, 3,
2023 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2026 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2029 tab_offset (sym, nvar - 2, -1);
2031 for (i = 0; i < N_SYMMETRIC; i++)
2033 if (sym_v[i] == SYSMIS)
2036 if (stats_categories[i] != last_cat)
2038 last_cat = stats_categories[i];
2039 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2042 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2043 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2044 if (sym_ase[i] != SYSMIS)
2045 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2046 if (sym_t[i] != SYSMIS)
2047 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2048 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2052 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2053 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2056 tab_offset (sym, 0, -1);
2059 static int calc_risk (double[], double[], double[], union value *);
2061 /* Display risk estimate. */
2066 double risk_v[3], lower[3], upper[3];
2070 if (!calc_risk (risk_v, upper, lower, c))
2073 tab_offset (risk, nvar - 2, -1);
2075 for (i = 0; i < 3; i++)
2077 if (risk_v[i] == SYSMIS)
2083 if (var_is_numeric (x->vars[COL_VAR]))
2084 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2085 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2087 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2088 var_get_name (x->vars[COL_VAR]),
2089 var_get_width (x->vars[COL_VAR]), c[0].s,
2090 var_get_width (x->vars[COL_VAR]), c[1].s);
2094 if (var_is_numeric (x->vars[ROW_VAR]))
2095 sprintf (buf, _("For cohort %s = %g"),
2096 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2098 sprintf (buf, _("For cohort %s = %.*s"),
2099 var_get_name (x->vars[ROW_VAR]),
2100 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2104 tab_text (risk, 0, 0, TAB_LEFT, buf);
2105 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2106 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2107 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2108 tab_next_row (risk);
2111 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2112 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2113 tab_next_row (risk);
2115 tab_offset (risk, 0, -1);
2118 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2119 double[N_DIRECTIONAL]);
2121 /* Display directional measures. */
2123 display_directional (void)
2125 static const char *categories[] =
2127 N_("Nominal by Nominal"),
2128 N_("Ordinal by Ordinal"),
2129 N_("Nominal by Interval"),
2132 static const char *stats[] =
2135 N_("Goodman and Kruskal tau"),
2136 N_("Uncertainty Coefficient"),
2141 static const char *types[] =
2148 static const int stats_categories[N_DIRECTIONAL] =
2150 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2153 static const int stats_stats[N_DIRECTIONAL] =
2155 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2158 static const int stats_types[N_DIRECTIONAL] =
2160 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2163 static const int *stats_lookup[] =
2170 static const char **stats_names[] =
2182 double direct_v[N_DIRECTIONAL];
2183 double direct_ase[N_DIRECTIONAL];
2184 double direct_t[N_DIRECTIONAL];
2188 if (!calc_directional (direct_v, direct_ase, direct_t))
2191 tab_offset (direct, nvar - 2, -1);
2193 for (i = 0; i < N_DIRECTIONAL; i++)
2195 if (direct_v[i] == SYSMIS)
2201 for (j = 0; j < 3; j++)
2202 if (last[j] != stats_lookup[j][i])
2205 tab_hline (direct, TAL_1, j, 6, 0);
2210 int k = last[j] = stats_lookup[j][i];
2215 string = var_get_name (x->vars[0]);
2217 string = var_get_name (x->vars[1]);
2219 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2220 gettext (stats_names[j][k]), string);
2225 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2226 if (direct_ase[i] != SYSMIS)
2227 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2228 if (direct_t[i] != SYSMIS)
2229 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2230 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2231 tab_next_row (direct);
2234 tab_offset (direct, 0, -1);
2237 /* Statistical calculations. */
2239 /* Returns the value of the gamma (factorial) function for an integer
2242 gamma_int (double x)
2247 for (i = 2; i < x; i++)
2252 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2254 static inline double
2255 Pr (int a, int b, int c, int d)
2257 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2258 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2259 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2260 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2261 / gamma_int (a + b + c + d + 1.));
2264 /* Swap the contents of A and B. */
2266 swap (int *a, int *b)
2273 /* Calculate significance for Fisher's exact test as specified in
2274 _SPSS Statistical Algorithms_, Appendix 5. */
2276 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2280 if (MIN (c, d) < MIN (a, b))
2281 swap (&a, &c), swap (&b, &d);
2282 if (MIN (b, d) < MIN (a, c))
2283 swap (&a, &b), swap (&c, &d);
2287 swap (&a, &b), swap (&c, &d);
2289 swap (&a, &c), swap (&b, &d);
2293 for (x = 0; x <= a; x++)
2294 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2296 *fisher2 = *fisher1;
2297 for (x = 1; x <= b; x++)
2298 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2301 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2302 columns with values COLS and N_ROWS rows with values ROWS. Values
2303 in the matrix sum to W. */
2305 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2306 double *fisher1, double *fisher2)
2310 chisq[0] = chisq[1] = 0.;
2311 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2312 *fisher1 = *fisher2 = SYSMIS;
2314 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2316 if (ns_rows <= 1 || ns_cols <= 1)
2318 chisq[0] = chisq[1] = SYSMIS;
2322 for (r = 0; r < n_rows; r++)
2323 for (c = 0; c < n_cols; c++)
2325 const double expected = row_tot[r] * col_tot[c] / W;
2326 const double freq = mat[n_cols * r + c];
2327 const double residual = freq - expected;
2329 chisq[0] += residual * residual / expected;
2331 chisq[1] += freq * log (expected / freq);
2342 /* Calculate Yates and Fisher exact test. */
2343 if (ns_cols == 2 && ns_rows == 2)
2345 double f11, f12, f21, f22;
2351 for (i = j = 0; i < n_cols; i++)
2352 if (col_tot[i] != 0.)
2361 f11 = mat[nz_cols[0]];
2362 f12 = mat[nz_cols[1]];
2363 f21 = mat[nz_cols[0] + n_cols];
2364 f22 = mat[nz_cols[1] + n_cols];
2369 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2372 chisq[3] = (W * x * x
2373 / (f11 + f12) / (f21 + f22)
2374 / (f11 + f21) / (f12 + f22));
2382 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2383 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2386 /* Calculate Mantel-Haenszel. */
2387 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2389 double r, ase_0, ase_1;
2390 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2392 chisq[4] = (W - 1.) * r * r;
2397 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2398 ASE_1, and ase_0 into ASE_0. The row and column values must be
2399 passed in X and Y. */
2401 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2403 double SX, SY, S, T;
2405 double sum_XYf, sum_X2Y2f;
2406 double sum_Xr, sum_X2r;
2407 double sum_Yc, sum_Y2c;
2410 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2411 for (j = 0; j < n_cols; j++)
2413 double fij = mat[j + i * n_cols];
2414 double product = X[i] * Y[j];
2415 double temp = fij * product;
2417 sum_X2Y2f += temp * product;
2420 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2422 sum_Xr += X[i] * row_tot[i];
2423 sum_X2r += X[i] * X[i] * row_tot[i];
2427 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2429 sum_Yc += Y[i] * col_tot[i];
2430 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2434 S = sum_XYf - sum_Xr * sum_Yc / W;
2435 SX = sum_X2r - sum_Xr * sum_Xr / W;
2436 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2439 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2444 for (s = c = 0., i = 0; i < n_rows; i++)
2445 for (j = 0; j < n_cols; j++)
2447 double Xresid, Yresid;
2450 Xresid = X[i] - Xbar;
2451 Yresid = Y[j] - Ybar;
2452 temp = (T * Xresid * Yresid
2454 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2455 y = mat[j + i * n_cols] * temp * temp - c;
2460 *ase_1 = sqrt (s) / (T * T);
2464 static double somers_d_v[3];
2465 static double somers_d_ase[3];
2466 static double somers_d_t[3];
2468 /* Calculate symmetric statistics and their asymptotic standard
2469 errors. Returns 0 if none could be calculated. */
2471 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2472 double t[N_SYMMETRIC])
2474 int q = MIN (ns_rows, ns_cols);
2483 for (i = 0; i < N_SYMMETRIC; i++)
2484 v[i] = ase[i] = t[i] = SYSMIS;
2487 /* Phi, Cramer's V, contingency coefficient. */
2488 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2490 double Xp = 0.; /* Pearson chi-square. */
2495 for (r = 0; r < n_rows; r++)
2496 for (c = 0; c < n_cols; c++)
2498 const double expected = row_tot[r] * col_tot[c] / W;
2499 const double freq = mat[n_cols * r + c];
2500 const double residual = freq - expected;
2502 Xp += residual * residual / expected;
2506 if (cmd.a_statistics[CRS_ST_PHI])
2508 v[0] = sqrt (Xp / W);
2509 v[1] = sqrt (Xp / (W * (q - 1)));
2511 if (cmd.a_statistics[CRS_ST_CC])
2512 v[2] = sqrt (Xp / (Xp + W));
2515 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2516 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2521 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2528 for (r = 0; r < n_rows; r++)
2529 Dr -= row_tot[r] * row_tot[r];
2530 for (c = 0; c < n_cols; c++)
2531 Dc -= col_tot[c] * col_tot[c];
2537 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2538 for (c = 0; c < n_cols; c++)
2542 for (r = 0; r < n_rows; r++)
2543 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2553 for (i = 0; i < n_rows; i++)
2557 for (j = 1; j < n_cols; j++)
2558 Cij += col_tot[j] - cum[j + i * n_cols];
2561 for (j = 1; j < n_cols; j++)
2562 Dij += cum[j + (i - 1) * n_cols];
2566 double fij = mat[j + i * n_cols];
2572 assert (j < n_cols);
2574 Cij -= col_tot[j] - cum[j + i * n_cols];
2575 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2579 Cij += cum[j - 1 + (i - 1) * n_cols];
2580 Dij -= cum[j + (i - 1) * n_cols];
2586 if (cmd.a_statistics[CRS_ST_BTAU])
2587 v[3] = (P - Q) / sqrt (Dr * Dc);
2588 if (cmd.a_statistics[CRS_ST_CTAU])
2589 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2590 if (cmd.a_statistics[CRS_ST_GAMMA])
2591 v[5] = (P - Q) / (P + Q);
2593 /* ASE for tau-b, tau-c, gamma. Calculations could be
2594 eliminated here, at expense of memory. */
2599 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2600 for (i = 0; i < n_rows; i++)
2604 for (j = 1; j < n_cols; j++)
2605 Cij += col_tot[j] - cum[j + i * n_cols];
2608 for (j = 1; j < n_cols; j++)
2609 Dij += cum[j + (i - 1) * n_cols];
2613 double fij = mat[j + i * n_cols];
2615 if (cmd.a_statistics[CRS_ST_BTAU])
2617 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2618 + v[3] * (row_tot[i] * Dc
2619 + col_tot[j] * Dr));
2620 btau_cum += fij * temp * temp;
2624 const double temp = Cij - Dij;
2625 ctau_cum += fij * temp * temp;
2628 if (cmd.a_statistics[CRS_ST_GAMMA])
2630 const double temp = Q * Cij - P * Dij;
2631 gamma_cum += fij * temp * temp;
2634 if (cmd.a_statistics[CRS_ST_D])
2636 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2637 - (P - Q) * (W - row_tot[i]));
2638 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2639 - (Q - P) * (W - col_tot[j]));
2644 assert (j < n_cols);
2646 Cij -= col_tot[j] - cum[j + i * n_cols];
2647 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2651 Cij += cum[j - 1 + (i - 1) * n_cols];
2652 Dij -= cum[j + (i - 1) * n_cols];
2658 btau_var = ((btau_cum
2659 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2661 if (cmd.a_statistics[CRS_ST_BTAU])
2663 ase[3] = sqrt (btau_var);
2664 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2667 if (cmd.a_statistics[CRS_ST_CTAU])
2669 ase[4] = ((2 * q / ((q - 1) * W * W))
2670 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2671 t[4] = v[4] / ase[4];
2673 if (cmd.a_statistics[CRS_ST_GAMMA])
2675 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2676 t[5] = v[5] / (2. / (P + Q)
2677 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2679 if (cmd.a_statistics[CRS_ST_D])
2681 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2682 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2683 somers_d_t[0] = (somers_d_v[0]
2685 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2686 somers_d_v[1] = (P - Q) / Dc;
2687 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2688 somers_d_t[1] = (somers_d_v[1]
2690 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2691 somers_d_v[2] = (P - Q) / Dr;
2692 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2693 somers_d_t[2] = (somers_d_v[2]
2695 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2701 /* Spearman correlation, Pearson's r. */
2702 if (cmd.a_statistics[CRS_ST_CORR])
2704 double *R = local_alloc (sizeof *R * n_rows);
2705 double *C = local_alloc (sizeof *C * n_cols);
2708 double y, t, c = 0., s = 0.;
2713 R[i] = s + (row_tot[i] + 1.) / 2.;
2720 assert (i < n_rows);
2725 double y, t, c = 0., s = 0.;
2730 C[j] = s + (col_tot[j] + 1.) / 2;
2737 assert (j < n_cols);
2741 calc_r (R, C, &v[6], &t[6], &ase[6]);
2747 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2751 /* Cohen's kappa. */
2752 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2754 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2757 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2758 i < ns_rows; i++, j++)
2762 while (col_tot[j] == 0.)
2765 prod = row_tot[i] * col_tot[j];
2766 sum = row_tot[i] + col_tot[j];
2768 sum_fii += mat[j + i * n_cols];
2770 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2771 sum_riciri_ci += prod * sum;
2773 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2774 for (j = 0; j < ns_cols; j++)
2776 double sum = row_tot[i] + col_tot[j];
2777 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2780 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2782 ase[8] = sqrt ((W * W * sum_rici
2783 + sum_rici * sum_rici
2784 - W * sum_riciri_ci)
2785 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2787 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2788 / pow2 (W * W - sum_rici))
2789 + ((2. * (W - sum_fii)
2790 * (2. * sum_fii * sum_rici
2791 - W * sum_fiiri_ci))
2792 / cube (W * W - sum_rici))
2793 + (pow2 (W - sum_fii)
2794 * (W * sum_fijri_ci2 - 4.
2795 * sum_rici * sum_rici)
2796 / pow4 (W * W - sum_rici))));
2798 t[8] = v[8] / ase[8];
2805 /* Calculate risk estimate. */
2807 calc_risk (double *value, double *upper, double *lower, union value *c)
2809 double f11, f12, f21, f22;
2815 for (i = 0; i < 3; i++)
2816 value[i] = upper[i] = lower[i] = SYSMIS;
2819 if (ns_rows != 2 || ns_cols != 2)
2826 for (i = j = 0; i < n_cols; i++)
2827 if (col_tot[i] != 0.)
2836 f11 = mat[nz_cols[0]];
2837 f12 = mat[nz_cols[1]];
2838 f21 = mat[nz_cols[0] + n_cols];
2839 f22 = mat[nz_cols[1] + n_cols];
2841 c[0] = cols[nz_cols[0]];
2842 c[1] = cols[nz_cols[1]];
2845 value[0] = (f11 * f22) / (f12 * f21);
2846 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2847 lower[0] = value[0] * exp (-1.960 * v);
2848 upper[0] = value[0] * exp (1.960 * v);
2850 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2851 v = sqrt ((f12 / (f11 * (f11 + f12)))
2852 + (f22 / (f21 * (f21 + f22))));
2853 lower[1] = value[1] * exp (-1.960 * v);
2854 upper[1] = value[1] * exp (1.960 * v);
2856 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2857 v = sqrt ((f11 / (f12 * (f11 + f12)))
2858 + (f21 / (f22 * (f21 + f22))));
2859 lower[2] = value[2] * exp (-1.960 * v);
2860 upper[2] = value[2] * exp (1.960 * v);
2865 /* Calculate directional measures. */
2867 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2868 double t[N_DIRECTIONAL])
2873 for (i = 0; i < N_DIRECTIONAL; i++)
2874 v[i] = ase[i] = t[i] = SYSMIS;
2878 if (cmd.a_statistics[CRS_ST_LAMBDA])
2880 double *fim = xnmalloc (n_rows, sizeof *fim);
2881 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2882 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2883 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2884 double sum_fim, sum_fmj;
2886 int rm_index, cm_index;
2889 /* Find maximum for each row and their sum. */
2890 for (sum_fim = 0., i = 0; i < n_rows; i++)
2892 double max = mat[i * n_cols];
2895 for (j = 1; j < n_cols; j++)
2896 if (mat[j + i * n_cols] > max)
2898 max = mat[j + i * n_cols];
2902 sum_fim += fim[i] = max;
2903 fim_index[i] = index;
2906 /* Find maximum for each column. */
2907 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2909 double max = mat[j];
2912 for (i = 1; i < n_rows; i++)
2913 if (mat[j + i * n_cols] > max)
2915 max = mat[j + i * n_cols];
2919 sum_fmj += fmj[j] = max;
2920 fmj_index[j] = index;
2923 /* Find maximum row total. */
2926 for (i = 1; i < n_rows; i++)
2927 if (row_tot[i] > rm)
2933 /* Find maximum column total. */
2936 for (j = 1; j < n_cols; j++)
2937 if (col_tot[j] > cm)
2943 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2944 v[1] = (sum_fmj - rm) / (W - rm);
2945 v[2] = (sum_fim - cm) / (W - cm);
2947 /* ASE1 for Y given X. */
2951 for (accum = 0., i = 0; i < n_rows; i++)
2952 for (j = 0; j < n_cols; j++)
2954 const int deltaj = j == cm_index;
2955 accum += (mat[j + i * n_cols]
2956 * pow2 ((j == fim_index[i])
2961 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2964 /* ASE0 for Y given X. */
2968 for (accum = 0., i = 0; i < n_rows; i++)
2969 if (cm_index != fim_index[i])
2970 accum += (mat[i * n_cols + fim_index[i]]
2971 + mat[i * n_cols + cm_index]);
2972 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2975 /* ASE1 for X given Y. */
2979 for (accum = 0., i = 0; i < n_rows; i++)
2980 for (j = 0; j < n_cols; j++)
2982 const int deltaj = i == rm_index;
2983 accum += (mat[j + i * n_cols]
2984 * pow2 ((i == fmj_index[j])
2989 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2992 /* ASE0 for X given Y. */
2996 for (accum = 0., j = 0; j < n_cols; j++)
2997 if (rm_index != fmj_index[j])
2998 accum += (mat[j + n_cols * fmj_index[j]]
2999 + mat[j + n_cols * rm_index]);
3000 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3003 /* Symmetric ASE0 and ASE1. */
3008 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3009 for (j = 0; j < n_cols; j++)
3011 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3012 int temp1 = (i == rm_index) + (j == cm_index);
3013 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3014 accum1 += (mat[j + i * n_cols]
3015 * pow2 (temp0 + (v[0] - 1.) * temp1));
3017 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3018 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3019 / (2. * W - rm - cm));
3028 double sum_fij2_ri, sum_fij2_ci;
3029 double sum_ri2, sum_cj2;
3031 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3032 for (j = 0; j < n_cols; j++)
3034 double temp = pow2 (mat[j + i * n_cols]);
3035 sum_fij2_ri += temp / row_tot[i];
3036 sum_fij2_ci += temp / col_tot[j];
3039 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3040 sum_ri2 += row_tot[i] * row_tot[i];
3042 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3043 sum_cj2 += col_tot[j] * col_tot[j];
3045 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3046 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3050 if (cmd.a_statistics[CRS_ST_UC])
3052 double UX, UY, UXY, P;
3053 double ase1_yx, ase1_xy, ase1_sym;
3056 for (UX = 0., i = 0; i < n_rows; i++)
3057 if (row_tot[i] > 0.)
3058 UX -= row_tot[i] / W * log (row_tot[i] / W);
3060 for (UY = 0., j = 0; j < n_cols; j++)
3061 if (col_tot[j] > 0.)
3062 UY -= col_tot[j] / W * log (col_tot[j] / W);
3064 for (UXY = P = 0., i = 0; i < n_rows; i++)
3065 for (j = 0; j < n_cols; j++)
3067 double entry = mat[j + i * n_cols];
3072 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3073 UXY -= entry / W * log (entry / W);
3076 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3077 for (j = 0; j < n_cols; j++)
3079 double entry = mat[j + i * n_cols];
3084 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3085 + (UX - UXY) * log (col_tot[j] / W));
3086 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3087 + (UY - UXY) * log (row_tot[i] / W));
3088 ase1_sym += entry * pow2 ((UXY
3089 * log (row_tot[i] * col_tot[j] / (W * W)))
3090 - (UX + UY) * log (entry / W));
3093 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3094 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3095 t[5] = v[5] / ((2. / (W * (UX + UY)))
3096 * sqrt (P - pow2 (UX + UY - UXY) / W));
3098 v[6] = (UX + UY - UXY) / UX;
3099 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3100 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3102 v[7] = (UX + UY - UXY) / UY;
3103 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3104 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3108 if (cmd.a_statistics[CRS_ST_D])
3113 calc_symmetric (NULL, NULL, NULL);
3114 for (i = 0; i < 3; i++)
3116 v[8 + i] = somers_d_v[i];
3117 ase[8 + i] = somers_d_ase[i];
3118 t[8 + i] = somers_d_t[i];
3123 if (cmd.a_statistics[CRS_ST_ETA])
3126 double sum_Xr, sum_X2r;
3130 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3132 sum_Xr += rows[i].f * row_tot[i];
3133 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3135 SX = sum_X2r - sum_Xr * sum_Xr / W;
3137 for (SXW = 0., j = 0; j < n_cols; j++)
3141 for (cum = 0., i = 0; i < n_rows; i++)
3143 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3144 cum += rows[i].f * mat[j + i * n_cols];
3147 SXW -= cum * cum / col_tot[j];
3149 v[11] = sqrt (1. - SXW / SX);
3153 double sum_Yc, sum_Y2c;
3157 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3159 sum_Yc += cols[i].f * col_tot[i];
3160 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3162 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3164 for (SYW = 0., i = 0; i < n_rows; i++)
3168 for (cum = 0., j = 0; j < n_cols; j++)
3170 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3171 cum += cols[j].f * mat[j + i * n_cols];
3174 SYW -= cum * cum / row_tot[i];
3176 v[12] = sqrt (1. - SYW / SY);
3183 /* A wrapper around data_out() that limits string output to short
3184 string width and null terminates the result. */
3186 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3188 struct fmt_spec fmt_subst;
3190 /* Limit to short string width. */
3191 if (fmt_is_string (fp->type))
3195 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3196 if (fmt_subst.type == FMT_A)
3197 fmt_subst.w = MIN (8, fmt_subst.w);
3199 fmt_subst.w = MIN (16, fmt_subst.w);
3205 data_out (v, fp, s);
3207 /* Null terminate. */