1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/data-out.h>
41 #include <data/dictionary.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
64 #define _(msgid) gettext (msgid)
65 #define N_(msgid) msgid
73 missing=miss:!table/include/report;
74 +write[wr_]=none,cells,all;
75 +format=fmt:!labels/nolabels/novallabs,
78 tabl:!tables/notables,
81 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
83 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
89 /* Number of chi-square statistics. */
92 /* Number of symmetric statistics. */
95 /* Number of directional statistics. */
96 #define N_DIRECTIONAL 13
98 /* A single table entry for general mode. */
101 int table; /* Flattened table number. */
104 double freq; /* Frequency count. */
105 double *data; /* Crosstabulation table for integer mode. */
108 union value values[1]; /* Values. */
111 /* A crosstabulation. */
114 int nvar; /* Number of variables. */
115 double missing; /* Missing cases count. */
116 int ofs; /* Integer mode: Offset into sorted_tab[]. */
117 struct variable *vars[2]; /* At least two variables; sorted by
118 larger indices first. */
121 /* Integer mode variable info. */
124 int min; /* Minimum value. */
125 int max; /* Maximum value + 1. */
126 int count; /* max - min. */
129 static inline struct var_range *
130 get_var_range (struct variable *v)
133 assert (v->aux != NULL);
137 /* Indexes into crosstab.v. */
144 /* General mode crosstabulation table. */
145 static struct hsh_table *gen_tab; /* Hash table. */
146 static int n_sorted_tab; /* Number of entries in sorted_tab. */
147 static struct table_entry **sorted_tab; /* Sorted table. */
149 /* Variables specifies on VARIABLES. */
150 static struct variable **variables;
151 static size_t variables_cnt;
154 static struct crosstab **xtab;
157 /* Integer or general mode? */
166 static int num_cells; /* Number of cells requested. */
167 static int cells[8]; /* Cells requested. */
170 static int write; /* One of WR_* that specifies the WRITE style. */
172 /* Command parsing info. */
173 static struct cmd_crosstabs cmd;
176 static struct pool *pl_tc; /* For table cells. */
177 static struct pool *pl_col; /* For column data. */
179 static int internal_cmd_crosstabs (struct dataset *ds);
180 static void precalc (const struct ccase *, void *, const struct dataset *);
181 static bool calc_general (const struct ccase *, void *, const struct dataset *);
182 static bool calc_integer (const struct ccase *, void *, const struct dataset *);
183 static bool postcalc (void *, const struct dataset *);
184 static void submit (struct tab_table *);
186 static void format_short (char *s, const struct fmt_spec *fp,
187 const union value *v);
189 /* Parse and execute CROSSTABS, then clean up. */
191 cmd_crosstabs (struct dataset *ds)
193 int result = internal_cmd_crosstabs (ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
202 /* Parses and executes the CROSSTABS procedure. */
204 internal_cmd_crosstabs (struct dataset *ds)
213 pl_tc = pool_create ();
214 pl_col = pool_create ();
216 if (!parse_crosstabs (ds, &cmd, NULL))
219 mode = variables ? INTEGER : GENERAL;
224 cmd.a_cells[CRS_CL_COUNT] = 1;
230 for (i = 0; i < CRS_CL_count; i++)
235 cmd.a_cells[CRS_CL_COUNT] = 1;
236 cmd.a_cells[CRS_CL_ROW] = 1;
237 cmd.a_cells[CRS_CL_COLUMN] = 1;
238 cmd.a_cells[CRS_CL_TOTAL] = 1;
240 if (cmd.a_cells[CRS_CL_ALL])
242 for (i = 0; i < CRS_CL_count; i++)
244 cmd.a_cells[CRS_CL_ALL] = 0;
246 cmd.a_cells[CRS_CL_NONE] = 0;
248 for (num_cells = i = 0; i < CRS_CL_count; i++)
250 cells[num_cells++] = i;
253 if (cmd.sbc_statistics)
258 for (i = 0; i < CRS_ST_count; i++)
259 if (cmd.a_statistics[i])
262 cmd.a_statistics[CRS_ST_CHISQ] = 1;
263 if (cmd.a_statistics[CRS_ST_ALL])
264 for (i = 0; i < CRS_ST_count; i++)
265 cmd.a_statistics[i] = 1;
269 if (cmd.miss == CRS_REPORT && mode == GENERAL)
271 msg (SE, _("Missing mode REPORT not allowed in general mode. "
272 "Assuming MISSING=TABLE."));
273 cmd.miss = CRS_TABLE;
277 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
278 cmd.a_write[CRS_WR_ALL] = 0;
279 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
281 msg (SE, _("Write mode ALL not allowed in general mode. "
282 "Assuming WRITE=CELLS."));
283 cmd.a_write[CRS_WR_CELLS] = 1;
286 && (cmd.a_write[CRS_WR_NONE]
287 + cmd.a_write[CRS_WR_ALL]
288 + cmd.a_write[CRS_WR_CELLS] == 0))
289 cmd.a_write[CRS_WR_CELLS] = 1;
290 if (cmd.a_write[CRS_WR_CELLS])
291 write = CRS_WR_CELLS;
292 else if (cmd.a_write[CRS_WR_ALL])
297 ok = procedure_with_splits (ds, precalc,
298 mode == GENERAL ? calc_general : calc_integer,
301 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
304 /* Parses the TABLES subcommand. */
306 crs_custom_tables (struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
308 struct var_set *var_set;
310 struct variable ***by = NULL;
311 size_t *by_nvar = NULL;
315 /* Ensure that this is a TABLES subcommand. */
316 if (!lex_match_id ("TABLES")
317 && (token != T_ID || dict_lookup_var (dataset_dict (ds), tokid) == NULL)
322 if (variables != NULL)
323 var_set = var_set_create_from_array (variables, variables_cnt);
325 var_set = var_set_create_from_dict (dataset_dict (ds));
326 assert (var_set != NULL);
330 by = xnrealloc (by, n_by + 1, sizeof *by);
331 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
332 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
333 PV_NO_DUPLICATE | PV_NO_SCRATCH))
335 if (xalloc_oversized (nx, by_nvar[n_by]))
337 msg (SE, _("Too many crosstabulation variables or dimensions."));
343 if (!lex_match (T_BY))
347 lex_error (_("expecting BY"));
356 int *by_iter = xcalloc (n_by, sizeof *by_iter);
359 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
360 for (i = 0; i < nx; i++)
364 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
371 for (i = 0; i < n_by; i++)
372 x->vars[i] = by[i][by_iter[i]];
378 for (i = n_by - 1; i >= 0; i--)
380 if (++by_iter[i] < by_nvar[i])
393 /* All return paths lead here. */
397 for (i = 0; i < n_by; i++)
403 var_set_destroy (var_set);
408 /* Parses the VARIABLES subcommand. */
410 crs_custom_variables (struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
414 msg (SE, _("VARIABLES must be specified before TABLES."));
422 size_t orig_nv = variables_cnt;
427 if (!parse_variables (dataset_dict (ds), &variables, &variables_cnt,
428 (PV_APPEND | PV_NUMERIC
429 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
434 lex_error ("expecting `('");
439 if (!lex_force_int ())
441 min = lex_integer ();
446 if (!lex_force_int ())
448 max = lex_integer ();
451 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
459 lex_error ("expecting `)'");
464 for (i = orig_nv; i < variables_cnt; i++)
466 struct var_range *vr = xmalloc (sizeof *vr);
469 vr->count = max - min + 1;
470 var_attach_aux (variables[i], vr, var_dtor_free);
485 /* Data file processing. */
487 static int compare_table_entry (const void *, const void *, const void *);
488 static unsigned hash_table_entry (const void *, const void *);
490 /* Set up the crosstabulation tables for processing. */
492 precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
494 output_split_file_values (ds, first);
497 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
507 for (i = 0; i < nxtab; i++)
509 struct crosstab *x = xtab[i];
514 x->ofs = n_sorted_tab;
516 for (j = 2; j < x->nvar; j++)
517 count *= get_var_range (x->vars[j - 2])->count;
519 sorted_tab = xnrealloc (sorted_tab,
520 n_sorted_tab + count, sizeof *sorted_tab);
521 v = local_alloc (sizeof *v * x->nvar);
522 for (j = 2; j < x->nvar; j++)
523 v[j] = get_var_range (x->vars[j])->min;
524 for (j = 0; j < count; j++)
526 struct table_entry *te;
529 te = sorted_tab[n_sorted_tab++]
530 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
534 int row_cnt = get_var_range (x->vars[0])->count;
535 int col_cnt = get_var_range (x->vars[1])->count;
536 const int mat_size = row_cnt * col_cnt;
539 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
540 for (m = 0; m < mat_size; m++)
544 for (k = 2; k < x->nvar; k++)
545 te->values[k].f = v[k];
546 for (k = 2; k < x->nvar; k++)
548 struct var_range *vr = get_var_range (x->vars[k]);
549 if (++v[k] >= vr->max)
558 sorted_tab = xnrealloc (sorted_tab,
559 n_sorted_tab + 1, sizeof *sorted_tab);
560 sorted_tab[n_sorted_tab] = NULL;
565 /* Form crosstabulations for general mode. */
567 calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
569 bool bad_warn = true;
572 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
574 /* Flattened current table index. */
577 for (t = 0; t < nxtab; t++)
579 struct crosstab *x = xtab[t];
580 const size_t entry_size = (sizeof (struct table_entry)
581 + sizeof (union value) * (x->nvar - 1));
582 struct table_entry *te = local_alloc (entry_size);
584 /* Construct table entry for the current record and table. */
590 for (j = 0; j < x->nvar; j++)
592 const union value *v = case_data (c, x->vars[j]->fv);
593 const struct missing_values *mv = &x->vars[j]->miss;
594 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
595 || (cmd.miss == CRS_INCLUDE
596 && mv_is_value_system_missing (mv, v)))
598 x->missing += weight;
602 if (x->vars[j]->type == NUMERIC)
603 te->values[j].f = case_num (c, x->vars[j]->fv);
606 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
609 /* Necessary in order to simplify comparisons. */
610 memset (&te->values[j].s[x->vars[j]->width], 0,
611 sizeof (union value) - x->vars[j]->width);
616 /* Add record to hash table. */
618 struct table_entry **tepp
619 = (struct table_entry **) hsh_probe (gen_tab, te);
622 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
625 memcpy (tep, te, entry_size);
630 (*tepp)->u.freq += weight;
641 calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
643 bool bad_warn = true;
646 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
648 /* Flattened current table index. */
651 for (t = 0; t < nxtab; t++)
653 struct crosstab *x = xtab[t];
658 for (i = 0; i < x->nvar; i++)
660 struct variable *const v = x->vars[i];
661 struct var_range *vr = get_var_range (v);
662 double value = case_num (c, v->fv);
664 /* Note that the first test also rules out SYSMIS. */
665 if ((value < vr->min || value >= vr->max)
666 || (cmd.miss == CRS_TABLE
667 && mv_is_num_user_missing (&v->miss, value)))
669 x->missing += weight;
675 ofs += fact * ((int) value - vr->min);
681 struct variable *row_var = x->vars[ROW_VAR];
682 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
684 struct variable *col_var = x->vars[COL_VAR];
685 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
687 const int col_dim = get_var_range (col_var)->count;
689 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
698 /* Compare the table_entry's at A and B and return a strcmp()-type
701 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
703 const struct table_entry *a = a_;
704 const struct table_entry *b = b_;
706 if (a->table > b->table)
708 else if (a->table < b->table)
712 const struct crosstab *x = xtab[a->table];
715 for (i = x->nvar - 1; i >= 0; i--)
716 if (x->vars[i]->type == NUMERIC)
718 const double diffnum = a->values[i].f - b->values[i].f;
721 else if (diffnum > 0)
726 assert (x->vars[i]->type == ALPHA);
728 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
739 /* Calculate a hash value from table_entry A. */
741 hash_table_entry (const void *a_, const void *aux UNUSED)
743 const struct table_entry *a = a_;
748 for (i = 0; i < xtab[a->table]->nvar; i++)
749 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
754 /* Post-data reading calculations. */
756 static struct table_entry **find_pivot_extent (struct table_entry **,
757 int *cnt, int pivot);
758 static void enum_var_values (struct table_entry **entries, int entry_cnt,
760 union value **values, int *value_cnt);
761 static void output_pivot_table (struct table_entry **, struct table_entry **,
762 double **, double **, double **,
763 int *, int *, int *);
764 static void make_summary_table (void);
767 postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
771 n_sorted_tab = hsh_count (gen_tab);
772 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
775 make_summary_table ();
777 /* Identify all the individual crosstabulation tables, and deal with
780 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
781 int pc = n_sorted_tab; /* Pivot count. */
783 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
784 int maxrows = 0, maxcols = 0, maxcells = 0;
788 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
792 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
793 &maxrows, &maxcols, &maxcells);
802 hsh_destroy (gen_tab);
807 static void insert_summary (struct tab_table *, int tab_index, double valid);
809 /* Output a table summarizing the cases processed. */
811 make_summary_table (void)
813 struct tab_table *summary;
815 struct table_entry **pb = sorted_tab, **pe;
816 int pc = n_sorted_tab;
819 summary = tab_create (7, 3 + nxtab, 1);
820 tab_title (summary, _("Summary."));
821 tab_headers (summary, 1, 0, 3, 0);
822 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
823 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
824 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
825 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
826 tab_hline (summary, TAL_1, 1, 6, 1);
827 tab_hline (summary, TAL_1, 1, 6, 2);
828 tab_vline (summary, TAL_1, 3, 1, 1);
829 tab_vline (summary, TAL_1, 5, 1, 1);
833 for (i = 0; i < 3; i++)
835 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
836 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
839 tab_offset (summary, 0, 3);
845 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
849 while (cur_tab < (*pb)->table)
850 insert_summary (summary, cur_tab++, 0.);
853 for (valid = 0.; pb < pe; pb++)
854 valid += (*pb)->u.freq;
857 const struct crosstab *const x = xtab[(*pb)->table];
858 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
859 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
860 const int count = n_cols * n_rows;
862 for (valid = 0.; pb < pe; pb++)
864 const double *data = (*pb)->u.data;
867 for (i = 0; i < count; i++)
871 insert_summary (summary, cur_tab++, valid);
876 while (cur_tab < nxtab)
877 insert_summary (summary, cur_tab++, 0.);
882 /* Inserts a line into T describing the crosstabulation at index
883 TAB_INDEX, which has VALID valid observations. */
885 insert_summary (struct tab_table *t, int tab_index, double valid)
887 struct crosstab *x = xtab[tab_index];
889 tab_hline (t, TAL_1, 0, 6, 0);
891 /* Crosstabulation name. */
893 char *buf = local_alloc (128 * x->nvar);
897 for (i = 0; i < x->nvar; i++)
900 cp = stpcpy (cp, " * ");
903 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
905 tab_text (t, 0, 0, TAB_LEFT, buf);
910 /* Counts and percentages. */
920 for (i = 0; i < 3; i++)
922 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
923 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
934 static struct tab_table *table; /* Crosstabulation table. */
935 static struct tab_table *chisq; /* Chi-square table. */
936 static struct tab_table *sym; /* Symmetric measures table. */
937 static struct tab_table *risk; /* Risk estimate table. */
938 static struct tab_table *direct; /* Directional measures table. */
941 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
943 /* Column values, number of columns. */
944 static union value *cols;
947 /* Row values, number of rows. */
948 static union value *rows;
951 /* Number of statistically interesting columns/rows (columns/rows with
953 static int ns_cols, ns_rows;
955 /* Crosstabulation. */
956 static const struct crosstab *x;
958 /* Number of variables from the crosstabulation to consider. This is
959 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
962 /* Matrix contents. */
963 static double *mat; /* Matrix proper. */
964 static double *row_tot; /* Row totals. */
965 static double *col_tot; /* Column totals. */
966 static double W; /* Grand total. */
968 static void display_dimensions (struct tab_table *, int first_difference,
969 struct table_entry *);
970 static void display_crosstabulation (void);
971 static void display_chisq (void);
972 static void display_symmetric (void);
973 static void display_risk (void);
974 static void display_directional (void);
975 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
976 static void table_value_missing (struct tab_table *table, int c, int r,
977 unsigned char opt, const union value *v,
978 const struct variable *var);
979 static void delete_missing (void);
981 /* Output pivot table beginning at PB and continuing until PE,
982 exclusive. For efficiency, *MATP is a pointer to a matrix that can
983 hold *MAXROWS entries. */
985 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
986 double **matp, double **row_totp, double **col_totp,
987 int *maxrows, int *maxcols, int *maxcells)
990 struct table_entry **tb = pb, **te; /* Table begin, table end. */
991 int tc = pe - pb; /* Table count. */
993 /* Table entry for header comparison. */
994 struct table_entry *cmp = NULL;
996 x = xtab[(*pb)->table];
997 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
999 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1001 /* Crosstabulation table initialization. */
1004 table = tab_create (nvar + n_cols,
1005 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1006 tab_headers (table, nvar - 1, 0, 2, 0);
1008 /* First header line. */
1009 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1010 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1012 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1014 /* Second header line. */
1018 for (i = 2; i < nvar; i++)
1019 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1020 TAB_RIGHT | TAT_TITLE,
1022 ? x->vars[i]->label : x->vars[i]->name));
1023 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1024 x->vars[ROW_VAR]->name);
1025 for (i = 0; i < n_cols; i++)
1026 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1028 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1031 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1032 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1036 char *title = local_alloc (x->nvar * 64 + 128);
1040 if (cmd.pivot == CRS_PIVOT)
1041 for (i = 0; i < nvar; i++)
1044 cp = stpcpy (cp, " by ");
1045 cp = stpcpy (cp, x->vars[i]->name);
1049 cp = spprintf (cp, "%s by %s for",
1050 x->vars[0]->name, x->vars[1]->name);
1051 for (i = 2; i < nvar; i++)
1053 char buf[64], *bufp;
1058 cp = stpcpy (cp, x->vars[i]->name);
1060 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1061 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1063 cp = stpcpy (cp, bufp);
1067 cp = stpcpy (cp, " [");
1068 for (i = 0; i < num_cells; i++)
1076 static const struct tuple cell_names[] =
1078 {CRS_CL_COUNT, N_("count")},
1079 {CRS_CL_ROW, N_("row %")},
1080 {CRS_CL_COLUMN, N_("column %")},
1081 {CRS_CL_TOTAL, N_("total %")},
1082 {CRS_CL_EXPECTED, N_("expected")},
1083 {CRS_CL_RESIDUAL, N_("residual")},
1084 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1085 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1089 const struct tuple *t;
1091 for (t = cell_names; t->value != cells[i]; t++)
1092 assert (t->value != -1);
1094 cp = stpcpy (cp, ", ");
1095 cp = stpcpy (cp, gettext (t->name));
1099 tab_title (table, "%s", title);
1103 tab_offset (table, 0, 2);
1108 /* Chi-square table initialization. */
1109 if (cmd.a_statistics[CRS_ST_CHISQ])
1111 chisq = tab_create (6 + (nvar - 2),
1112 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1113 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1115 tab_title (chisq, _("Chi-square tests."));
1117 tab_offset (chisq, nvar - 2, 0);
1118 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1119 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1120 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1121 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1122 _("Asymp. Sig. (2-sided)"));
1123 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1124 _("Exact. Sig. (2-sided)"));
1125 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1126 _("Exact. Sig. (1-sided)"));
1128 tab_offset (chisq, 0, 1);
1133 /* Symmetric measures. */
1134 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1135 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1136 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1137 || cmd.a_statistics[CRS_ST_KAPPA])
1139 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1140 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1141 tab_title (sym, _("Symmetric measures."));
1143 tab_offset (sym, nvar - 2, 0);
1144 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1145 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1146 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1147 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1148 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1149 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1150 tab_offset (sym, 0, 1);
1155 /* Risk estimate. */
1156 if (cmd.a_statistics[CRS_ST_RISK])
1158 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1159 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1160 tab_title (risk, _("Risk estimate."));
1162 tab_offset (risk, nvar - 2, 0);
1163 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1164 _("95%% Confidence Interval"));
1165 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1166 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1167 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1168 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1169 tab_hline (risk, TAL_1, 2, 3, 1);
1170 tab_vline (risk, TAL_1, 2, 0, 1);
1171 tab_offset (risk, 0, 2);
1176 /* Directional measures. */
1177 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1178 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1180 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1181 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1182 tab_title (direct, _("Directional measures."));
1184 tab_offset (direct, nvar - 2, 0);
1185 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1186 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1187 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1188 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1189 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1190 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1191 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1192 tab_offset (direct, 0, 1);
1199 /* Find pivot subtable if applicable. */
1200 te = find_pivot_extent (tb, &tc, 0);
1204 /* Find all the row variable values. */
1205 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1207 /* Allocate memory space for the column and row totals. */
1208 if (n_rows > *maxrows)
1210 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1211 row_tot = *row_totp;
1214 if (n_cols > *maxcols)
1216 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1217 col_tot = *col_totp;
1221 /* Allocate table space for the matrix. */
1222 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1223 tab_realloc (table, -1,
1224 max (tab_nr (table) + (n_rows + 1) * num_cells,
1225 tab_nr (table) * (pe - pb) / (te - tb)));
1227 if (mode == GENERAL)
1229 /* Allocate memory space for the matrix. */
1230 if (n_cols * n_rows > *maxcells)
1232 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1233 *maxcells = n_cols * n_rows;
1238 /* Build the matrix and calculate column totals. */
1240 union value *cur_col = cols;
1241 union value *cur_row = rows;
1243 double *cp = col_tot;
1244 struct table_entry **p;
1247 for (p = &tb[0]; p < te; p++)
1249 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1253 for (; cur_row < &rows[n_rows]; cur_row++)
1259 mp = &mat[cur_col - cols];
1262 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1269 *cp += *mp = (*p)->u.freq;
1274 /* Zero out the rest of the matrix. */
1275 for (; cur_row < &rows[n_rows]; cur_row++)
1281 if (cur_col < &cols[n_cols])
1283 const int rem_cols = n_cols - (cur_col - cols);
1286 for (c = 0; c < rem_cols; c++)
1288 mp = &mat[cur_col - cols];
1289 for (r = 0; r < n_rows; r++)
1291 for (c = 0; c < rem_cols; c++)
1293 mp += n_cols - rem_cols;
1301 double *tp = col_tot;
1303 assert (mode == INTEGER);
1304 mat = (*tb)->u.data;
1307 /* Calculate column totals. */
1308 for (c = 0; c < n_cols; c++)
1311 double *cp = &mat[c];
1313 for (r = 0; r < n_rows; r++)
1314 cum += cp[r * n_cols];
1322 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1323 ns_cols += *cp != 0.;
1326 /* Calculate row totals. */
1329 double *rp = row_tot;
1332 for (ns_rows = 0, r = n_rows; r--; )
1335 for (c = n_cols; c--; )
1343 /* Calculate grand total. */
1349 if (n_rows < n_cols)
1350 tp = row_tot, n = n_rows;
1352 tp = col_tot, n = n_cols;
1358 /* Find the first variable that differs from the last subtable,
1359 then display the values of the dimensioning variables for
1360 each table that needs it. */
1362 int first_difference = nvar - 1;
1365 for (; ; first_difference--)
1367 assert (first_difference >= 2);
1368 if (memcmp (&cmp->values[first_difference],
1369 &(*tb)->values[first_difference],
1370 sizeof *cmp->values))
1376 display_dimensions (table, first_difference, *tb);
1378 display_dimensions (chisq, first_difference, *tb);
1380 display_dimensions (sym, first_difference, *tb);
1382 display_dimensions (risk, first_difference, *tb);
1384 display_dimensions (direct, first_difference, *tb);
1388 display_crosstabulation ();
1389 if (cmd.miss == CRS_REPORT)
1394 display_symmetric ();
1398 display_directional ();
1409 tab_resize (chisq, 4 + (nvar - 2), -1);
1420 /* Delete missing rows and columns for statistical analysis when
1423 delete_missing (void)
1428 for (r = 0; r < n_rows; r++)
1429 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1433 for (c = 0; c < n_cols; c++)
1434 mat[c + r * n_cols] = 0.;
1442 for (c = 0; c < n_cols; c++)
1443 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1447 for (r = 0; r < n_rows; r++)
1448 mat[c + r * n_cols] = 0.;
1454 /* Prepare table T for submission, and submit it. */
1456 submit (struct tab_table *t)
1463 tab_resize (t, -1, 0);
1464 if (tab_nr (t) == tab_t (t))
1469 tab_offset (t, 0, 0);
1471 for (i = 2; i < nvar; i++)
1472 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1473 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1474 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1475 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1477 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1479 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1480 tab_dim (t, crosstabs_dim);
1484 /* Sets the widths of all the columns and heights of all the rows in
1485 table T for driver D. */
1487 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1491 /* Width of a numerical column. */
1492 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1493 if (cmd.miss == CRS_REPORT)
1494 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1496 /* Set width for header columns. */
1502 w = d->width - c * (t->nc - t->l);
1503 for (i = 0; i <= t->nc; i++)
1507 if (w < d->prop_em_width * 8)
1508 w = d->prop_em_width * 8;
1510 if (w > d->prop_em_width * 15)
1511 w = d->prop_em_width * 15;
1513 for (i = 0; i < t->l; i++)
1517 for (i = t->l; i < t->nc; i++)
1520 for (i = 0; i < t->nr; i++)
1521 t->h[i] = tab_natural_height (t, d, i);
1524 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1525 int *cnt, int pivot);
1526 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1527 int *cnt, int pivot);
1529 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1531 static struct table_entry **
1532 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1534 return (mode == GENERAL
1535 ? find_pivot_extent_general (tp, cnt, pivot)
1536 : find_pivot_extent_integer (tp, cnt, pivot));
1539 /* Find the extent of a region in TP that contains one table. If
1540 PIVOT != 0 that means a set of table entries with identical table
1541 number; otherwise they also have to have the same values for every
1542 dimension after the row and column dimensions. The table that is
1543 searched starts at TP and has length CNT. Returns the first entry
1544 after the last one in the table; sets *CNT to the number of
1545 remaining values. If there are no entries in TP at all, returns
1546 NULL. A yucky interface, admittedly, but it works. */
1547 static struct table_entry **
1548 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1550 struct table_entry *fp = *tp;
1555 x = xtab[(*tp)->table];
1563 if ((*tp)->table != fp->table)
1568 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1575 /* Integer mode correspondent to find_pivot_extent_general(). This
1576 could be optimized somewhat, but I just don't give a crap about
1577 CROSSTABS performance in integer mode, which is just a
1578 CROSSTABS wart as far as I'm concerned.
1580 That said, feel free to send optimization patches to me. */
1581 static struct table_entry **
1582 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1584 struct table_entry *fp = *tp;
1589 x = xtab[(*tp)->table];
1597 if ((*tp)->table != fp->table)
1602 if (memcmp (&(*tp)->values[2], &fp->values[2],
1603 sizeof (union value) * (x->nvar - 2)))
1610 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1611 result. WIDTH_ points to an int which is either 0 for a
1612 numeric value or a string width for a string value. */
1614 compare_value (const void *a_, const void *b_, const void *width_)
1616 const union value *a = a_;
1617 const union value *b = b_;
1618 const int *pwidth = width_;
1619 const int width = *pwidth;
1622 return (a->f < b->f) ? -1 : (a->f > b->f);
1624 return strncmp (a->s, b->s, width);
1627 /* Given an array of ENTRY_CNT table_entry structures starting at
1628 ENTRIES, creates a sorted list of the values that the variable
1629 with index VAR_IDX takes on. The values are returned as a
1630 malloc()'darray stored in *VALUES, with the number of values
1631 stored in *VALUE_CNT.
1634 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1635 union value **values, int *value_cnt)
1637 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1639 if (mode == GENERAL)
1641 int width = v->width;
1644 *values = xnmalloc (entry_cnt, sizeof **values);
1645 for (i = 0; i < entry_cnt; i++)
1646 (*values)[i] = entries[i]->values[var_idx];
1647 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1648 compare_value, &width);
1652 struct var_range *vr = get_var_range (v);
1655 assert (mode == INTEGER);
1656 *values = xnmalloc (vr->count, sizeof **values);
1657 for (i = 0; i < vr->count; i++)
1658 (*values)[i].f = i + vr->min;
1659 *value_cnt = vr->count;
1663 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1664 from V, displayed with print format spec from variable VAR. When
1665 in REPORT missing-value mode, missing values have an M appended. */
1667 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1668 const union value *v, const struct variable *var)
1672 const char *label = val_labs_find (var->val_labs, *v);
1675 tab_text (table, c, r, TAB_LEFT, label);
1679 s.string = tab_alloc (table, var->print.w);
1680 format_short (s.string, &var->print, v);
1681 s.length = strlen (s.string);
1682 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1683 s.string[s.length++] = 'M';
1684 while (s.length && *s.string == ' ')
1689 tab_raw (table, c, r, opt, &s);
1692 /* Draws a line across TABLE at the current row to indicate the most
1693 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1694 that changed, and puts the values that changed into the table. TB
1695 and X must be the corresponding table_entry and crosstab,
1698 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1700 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1702 for (; first_difference >= 2; first_difference--)
1703 table_value_missing (table, nvar - first_difference - 1, 0,
1704 TAB_RIGHT, &tb->values[first_difference],
1705 x->vars[first_difference]);
1708 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1709 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1710 additionally suffixed with a letter `M'. */
1712 format_cell_entry (struct tab_table *table, int c, int r, double value,
1713 char suffix, bool mark_missing)
1715 const struct fmt_spec f = {FMT_F, 10, 1};
1720 s.string = tab_alloc (table, 16);
1722 data_out (&v, &f, s.string);
1723 while (*s.string == ' ')
1729 s.string[s.length++] = suffix;
1731 s.string[s.length++] = 'M';
1733 tab_raw (table, c, r, TAB_RIGHT, &s);
1736 /* Displays the crosstabulation table. */
1738 display_crosstabulation (void)
1743 for (r = 0; r < n_rows; r++)
1744 table_value_missing (table, nvar - 2, r * num_cells,
1745 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1747 tab_text (table, nvar - 2, n_rows * num_cells,
1748 TAB_LEFT, _("Total"));
1750 /* Put in the actual cells. */
1755 tab_offset (table, nvar - 1, -1);
1756 for (r = 0; r < n_rows; r++)
1759 tab_hline (table, TAL_1, -1, n_cols, 0);
1760 for (c = 0; c < n_cols; c++)
1762 bool mark_missing = false;
1763 double expected_value = row_tot[r] * col_tot[c] / W;
1764 if (cmd.miss == CRS_REPORT
1765 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1766 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1768 mark_missing = true;
1769 for (i = 0; i < num_cells; i++)
1780 v = *mp / row_tot[r] * 100.;
1784 v = *mp / col_tot[c] * 100.;
1791 case CRS_CL_EXPECTED:
1794 case CRS_CL_RESIDUAL:
1795 v = *mp - expected_value;
1797 case CRS_CL_SRESIDUAL:
1798 v = (*mp - expected_value) / sqrt (expected_value);
1800 case CRS_CL_ASRESIDUAL:
1801 v = ((*mp - expected_value)
1802 / sqrt (expected_value
1803 * (1. - row_tot[r] / W)
1804 * (1. - col_tot[c] / W)));
1810 format_cell_entry (table, c, i, v, suffix, mark_missing);
1816 tab_offset (table, -1, tab_row (table) + num_cells);
1824 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1825 for (r = 0; r < n_rows; r++)
1828 bool mark_missing = false;
1830 if (cmd.miss == CRS_REPORT
1831 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1832 mark_missing = true;
1834 for (i = 0; i < num_cells; i++)
1848 v = row_tot[r] / W * 100.;
1852 v = row_tot[r] / W * 100.;
1855 case CRS_CL_EXPECTED:
1856 case CRS_CL_RESIDUAL:
1857 case CRS_CL_SRESIDUAL:
1858 case CRS_CL_ASRESIDUAL:
1865 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1866 tab_next_row (table);
1871 /* Column totals, grand total. */
1877 tab_hline (table, TAL_1, -1, n_cols, 0);
1878 for (c = 0; c <= n_cols; c++)
1880 double ct = c < n_cols ? col_tot[c] : W;
1881 bool mark_missing = false;
1885 if (cmd.miss == CRS_REPORT && c < n_cols
1886 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1887 mark_missing = true;
1889 for (i = 0; i < num_cells; i++)
1911 case CRS_CL_EXPECTED:
1912 case CRS_CL_RESIDUAL:
1913 case CRS_CL_SRESIDUAL:
1914 case CRS_CL_ASRESIDUAL:
1920 format_cell_entry (table, c, i, v, suffix, mark_missing);
1925 tab_offset (table, -1, tab_row (table) + last_row);
1928 tab_offset (table, 0, -1);
1931 static void calc_r (double *X, double *Y, double *, double *, double *);
1932 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1934 /* Display chi-square statistics. */
1936 display_chisq (void)
1938 static const char *chisq_stats[N_CHISQ] =
1940 N_("Pearson Chi-Square"),
1941 N_("Likelihood Ratio"),
1942 N_("Fisher's Exact Test"),
1943 N_("Continuity Correction"),
1944 N_("Linear-by-Linear Association"),
1946 double chisq_v[N_CHISQ];
1947 double fisher1, fisher2;
1953 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1955 tab_offset (chisq, nvar - 2, -1);
1957 for (i = 0; i < N_CHISQ; i++)
1959 if ((i != 2 && chisq_v[i] == SYSMIS)
1960 || (i == 2 && fisher1 == SYSMIS))
1964 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1967 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1968 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1969 tab_float (chisq, 3, 0, TAB_RIGHT,
1970 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1975 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1976 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1978 tab_next_row (chisq);
1981 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1982 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1983 tab_next_row (chisq);
1985 tab_offset (chisq, 0, -1);
1988 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1989 double[N_SYMMETRIC]);
1991 /* Display symmetric measures. */
1993 display_symmetric (void)
1995 static const char *categories[] =
1997 N_("Nominal by Nominal"),
1998 N_("Ordinal by Ordinal"),
1999 N_("Interval by Interval"),
2000 N_("Measure of Agreement"),
2003 static const char *stats[N_SYMMETRIC] =
2007 N_("Contingency Coefficient"),
2008 N_("Kendall's tau-b"),
2009 N_("Kendall's tau-c"),
2011 N_("Spearman Correlation"),
2016 static const int stats_categories[N_SYMMETRIC] =
2018 0, 0, 0, 1, 1, 1, 1, 2, 3,
2022 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2025 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2028 tab_offset (sym, nvar - 2, -1);
2030 for (i = 0; i < N_SYMMETRIC; i++)
2032 if (sym_v[i] == SYSMIS)
2035 if (stats_categories[i] != last_cat)
2037 last_cat = stats_categories[i];
2038 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2041 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2042 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2043 if (sym_ase[i] != SYSMIS)
2044 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2045 if (sym_t[i] != SYSMIS)
2046 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2047 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2051 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2052 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2055 tab_offset (sym, 0, -1);
2058 static int calc_risk (double[], double[], double[], union value *);
2060 /* Display risk estimate. */
2065 double risk_v[3], lower[3], upper[3];
2069 if (!calc_risk (risk_v, upper, lower, c))
2072 tab_offset (risk, nvar - 2, -1);
2074 for (i = 0; i < 3; i++)
2076 if (risk_v[i] == SYSMIS)
2082 if (x->vars[COL_VAR]->type == NUMERIC)
2083 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2084 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2086 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2087 x->vars[COL_VAR]->name,
2088 x->vars[COL_VAR]->width, c[0].s,
2089 x->vars[COL_VAR]->width, c[1].s);
2093 if (x->vars[ROW_VAR]->type == NUMERIC)
2094 sprintf (buf, _("For cohort %s = %g"),
2095 x->vars[ROW_VAR]->name, rows[i - 1].f);
2097 sprintf (buf, _("For cohort %s = %.*s"),
2098 x->vars[ROW_VAR]->name,
2099 x->vars[ROW_VAR]->width, rows[i - 1].s);
2103 tab_text (risk, 0, 0, TAB_LEFT, buf);
2104 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2105 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2106 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2107 tab_next_row (risk);
2110 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2111 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2112 tab_next_row (risk);
2114 tab_offset (risk, 0, -1);
2117 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2118 double[N_DIRECTIONAL]);
2120 /* Display directional measures. */
2122 display_directional (void)
2124 static const char *categories[] =
2126 N_("Nominal by Nominal"),
2127 N_("Ordinal by Ordinal"),
2128 N_("Nominal by Interval"),
2131 static const char *stats[] =
2134 N_("Goodman and Kruskal tau"),
2135 N_("Uncertainty Coefficient"),
2140 static const char *types[] =
2147 static const int stats_categories[N_DIRECTIONAL] =
2149 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2152 static const int stats_stats[N_DIRECTIONAL] =
2154 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2157 static const int stats_types[N_DIRECTIONAL] =
2159 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2162 static const int *stats_lookup[] =
2169 static const char **stats_names[] =
2181 double direct_v[N_DIRECTIONAL];
2182 double direct_ase[N_DIRECTIONAL];
2183 double direct_t[N_DIRECTIONAL];
2187 if (!calc_directional (direct_v, direct_ase, direct_t))
2190 tab_offset (direct, nvar - 2, -1);
2192 for (i = 0; i < N_DIRECTIONAL; i++)
2194 if (direct_v[i] == SYSMIS)
2200 for (j = 0; j < 3; j++)
2201 if (last[j] != stats_lookup[j][i])
2204 tab_hline (direct, TAL_1, j, 6, 0);
2209 int k = last[j] = stats_lookup[j][i];
2214 string = x->vars[0]->name;
2216 string = x->vars[1]->name;
2218 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2219 gettext (stats_names[j][k]), string);
2224 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2225 if (direct_ase[i] != SYSMIS)
2226 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2227 if (direct_t[i] != SYSMIS)
2228 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2229 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2230 tab_next_row (direct);
2233 tab_offset (direct, 0, -1);
2236 /* Statistical calculations. */
2238 /* Returns the value of the gamma (factorial) function for an integer
2241 gamma_int (double x)
2246 for (i = 2; i < x; i++)
2251 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2253 static inline double
2254 Pr (int a, int b, int c, int d)
2256 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2257 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2258 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2259 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2260 / gamma_int (a + b + c + d + 1.));
2263 /* Swap the contents of A and B. */
2265 swap (int *a, int *b)
2272 /* Calculate significance for Fisher's exact test as specified in
2273 _SPSS Statistical Algorithms_, Appendix 5. */
2275 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2279 if (min (c, d) < min (a, b))
2280 swap (&a, &c), swap (&b, &d);
2281 if (min (b, d) < min (a, c))
2282 swap (&a, &b), swap (&c, &d);
2286 swap (&a, &b), swap (&c, &d);
2288 swap (&a, &c), swap (&b, &d);
2292 for (x = 0; x <= a; x++)
2293 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2295 *fisher2 = *fisher1;
2296 for (x = 1; x <= b; x++)
2297 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2300 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2301 columns with values COLS and N_ROWS rows with values ROWS. Values
2302 in the matrix sum to W. */
2304 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2305 double *fisher1, double *fisher2)
2309 chisq[0] = chisq[1] = 0.;
2310 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2311 *fisher1 = *fisher2 = SYSMIS;
2313 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2315 if (ns_rows <= 1 || ns_cols <= 1)
2317 chisq[0] = chisq[1] = SYSMIS;
2321 for (r = 0; r < n_rows; r++)
2322 for (c = 0; c < n_cols; c++)
2324 const double expected = row_tot[r] * col_tot[c] / W;
2325 const double freq = mat[n_cols * r + c];
2326 const double residual = freq - expected;
2328 chisq[0] += residual * residual / expected;
2330 chisq[1] += freq * log (expected / freq);
2341 /* Calculate Yates and Fisher exact test. */
2342 if (ns_cols == 2 && ns_rows == 2)
2344 double f11, f12, f21, f22;
2350 for (i = j = 0; i < n_cols; i++)
2351 if (col_tot[i] != 0.)
2360 f11 = mat[nz_cols[0]];
2361 f12 = mat[nz_cols[1]];
2362 f21 = mat[nz_cols[0] + n_cols];
2363 f22 = mat[nz_cols[1] + n_cols];
2368 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2371 chisq[3] = (W * x * x
2372 / (f11 + f12) / (f21 + f22)
2373 / (f11 + f21) / (f12 + f22));
2381 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2382 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2385 /* Calculate Mantel-Haenszel. */
2386 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2388 double r, ase_0, ase_1;
2389 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2391 chisq[4] = (W - 1.) * r * r;
2396 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2397 ASE_1, and ase_0 into ASE_0. The row and column values must be
2398 passed in X and Y. */
2400 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2402 double SX, SY, S, T;
2404 double sum_XYf, sum_X2Y2f;
2405 double sum_Xr, sum_X2r;
2406 double sum_Yc, sum_Y2c;
2409 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2410 for (j = 0; j < n_cols; j++)
2412 double fij = mat[j + i * n_cols];
2413 double product = X[i] * Y[j];
2414 double temp = fij * product;
2416 sum_X2Y2f += temp * product;
2419 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2421 sum_Xr += X[i] * row_tot[i];
2422 sum_X2r += X[i] * X[i] * row_tot[i];
2426 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2428 sum_Yc += Y[i] * col_tot[i];
2429 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2433 S = sum_XYf - sum_Xr * sum_Yc / W;
2434 SX = sum_X2r - sum_Xr * sum_Xr / W;
2435 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2438 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2443 for (s = c = 0., i = 0; i < n_rows; i++)
2444 for (j = 0; j < n_cols; j++)
2446 double Xresid, Yresid;
2449 Xresid = X[i] - Xbar;
2450 Yresid = Y[j] - Ybar;
2451 temp = (T * Xresid * Yresid
2453 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2454 y = mat[j + i * n_cols] * temp * temp - c;
2459 *ase_1 = sqrt (s) / (T * T);
2463 static double somers_d_v[3];
2464 static double somers_d_ase[3];
2465 static double somers_d_t[3];
2467 /* Calculate symmetric statistics and their asymptotic standard
2468 errors. Returns 0 if none could be calculated. */
2470 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2471 double t[N_SYMMETRIC])
2473 int q = min (ns_rows, ns_cols);
2482 for (i = 0; i < N_SYMMETRIC; i++)
2483 v[i] = ase[i] = t[i] = SYSMIS;
2486 /* Phi, Cramer's V, contingency coefficient. */
2487 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2489 double Xp = 0.; /* Pearson chi-square. */
2494 for (r = 0; r < n_rows; r++)
2495 for (c = 0; c < n_cols; c++)
2497 const double expected = row_tot[r] * col_tot[c] / W;
2498 const double freq = mat[n_cols * r + c];
2499 const double residual = freq - expected;
2501 Xp += residual * residual / expected;
2505 if (cmd.a_statistics[CRS_ST_PHI])
2507 v[0] = sqrt (Xp / W);
2508 v[1] = sqrt (Xp / (W * (q - 1)));
2510 if (cmd.a_statistics[CRS_ST_CC])
2511 v[2] = sqrt (Xp / (Xp + W));
2514 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2515 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2520 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2527 for (r = 0; r < n_rows; r++)
2528 Dr -= row_tot[r] * row_tot[r];
2529 for (c = 0; c < n_cols; c++)
2530 Dc -= col_tot[c] * col_tot[c];
2536 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2537 for (c = 0; c < n_cols; c++)
2541 for (r = 0; r < n_rows; r++)
2542 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2552 for (i = 0; i < n_rows; i++)
2556 for (j = 1; j < n_cols; j++)
2557 Cij += col_tot[j] - cum[j + i * n_cols];
2560 for (j = 1; j < n_cols; j++)
2561 Dij += cum[j + (i - 1) * n_cols];
2565 double fij = mat[j + i * n_cols];
2571 assert (j < n_cols);
2573 Cij -= col_tot[j] - cum[j + i * n_cols];
2574 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2578 Cij += cum[j - 1 + (i - 1) * n_cols];
2579 Dij -= cum[j + (i - 1) * n_cols];
2585 if (cmd.a_statistics[CRS_ST_BTAU])
2586 v[3] = (P - Q) / sqrt (Dr * Dc);
2587 if (cmd.a_statistics[CRS_ST_CTAU])
2588 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2589 if (cmd.a_statistics[CRS_ST_GAMMA])
2590 v[5] = (P - Q) / (P + Q);
2592 /* ASE for tau-b, tau-c, gamma. Calculations could be
2593 eliminated here, at expense of memory. */
2598 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2599 for (i = 0; i < n_rows; i++)
2603 for (j = 1; j < n_cols; j++)
2604 Cij += col_tot[j] - cum[j + i * n_cols];
2607 for (j = 1; j < n_cols; j++)
2608 Dij += cum[j + (i - 1) * n_cols];
2612 double fij = mat[j + i * n_cols];
2614 if (cmd.a_statistics[CRS_ST_BTAU])
2616 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2617 + v[3] * (row_tot[i] * Dc
2618 + col_tot[j] * Dr));
2619 btau_cum += fij * temp * temp;
2623 const double temp = Cij - Dij;
2624 ctau_cum += fij * temp * temp;
2627 if (cmd.a_statistics[CRS_ST_GAMMA])
2629 const double temp = Q * Cij - P * Dij;
2630 gamma_cum += fij * temp * temp;
2633 if (cmd.a_statistics[CRS_ST_D])
2635 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2636 - (P - Q) * (W - row_tot[i]));
2637 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2638 - (Q - P) * (W - col_tot[j]));
2643 assert (j < n_cols);
2645 Cij -= col_tot[j] - cum[j + i * n_cols];
2646 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2650 Cij += cum[j - 1 + (i - 1) * n_cols];
2651 Dij -= cum[j + (i - 1) * n_cols];
2657 btau_var = ((btau_cum
2658 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2660 if (cmd.a_statistics[CRS_ST_BTAU])
2662 ase[3] = sqrt (btau_var);
2663 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2666 if (cmd.a_statistics[CRS_ST_CTAU])
2668 ase[4] = ((2 * q / ((q - 1) * W * W))
2669 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2670 t[4] = v[4] / ase[4];
2672 if (cmd.a_statistics[CRS_ST_GAMMA])
2674 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2675 t[5] = v[5] / (2. / (P + Q)
2676 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2678 if (cmd.a_statistics[CRS_ST_D])
2680 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2681 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2682 somers_d_t[0] = (somers_d_v[0]
2684 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2685 somers_d_v[1] = (P - Q) / Dc;
2686 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2687 somers_d_t[1] = (somers_d_v[1]
2689 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2690 somers_d_v[2] = (P - Q) / Dr;
2691 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2692 somers_d_t[2] = (somers_d_v[2]
2694 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2700 /* Spearman correlation, Pearson's r. */
2701 if (cmd.a_statistics[CRS_ST_CORR])
2703 double *R = local_alloc (sizeof *R * n_rows);
2704 double *C = local_alloc (sizeof *C * n_cols);
2707 double y, t, c = 0., s = 0.;
2712 R[i] = s + (row_tot[i] + 1.) / 2.;
2719 assert (i < n_rows);
2724 double y, t, c = 0., s = 0.;
2729 C[j] = s + (col_tot[j] + 1.) / 2;
2736 assert (j < n_cols);
2740 calc_r (R, C, &v[6], &t[6], &ase[6]);
2746 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2750 /* Cohen's kappa. */
2751 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2753 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2756 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2757 i < ns_rows; i++, j++)
2761 while (col_tot[j] == 0.)
2764 prod = row_tot[i] * col_tot[j];
2765 sum = row_tot[i] + col_tot[j];
2767 sum_fii += mat[j + i * n_cols];
2769 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2770 sum_riciri_ci += prod * sum;
2772 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2773 for (j = 0; j < ns_cols; j++)
2775 double sum = row_tot[i] + col_tot[j];
2776 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2779 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2781 ase[8] = sqrt ((W * W * sum_rici
2782 + sum_rici * sum_rici
2783 - W * sum_riciri_ci)
2784 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2786 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2787 / pow2 (W * W - sum_rici))
2788 + ((2. * (W - sum_fii)
2789 * (2. * sum_fii * sum_rici
2790 - W * sum_fiiri_ci))
2791 / cube (W * W - sum_rici))
2792 + (pow2 (W - sum_fii)
2793 * (W * sum_fijri_ci2 - 4.
2794 * sum_rici * sum_rici)
2795 / pow4 (W * W - sum_rici))));
2797 t[8] = v[8] / ase[8];
2804 /* Calculate risk estimate. */
2806 calc_risk (double *value, double *upper, double *lower, union value *c)
2808 double f11, f12, f21, f22;
2814 for (i = 0; i < 3; i++)
2815 value[i] = upper[i] = lower[i] = SYSMIS;
2818 if (ns_rows != 2 || ns_cols != 2)
2825 for (i = j = 0; i < n_cols; i++)
2826 if (col_tot[i] != 0.)
2835 f11 = mat[nz_cols[0]];
2836 f12 = mat[nz_cols[1]];
2837 f21 = mat[nz_cols[0] + n_cols];
2838 f22 = mat[nz_cols[1] + n_cols];
2840 c[0] = cols[nz_cols[0]];
2841 c[1] = cols[nz_cols[1]];
2844 value[0] = (f11 * f22) / (f12 * f21);
2845 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2846 lower[0] = value[0] * exp (-1.960 * v);
2847 upper[0] = value[0] * exp (1.960 * v);
2849 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2850 v = sqrt ((f12 / (f11 * (f11 + f12)))
2851 + (f22 / (f21 * (f21 + f22))));
2852 lower[1] = value[1] * exp (-1.960 * v);
2853 upper[1] = value[1] * exp (1.960 * v);
2855 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2856 v = sqrt ((f11 / (f12 * (f11 + f12)))
2857 + (f21 / (f22 * (f21 + f22))));
2858 lower[2] = value[2] * exp (-1.960 * v);
2859 upper[2] = value[2] * exp (1.960 * v);
2864 /* Calculate directional measures. */
2866 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2867 double t[N_DIRECTIONAL])
2872 for (i = 0; i < N_DIRECTIONAL; i++)
2873 v[i] = ase[i] = t[i] = SYSMIS;
2877 if (cmd.a_statistics[CRS_ST_LAMBDA])
2879 double *fim = xnmalloc (n_rows, sizeof *fim);
2880 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2881 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2882 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2883 double sum_fim, sum_fmj;
2885 int rm_index, cm_index;
2888 /* Find maximum for each row and their sum. */
2889 for (sum_fim = 0., i = 0; i < n_rows; i++)
2891 double max = mat[i * n_cols];
2894 for (j = 1; j < n_cols; j++)
2895 if (mat[j + i * n_cols] > max)
2897 max = mat[j + i * n_cols];
2901 sum_fim += fim[i] = max;
2902 fim_index[i] = index;
2905 /* Find maximum for each column. */
2906 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2908 double max = mat[j];
2911 for (i = 1; i < n_rows; i++)
2912 if (mat[j + i * n_cols] > max)
2914 max = mat[j + i * n_cols];
2918 sum_fmj += fmj[j] = max;
2919 fmj_index[j] = index;
2922 /* Find maximum row total. */
2925 for (i = 1; i < n_rows; i++)
2926 if (row_tot[i] > rm)
2932 /* Find maximum column total. */
2935 for (j = 1; j < n_cols; j++)
2936 if (col_tot[j] > cm)
2942 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2943 v[1] = (sum_fmj - rm) / (W - rm);
2944 v[2] = (sum_fim - cm) / (W - cm);
2946 /* ASE1 for Y given X. */
2950 for (accum = 0., i = 0; i < n_rows; i++)
2951 for (j = 0; j < n_cols; j++)
2953 const int deltaj = j == cm_index;
2954 accum += (mat[j + i * n_cols]
2955 * pow2 ((j == fim_index[i])
2960 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2963 /* ASE0 for Y given X. */
2967 for (accum = 0., i = 0; i < n_rows; i++)
2968 if (cm_index != fim_index[i])
2969 accum += (mat[i * n_cols + fim_index[i]]
2970 + mat[i * n_cols + cm_index]);
2971 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2974 /* ASE1 for X given Y. */
2978 for (accum = 0., i = 0; i < n_rows; i++)
2979 for (j = 0; j < n_cols; j++)
2981 const int deltaj = i == rm_index;
2982 accum += (mat[j + i * n_cols]
2983 * pow2 ((i == fmj_index[j])
2988 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2991 /* ASE0 for X given Y. */
2995 for (accum = 0., j = 0; j < n_cols; j++)
2996 if (rm_index != fmj_index[j])
2997 accum += (mat[j + n_cols * fmj_index[j]]
2998 + mat[j + n_cols * rm_index]);
2999 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3002 /* Symmetric ASE0 and ASE1. */
3007 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3008 for (j = 0; j < n_cols; j++)
3010 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3011 int temp1 = (i == rm_index) + (j == cm_index);
3012 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3013 accum1 += (mat[j + i * n_cols]
3014 * pow2 (temp0 + (v[0] - 1.) * temp1));
3016 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3017 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3018 / (2. * W - rm - cm));
3027 double sum_fij2_ri, sum_fij2_ci;
3028 double sum_ri2, sum_cj2;
3030 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3031 for (j = 0; j < n_cols; j++)
3033 double temp = pow2 (mat[j + i * n_cols]);
3034 sum_fij2_ri += temp / row_tot[i];
3035 sum_fij2_ci += temp / col_tot[j];
3038 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3039 sum_ri2 += row_tot[i] * row_tot[i];
3041 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3042 sum_cj2 += col_tot[j] * col_tot[j];
3044 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3045 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3049 if (cmd.a_statistics[CRS_ST_UC])
3051 double UX, UY, UXY, P;
3052 double ase1_yx, ase1_xy, ase1_sym;
3055 for (UX = 0., i = 0; i < n_rows; i++)
3056 if (row_tot[i] > 0.)
3057 UX -= row_tot[i] / W * log (row_tot[i] / W);
3059 for (UY = 0., j = 0; j < n_cols; j++)
3060 if (col_tot[j] > 0.)
3061 UY -= col_tot[j] / W * log (col_tot[j] / W);
3063 for (UXY = P = 0., i = 0; i < n_rows; i++)
3064 for (j = 0; j < n_cols; j++)
3066 double entry = mat[j + i * n_cols];
3071 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3072 UXY -= entry / W * log (entry / W);
3075 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3076 for (j = 0; j < n_cols; j++)
3078 double entry = mat[j + i * n_cols];
3083 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3084 + (UX - UXY) * log (col_tot[j] / W));
3085 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3086 + (UY - UXY) * log (row_tot[i] / W));
3087 ase1_sym += entry * pow2 ((UXY
3088 * log (row_tot[i] * col_tot[j] / (W * W)))
3089 - (UX + UY) * log (entry / W));
3092 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3093 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3094 t[5] = v[5] / ((2. / (W * (UX + UY)))
3095 * sqrt (P - pow2 (UX + UY - UXY) / W));
3097 v[6] = (UX + UY - UXY) / UX;
3098 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3099 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3101 v[7] = (UX + UY - UXY) / UY;
3102 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3103 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3107 if (cmd.a_statistics[CRS_ST_D])
3112 calc_symmetric (NULL, NULL, NULL);
3113 for (i = 0; i < 3; i++)
3115 v[8 + i] = somers_d_v[i];
3116 ase[8 + i] = somers_d_ase[i];
3117 t[8 + i] = somers_d_t[i];
3122 if (cmd.a_statistics[CRS_ST_ETA])
3125 double sum_Xr, sum_X2r;
3129 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3131 sum_Xr += rows[i].f * row_tot[i];
3132 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3134 SX = sum_X2r - sum_Xr * sum_Xr / W;
3136 for (SXW = 0., j = 0; j < n_cols; j++)
3140 for (cum = 0., i = 0; i < n_rows; i++)
3142 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3143 cum += rows[i].f * mat[j + i * n_cols];
3146 SXW -= cum * cum / col_tot[j];
3148 v[11] = sqrt (1. - SXW / SX);
3152 double sum_Yc, sum_Y2c;
3156 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3158 sum_Yc += cols[i].f * col_tot[i];
3159 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3161 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3163 for (SYW = 0., i = 0; i < n_rows; i++)
3167 for (cum = 0., j = 0; j < n_cols; j++)
3169 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3170 cum += cols[j].f * mat[j + i * n_cols];
3173 SYW -= cum * cum / row_tot[i];
3175 v[12] = sqrt (1. - SYW / SY);
3182 /* A wrapper around data_out() that limits string output to short
3183 string width and null terminates the result. */
3185 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3187 struct fmt_spec fmt_subst;
3189 /* Limit to short string width. */
3190 if (fmt_is_string (fp->type))
3194 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3195 if (fmt_subst.type == FMT_A)
3196 fmt_subst.w = min (8, fmt_subst.w);
3198 fmt_subst.w = min (16, fmt_subst.w);
3204 data_out (v, fp, s);
3206 /* Null terminate. */