1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/data-out.h>
41 #include <data/dictionary.h>
42 #include <data/format.h>
43 #include <data/procedure.h>
44 #include <data/value-labels.h>
45 #include <data/variable.h>
46 #include <language/command.h>
47 #include <language/dictionary/split-file.h>
48 #include <language/lexer/lexer.h>
49 #include <language/lexer/variable-parser.h>
50 #include <libpspp/alloc.h>
51 #include <libpspp/array.h>
52 #include <libpspp/assertion.h>
53 #include <libpspp/compiler.h>
54 #include <libpspp/hash.h>
55 #include <libpspp/magic.h>
56 #include <libpspp/message.h>
57 #include <libpspp/message.h>
58 #include <libpspp/misc.h>
59 #include <libpspp/pool.h>
60 #include <libpspp/str.h>
61 #include <output/output.h>
62 #include <output/table.h>
67 #define _(msgid) gettext (msgid)
68 #define N_(msgid) msgid
76 missing=miss:!table/include/report;
77 +write[wr_]=none,cells,all;
78 +format=fmt:!labels/nolabels/novallabs,
81 tabl:!tables/notables,
84 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
86 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
92 /* Number of chi-square statistics. */
95 /* Number of symmetric statistics. */
98 /* Number of directional statistics. */
99 #define N_DIRECTIONAL 13
101 /* A single table entry for general mode. */
104 int table; /* Flattened table number. */
107 double freq; /* Frequency count. */
108 double *data; /* Crosstabulation table for integer mode. */
111 union value values[1]; /* Values. */
114 /* A crosstabulation. */
117 int nvar; /* Number of variables. */
118 double missing; /* Missing cases count. */
119 int ofs; /* Integer mode: Offset into sorted_tab[]. */
120 struct variable *vars[2]; /* At least two variables; sorted by
121 larger indices first. */
124 /* Integer mode variable info. */
127 int min; /* Minimum value. */
128 int max; /* Maximum value + 1. */
129 int count; /* max - min. */
132 static inline struct var_range *
133 get_var_range (struct variable *v)
135 return var_get_aux (v);
138 /* Indexes into crosstab.v. */
145 /* General mode crosstabulation table. */
146 static struct hsh_table *gen_tab; /* Hash table. */
147 static int n_sorted_tab; /* Number of entries in sorted_tab. */
148 static struct table_entry **sorted_tab; /* Sorted table. */
150 /* Variables specifies on VARIABLES. */
151 static struct variable **variables;
152 static size_t variables_cnt;
155 static struct crosstab **xtab;
158 /* Integer or general mode? */
167 static int num_cells; /* Number of cells requested. */
168 static int cells[8]; /* Cells requested. */
171 static int write; /* One of WR_* that specifies the WRITE style. */
173 /* Command parsing info. */
174 static struct cmd_crosstabs cmd;
177 static struct pool *pl_tc; /* For table cells. */
178 static struct pool *pl_col; /* For column data. */
180 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
181 static void precalc (const struct ccase *, void *, const struct dataset *);
182 static bool calc_general (const struct ccase *, void *, const struct dataset *);
183 static bool calc_integer (const struct ccase *, void *, const struct dataset *);
184 static bool postcalc (void *, const struct dataset *);
185 static void submit (struct tab_table *);
187 static void format_short (char *s, const struct fmt_spec *fp,
188 const union value *v);
190 /* Parse and execute CROSSTABS, then clean up. */
192 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
194 int result = internal_cmd_crosstabs (lexer, ds);
197 pool_destroy (pl_tc);
198 pool_destroy (pl_col);
203 /* Parses and executes the CROSSTABS procedure. */
205 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
214 pl_tc = pool_create ();
215 pl_col = pool_create ();
217 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
220 mode = variables ? INTEGER : GENERAL;
225 cmd.a_cells[CRS_CL_COUNT] = 1;
231 for (i = 0; i < CRS_CL_count; i++)
236 cmd.a_cells[CRS_CL_COUNT] = 1;
237 cmd.a_cells[CRS_CL_ROW] = 1;
238 cmd.a_cells[CRS_CL_COLUMN] = 1;
239 cmd.a_cells[CRS_CL_TOTAL] = 1;
241 if (cmd.a_cells[CRS_CL_ALL])
243 for (i = 0; i < CRS_CL_count; i++)
245 cmd.a_cells[CRS_CL_ALL] = 0;
247 cmd.a_cells[CRS_CL_NONE] = 0;
249 for (num_cells = i = 0; i < CRS_CL_count; i++)
251 cells[num_cells++] = i;
254 if (cmd.sbc_statistics)
259 for (i = 0; i < CRS_ST_count; i++)
260 if (cmd.a_statistics[i])
263 cmd.a_statistics[CRS_ST_CHISQ] = 1;
264 if (cmd.a_statistics[CRS_ST_ALL])
265 for (i = 0; i < CRS_ST_count; i++)
266 cmd.a_statistics[i] = 1;
270 if (cmd.miss == CRS_REPORT && mode == GENERAL)
272 msg (SE, _("Missing mode REPORT not allowed in general mode. "
273 "Assuming MISSING=TABLE."));
274 cmd.miss = CRS_TABLE;
278 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
279 cmd.a_write[CRS_WR_ALL] = 0;
280 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
282 msg (SE, _("Write mode ALL not allowed in general mode. "
283 "Assuming WRITE=CELLS."));
284 cmd.a_write[CRS_WR_CELLS] = 1;
287 && (cmd.a_write[CRS_WR_NONE]
288 + cmd.a_write[CRS_WR_ALL]
289 + cmd.a_write[CRS_WR_CELLS] == 0))
290 cmd.a_write[CRS_WR_CELLS] = 1;
291 if (cmd.a_write[CRS_WR_CELLS])
292 write = CRS_WR_CELLS;
293 else if (cmd.a_write[CRS_WR_ALL])
298 ok = procedure_with_splits (ds, precalc,
299 mode == GENERAL ? calc_general : calc_integer,
302 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
305 /* Parses the TABLES subcommand. */
307 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
309 struct var_set *var_set;
311 struct variable ***by = NULL;
312 size_t *by_nvar = NULL;
316 /* Ensure that this is a TABLES subcommand. */
317 if (!lex_match_id (lexer, "TABLES")
318 && (lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
319 && lex_token (lexer) != T_ALL)
321 lex_match (lexer, '=');
323 if (variables != NULL)
324 var_set = var_set_create_from_array (variables, variables_cnt);
326 var_set = var_set_create_from_dict (dataset_dict (ds));
327 assert (var_set != NULL);
331 by = xnrealloc (by, n_by + 1, sizeof *by);
332 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
333 if (!parse_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
334 PV_NO_DUPLICATE | PV_NO_SCRATCH))
336 if (xalloc_oversized (nx, by_nvar[n_by]))
338 msg (SE, _("Too many crosstabulation variables or dimensions."));
344 if (!lex_match (lexer, T_BY))
348 lex_error (lexer, _("expecting BY"));
357 int *by_iter = xcalloc (n_by, sizeof *by_iter);
360 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
361 for (i = 0; i < nx; i++)
365 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
372 for (i = 0; i < n_by; i++)
373 x->vars[i] = by[i][by_iter[i]];
379 for (i = n_by - 1; i >= 0; i--)
381 if (++by_iter[i] < by_nvar[i])
394 /* All return paths lead here. */
398 for (i = 0; i < n_by; i++)
404 var_set_destroy (var_set);
409 /* Parses the VARIABLES subcommand. */
411 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
415 msg (SE, _("VARIABLES must be specified before TABLES."));
419 lex_match (lexer, '=');
423 size_t orig_nv = variables_cnt;
428 if (!parse_variables (lexer, dataset_dict (ds),
429 &variables, &variables_cnt,
430 (PV_APPEND | PV_NUMERIC
431 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
434 if (lex_token (lexer) != '(')
436 lex_error (lexer, "expecting `('");
441 if (!lex_force_int (lexer))
443 min = lex_integer (lexer);
446 lex_match (lexer, ',');
448 if (!lex_force_int (lexer))
450 max = lex_integer (lexer);
453 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
459 if (lex_token (lexer) != ')')
461 lex_error (lexer, "expecting `)'");
466 for (i = orig_nv; i < variables_cnt; i++)
468 struct var_range *vr = xmalloc (sizeof *vr);
471 vr->count = max - min + 1;
472 var_attach_aux (variables[i], vr, var_dtor_free);
475 if (lex_token (lexer) == '/')
487 /* Data file processing. */
489 static int compare_table_entry (const void *, const void *, const void *);
490 static unsigned hash_table_entry (const void *, const void *);
492 /* Set up the crosstabulation tables for processing. */
494 precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
496 output_split_file_values (ds, first);
499 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
509 for (i = 0; i < nxtab; i++)
511 struct crosstab *x = xtab[i];
516 x->ofs = n_sorted_tab;
518 for (j = 2; j < x->nvar; j++)
519 count *= get_var_range (x->vars[j - 2])->count;
521 sorted_tab = xnrealloc (sorted_tab,
522 n_sorted_tab + count, sizeof *sorted_tab);
523 v = local_alloc (sizeof *v * x->nvar);
524 for (j = 2; j < x->nvar; j++)
525 v[j] = get_var_range (x->vars[j])->min;
526 for (j = 0; j < count; j++)
528 struct table_entry *te;
531 te = sorted_tab[n_sorted_tab++]
532 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
536 int row_cnt = get_var_range (x->vars[0])->count;
537 int col_cnt = get_var_range (x->vars[1])->count;
538 const int mat_size = row_cnt * col_cnt;
541 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
542 for (m = 0; m < mat_size; m++)
546 for (k = 2; k < x->nvar; k++)
547 te->values[k].f = v[k];
548 for (k = 2; k < x->nvar; k++)
550 struct var_range *vr = get_var_range (x->vars[k]);
551 if (++v[k] >= vr->max)
560 sorted_tab = xnrealloc (sorted_tab,
561 n_sorted_tab + 1, sizeof *sorted_tab);
562 sorted_tab[n_sorted_tab] = NULL;
567 /* Form crosstabulations for general mode. */
569 calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
571 bool bad_warn = true;
574 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
576 /* Flattened current table index. */
579 for (t = 0; t < nxtab; t++)
581 struct crosstab *x = xtab[t];
582 const size_t entry_size = (sizeof (struct table_entry)
583 + sizeof (union value) * (x->nvar - 1));
584 struct table_entry *te = local_alloc (entry_size);
586 /* Construct table entry for the current record and table. */
592 for (j = 0; j < x->nvar; j++)
594 const union value *v = case_data (c, x->vars[j]);
595 if ((cmd.miss == CRS_TABLE && var_is_value_missing (x->vars[j], v))
596 || (cmd.miss == CRS_INCLUDE
597 && var_is_value_system_missing (x->vars[j], v)))
599 x->missing += weight;
603 if (var_is_numeric (x->vars[j]))
604 te->values[j].f = case_num (c, x->vars[j]);
607 memcpy (te->values[j].s, case_str (c, x->vars[j]),
608 var_get_width (x->vars[j]));
610 /* Necessary in order to simplify comparisons. */
611 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
612 sizeof (union value) - var_get_width (x->vars[j]));
617 /* Add record to hash table. */
619 struct table_entry **tepp
620 = (struct table_entry **) hsh_probe (gen_tab, te);
623 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
626 memcpy (tep, te, entry_size);
631 (*tepp)->u.freq += weight;
642 calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
644 bool bad_warn = true;
647 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
649 /* Flattened current table index. */
652 for (t = 0; t < nxtab; t++)
654 struct crosstab *x = xtab[t];
659 for (i = 0; i < x->nvar; i++)
661 struct variable *const v = x->vars[i];
662 struct var_range *vr = get_var_range (v);
663 double value = case_num (c, v);
665 /* Note that the first test also rules out SYSMIS. */
666 if ((value < vr->min || value >= vr->max)
667 || (cmd.miss == CRS_TABLE && var_is_num_user_missing (v, value)))
669 x->missing += weight;
675 ofs += fact * ((int) value - vr->min);
681 struct variable *row_var = x->vars[ROW_VAR];
682 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
684 struct variable *col_var = x->vars[COL_VAR];
685 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
687 const int col_dim = get_var_range (col_var)->count;
689 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
698 /* Compare the table_entry's at A and B and return a strcmp()-type
701 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
703 const struct table_entry *a = a_;
704 const struct table_entry *b = b_;
706 if (a->table > b->table)
708 else if (a->table < b->table)
712 const struct crosstab *x = xtab[a->table];
715 for (i = x->nvar - 1; i >= 0; i--)
716 if (var_is_numeric (x->vars[i]))
718 const double diffnum = a->values[i].f - b->values[i].f;
721 else if (diffnum > 0)
726 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
727 var_get_width (x->vars[i]));
736 /* Calculate a hash value from table_entry A. */
738 hash_table_entry (const void *a_, const void *aux UNUSED)
740 const struct table_entry *a = a_;
745 for (i = 0; i < xtab[a->table]->nvar; i++)
746 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
751 /* Post-data reading calculations. */
753 static struct table_entry **find_pivot_extent (struct table_entry **,
754 int *cnt, int pivot);
755 static void enum_var_values (struct table_entry **entries, int entry_cnt,
757 union value **values, int *value_cnt);
758 static void output_pivot_table (struct table_entry **, struct table_entry **,
759 double **, double **, double **,
760 int *, int *, int *);
761 static void make_summary_table (void);
764 postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
768 n_sorted_tab = hsh_count (gen_tab);
769 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
772 make_summary_table ();
774 /* Identify all the individual crosstabulation tables, and deal with
777 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
778 int pc = n_sorted_tab; /* Pivot count. */
780 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
781 int maxrows = 0, maxcols = 0, maxcells = 0;
785 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
789 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
790 &maxrows, &maxcols, &maxcells);
799 hsh_destroy (gen_tab);
804 static void insert_summary (struct tab_table *, int tab_index, double valid);
806 /* Output a table summarizing the cases processed. */
808 make_summary_table (void)
810 struct tab_table *summary;
812 struct table_entry **pb = sorted_tab, **pe;
813 int pc = n_sorted_tab;
816 summary = tab_create (7, 3 + nxtab, 1);
817 tab_title (summary, _("Summary."));
818 tab_headers (summary, 1, 0, 3, 0);
819 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
820 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
821 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
822 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
823 tab_hline (summary, TAL_1, 1, 6, 1);
824 tab_hline (summary, TAL_1, 1, 6, 2);
825 tab_vline (summary, TAL_1, 3, 1, 1);
826 tab_vline (summary, TAL_1, 5, 1, 1);
830 for (i = 0; i < 3; i++)
832 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
833 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
836 tab_offset (summary, 0, 3);
842 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
846 while (cur_tab < (*pb)->table)
847 insert_summary (summary, cur_tab++, 0.);
850 for (valid = 0.; pb < pe; pb++)
851 valid += (*pb)->u.freq;
854 const struct crosstab *const x = xtab[(*pb)->table];
855 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
856 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
857 const int count = n_cols * n_rows;
859 for (valid = 0.; pb < pe; pb++)
861 const double *data = (*pb)->u.data;
864 for (i = 0; i < count; i++)
868 insert_summary (summary, cur_tab++, valid);
873 while (cur_tab < nxtab)
874 insert_summary (summary, cur_tab++, 0.);
879 /* Inserts a line into T describing the crosstabulation at index
880 TAB_INDEX, which has VALID valid observations. */
882 insert_summary (struct tab_table *t, int tab_index, double valid)
884 struct crosstab *x = xtab[tab_index];
886 tab_hline (t, TAL_1, 0, 6, 0);
888 /* Crosstabulation name. */
890 char *buf = local_alloc (128 * x->nvar);
894 for (i = 0; i < x->nvar; i++)
897 cp = stpcpy (cp, " * ");
899 cp = stpcpy (cp, var_to_string (x->vars[i]));
901 tab_text (t, 0, 0, TAB_LEFT, buf);
906 /* Counts and percentages. */
916 for (i = 0; i < 3; i++)
918 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
919 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
930 static struct tab_table *table; /* Crosstabulation table. */
931 static struct tab_table *chisq; /* Chi-square table. */
932 static struct tab_table *sym; /* Symmetric measures table. */
933 static struct tab_table *risk; /* Risk estimate table. */
934 static struct tab_table *direct; /* Directional measures table. */
937 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
939 /* Column values, number of columns. */
940 static union value *cols;
943 /* Row values, number of rows. */
944 static union value *rows;
947 /* Number of statistically interesting columns/rows (columns/rows with
949 static int ns_cols, ns_rows;
951 /* Crosstabulation. */
952 static const struct crosstab *x;
954 /* Number of variables from the crosstabulation to consider. This is
955 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
958 /* Matrix contents. */
959 static double *mat; /* Matrix proper. */
960 static double *row_tot; /* Row totals. */
961 static double *col_tot; /* Column totals. */
962 static double W; /* Grand total. */
964 static void display_dimensions (struct tab_table *, int first_difference,
965 struct table_entry *);
966 static void display_crosstabulation (void);
967 static void display_chisq (void);
968 static void display_symmetric (void);
969 static void display_risk (void);
970 static void display_directional (void);
971 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
972 static void table_value_missing (struct tab_table *table, int c, int r,
973 unsigned char opt, const union value *v,
974 const struct variable *var);
975 static void delete_missing (void);
977 /* Output pivot table beginning at PB and continuing until PE,
978 exclusive. For efficiency, *MATP is a pointer to a matrix that can
979 hold *MAXROWS entries. */
981 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
982 double **matp, double **row_totp, double **col_totp,
983 int *maxrows, int *maxcols, int *maxcells)
986 struct table_entry **tb = pb, **te; /* Table begin, table end. */
987 int tc = pe - pb; /* Table count. */
989 /* Table entry for header comparison. */
990 struct table_entry *cmp = NULL;
992 x = xtab[(*pb)->table];
993 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
995 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
997 /* Crosstabulation table initialization. */
1000 table = tab_create (nvar + n_cols,
1001 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1002 tab_headers (table, nvar - 1, 0, 2, 0);
1004 /* First header line. */
1005 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1006 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1008 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1010 /* Second header line. */
1014 for (i = 2; i < nvar; i++)
1015 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1016 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1017 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1018 var_get_name (x->vars[ROW_VAR]));
1019 for (i = 0; i < n_cols; i++)
1020 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1022 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1025 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1026 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1030 char *title = local_alloc (x->nvar * 64 + 128);
1034 if (cmd.pivot == CRS_PIVOT)
1035 for (i = 0; i < nvar; i++)
1038 cp = stpcpy (cp, " by ");
1039 cp = stpcpy (cp, var_get_name (x->vars[i]));
1043 cp = spprintf (cp, "%s by %s for",
1044 var_get_name (x->vars[0]),
1045 var_get_name (x->vars[1]));
1046 for (i = 2; i < nvar; i++)
1048 char buf[64], *bufp;
1053 cp = stpcpy (cp, var_get_name (x->vars[i]));
1055 format_short (buf, var_get_print_format (x->vars[i]),
1057 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1059 cp = stpcpy (cp, bufp);
1063 cp = stpcpy (cp, " [");
1064 for (i = 0; i < num_cells; i++)
1072 static const struct tuple cell_names[] =
1074 {CRS_CL_COUNT, N_("count")},
1075 {CRS_CL_ROW, N_("row %")},
1076 {CRS_CL_COLUMN, N_("column %")},
1077 {CRS_CL_TOTAL, N_("total %")},
1078 {CRS_CL_EXPECTED, N_("expected")},
1079 {CRS_CL_RESIDUAL, N_("residual")},
1080 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1081 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1085 const struct tuple *t;
1087 for (t = cell_names; t->value != cells[i]; t++)
1088 assert (t->value != -1);
1090 cp = stpcpy (cp, ", ");
1091 cp = stpcpy (cp, gettext (t->name));
1095 tab_title (table, "%s", title);
1099 tab_offset (table, 0, 2);
1104 /* Chi-square table initialization. */
1105 if (cmd.a_statistics[CRS_ST_CHISQ])
1107 chisq = tab_create (6 + (nvar - 2),
1108 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1109 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1111 tab_title (chisq, _("Chi-square tests."));
1113 tab_offset (chisq, nvar - 2, 0);
1114 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1115 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1116 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1117 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1118 _("Asymp. Sig. (2-sided)"));
1119 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1120 _("Exact. Sig. (2-sided)"));
1121 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1122 _("Exact. Sig. (1-sided)"));
1124 tab_offset (chisq, 0, 1);
1129 /* Symmetric measures. */
1130 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1131 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1132 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1133 || cmd.a_statistics[CRS_ST_KAPPA])
1135 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1136 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1137 tab_title (sym, _("Symmetric measures."));
1139 tab_offset (sym, nvar - 2, 0);
1140 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1141 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1142 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1143 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1144 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1145 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1146 tab_offset (sym, 0, 1);
1151 /* Risk estimate. */
1152 if (cmd.a_statistics[CRS_ST_RISK])
1154 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1155 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1156 tab_title (risk, _("Risk estimate."));
1158 tab_offset (risk, nvar - 2, 0);
1159 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1160 _("95%% Confidence Interval"));
1161 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1162 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1163 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1164 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1165 tab_hline (risk, TAL_1, 2, 3, 1);
1166 tab_vline (risk, TAL_1, 2, 0, 1);
1167 tab_offset (risk, 0, 2);
1172 /* Directional measures. */
1173 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1174 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1176 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1177 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1178 tab_title (direct, _("Directional measures."));
1180 tab_offset (direct, nvar - 2, 0);
1181 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1182 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1183 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1184 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1185 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1186 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1187 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1188 tab_offset (direct, 0, 1);
1195 /* Find pivot subtable if applicable. */
1196 te = find_pivot_extent (tb, &tc, 0);
1200 /* Find all the row variable values. */
1201 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1203 /* Allocate memory space for the column and row totals. */
1204 if (n_rows > *maxrows)
1206 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1207 row_tot = *row_totp;
1210 if (n_cols > *maxcols)
1212 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1213 col_tot = *col_totp;
1217 /* Allocate table space for the matrix. */
1218 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1219 tab_realloc (table, -1,
1220 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1221 tab_nr (table) * (pe - pb) / (te - tb)));
1223 if (mode == GENERAL)
1225 /* Allocate memory space for the matrix. */
1226 if (n_cols * n_rows > *maxcells)
1228 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1229 *maxcells = n_cols * n_rows;
1234 /* Build the matrix and calculate column totals. */
1236 union value *cur_col = cols;
1237 union value *cur_row = rows;
1239 double *cp = col_tot;
1240 struct table_entry **p;
1243 for (p = &tb[0]; p < te; p++)
1245 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1249 for (; cur_row < &rows[n_rows]; cur_row++)
1255 mp = &mat[cur_col - cols];
1258 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1265 *cp += *mp = (*p)->u.freq;
1270 /* Zero out the rest of the matrix. */
1271 for (; cur_row < &rows[n_rows]; cur_row++)
1277 if (cur_col < &cols[n_cols])
1279 const int rem_cols = n_cols - (cur_col - cols);
1282 for (c = 0; c < rem_cols; c++)
1284 mp = &mat[cur_col - cols];
1285 for (r = 0; r < n_rows; r++)
1287 for (c = 0; c < rem_cols; c++)
1289 mp += n_cols - rem_cols;
1297 double *tp = col_tot;
1299 assert (mode == INTEGER);
1300 mat = (*tb)->u.data;
1303 /* Calculate column totals. */
1304 for (c = 0; c < n_cols; c++)
1307 double *cp = &mat[c];
1309 for (r = 0; r < n_rows; r++)
1310 cum += cp[r * n_cols];
1318 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1319 ns_cols += *cp != 0.;
1322 /* Calculate row totals. */
1325 double *rp = row_tot;
1328 for (ns_rows = 0, r = n_rows; r--; )
1331 for (c = n_cols; c--; )
1339 /* Calculate grand total. */
1345 if (n_rows < n_cols)
1346 tp = row_tot, n = n_rows;
1348 tp = col_tot, n = n_cols;
1354 /* Find the first variable that differs from the last subtable,
1355 then display the values of the dimensioning variables for
1356 each table that needs it. */
1358 int first_difference = nvar - 1;
1361 for (; ; first_difference--)
1363 assert (first_difference >= 2);
1364 if (memcmp (&cmp->values[first_difference],
1365 &(*tb)->values[first_difference],
1366 sizeof *cmp->values))
1372 display_dimensions (table, first_difference, *tb);
1374 display_dimensions (chisq, first_difference, *tb);
1376 display_dimensions (sym, first_difference, *tb);
1378 display_dimensions (risk, first_difference, *tb);
1380 display_dimensions (direct, first_difference, *tb);
1384 display_crosstabulation ();
1385 if (cmd.miss == CRS_REPORT)
1390 display_symmetric ();
1394 display_directional ();
1405 tab_resize (chisq, 4 + (nvar - 2), -1);
1416 /* Delete missing rows and columns for statistical analysis when
1419 delete_missing (void)
1424 for (r = 0; r < n_rows; r++)
1425 if (var_is_num_user_missing (x->vars[ROW_VAR], rows[r].f))
1429 for (c = 0; c < n_cols; c++)
1430 mat[c + r * n_cols] = 0.;
1438 for (c = 0; c < n_cols; c++)
1439 if (var_is_num_user_missing (x->vars[COL_VAR], cols[c].f))
1443 for (r = 0; r < n_rows; r++)
1444 mat[c + r * n_cols] = 0.;
1450 /* Prepare table T for submission, and submit it. */
1452 submit (struct tab_table *t)
1459 tab_resize (t, -1, 0);
1460 if (tab_nr (t) == tab_t (t))
1465 tab_offset (t, 0, 0);
1467 for (i = 2; i < nvar; i++)
1468 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1469 var_to_string (x->vars[i]));
1470 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1471 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1473 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1475 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1476 tab_dim (t, crosstabs_dim);
1480 /* Sets the widths of all the columns and heights of all the rows in
1481 table T for driver D. */
1483 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1487 /* Width of a numerical column. */
1488 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1489 if (cmd.miss == CRS_REPORT)
1490 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1492 /* Set width for header columns. */
1498 w = d->width - c * (t->nc - t->l);
1499 for (i = 0; i <= t->nc; i++)
1503 if (w < d->prop_em_width * 8)
1504 w = d->prop_em_width * 8;
1506 if (w > d->prop_em_width * 15)
1507 w = d->prop_em_width * 15;
1509 for (i = 0; i < t->l; i++)
1513 for (i = t->l; i < t->nc; i++)
1516 for (i = 0; i < t->nr; i++)
1517 t->h[i] = tab_natural_height (t, d, i);
1520 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1521 int *cnt, int pivot);
1522 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1523 int *cnt, int pivot);
1525 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1527 static struct table_entry **
1528 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1530 return (mode == GENERAL
1531 ? find_pivot_extent_general (tp, cnt, pivot)
1532 : find_pivot_extent_integer (tp, cnt, pivot));
1535 /* Find the extent of a region in TP that contains one table. If
1536 PIVOT != 0 that means a set of table entries with identical table
1537 number; otherwise they also have to have the same values for every
1538 dimension after the row and column dimensions. The table that is
1539 searched starts at TP and has length CNT. Returns the first entry
1540 after the last one in the table; sets *CNT to the number of
1541 remaining values. If there are no entries in TP at all, returns
1542 NULL. A yucky interface, admittedly, but it works. */
1543 static struct table_entry **
1544 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1546 struct table_entry *fp = *tp;
1551 x = xtab[(*tp)->table];
1559 if ((*tp)->table != fp->table)
1564 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1571 /* Integer mode correspondent to find_pivot_extent_general(). This
1572 could be optimized somewhat, but I just don't give a crap about
1573 CROSSTABS performance in integer mode, which is just a
1574 CROSSTABS wart as far as I'm concerned.
1576 That said, feel free to send optimization patches to me. */
1577 static struct table_entry **
1578 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1580 struct table_entry *fp = *tp;
1585 x = xtab[(*tp)->table];
1593 if ((*tp)->table != fp->table)
1598 if (memcmp (&(*tp)->values[2], &fp->values[2],
1599 sizeof (union value) * (x->nvar - 2)))
1606 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1607 result. WIDTH_ points to an int which is either 0 for a
1608 numeric value or a string width for a string value. */
1610 compare_value (const void *a_, const void *b_, const void *width_)
1612 const union value *a = a_;
1613 const union value *b = b_;
1614 const int *pwidth = width_;
1615 const int width = *pwidth;
1618 return (a->f < b->f) ? -1 : (a->f > b->f);
1620 return strncmp (a->s, b->s, width);
1623 /* Given an array of ENTRY_CNT table_entry structures starting at
1624 ENTRIES, creates a sorted list of the values that the variable
1625 with index VAR_IDX takes on. The values are returned as a
1626 malloc()'darray stored in *VALUES, with the number of values
1627 stored in *VALUE_CNT.
1630 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1631 union value **values, int *value_cnt)
1633 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1635 if (mode == GENERAL)
1637 int width = var_get_width (v);
1640 *values = xnmalloc (entry_cnt, sizeof **values);
1641 for (i = 0; i < entry_cnt; i++)
1642 (*values)[i] = entries[i]->values[var_idx];
1643 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1644 compare_value, &width);
1648 struct var_range *vr = get_var_range (v);
1651 assert (mode == INTEGER);
1652 *values = xnmalloc (vr->count, sizeof **values);
1653 for (i = 0; i < vr->count; i++)
1654 (*values)[i].f = i + vr->min;
1655 *value_cnt = vr->count;
1659 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1660 from V, displayed with print format spec from variable VAR. When
1661 in REPORT missing-value mode, missing values have an M appended. */
1663 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1664 const union value *v, const struct variable *var)
1667 const struct fmt_spec *print = var_get_print_format (var);
1669 const char *label = var_lookup_value_label (var, v);
1672 tab_text (table, c, r, TAB_LEFT, label);
1676 s.string = tab_alloc (table, print->w);
1677 format_short (s.string, print, v);
1678 s.length = strlen (s.string);
1679 if (cmd.miss == CRS_REPORT && var_is_num_user_missing (var, v->f))
1680 s.string[s.length++] = 'M';
1681 while (s.length && *s.string == ' ')
1686 tab_raw (table, c, r, opt, &s);
1689 /* Draws a line across TABLE at the current row to indicate the most
1690 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1691 that changed, and puts the values that changed into the table. TB
1692 and X must be the corresponding table_entry and crosstab,
1695 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1697 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1699 for (; first_difference >= 2; first_difference--)
1700 table_value_missing (table, nvar - first_difference - 1, 0,
1701 TAB_RIGHT, &tb->values[first_difference],
1702 x->vars[first_difference]);
1705 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1706 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1707 additionally suffixed with a letter `M'. */
1709 format_cell_entry (struct tab_table *table, int c, int r, double value,
1710 char suffix, bool mark_missing)
1712 const struct fmt_spec f = {FMT_F, 10, 1};
1717 s.string = tab_alloc (table, 16);
1719 data_out (&v, &f, s.string);
1720 while (*s.string == ' ')
1726 s.string[s.length++] = suffix;
1728 s.string[s.length++] = 'M';
1730 tab_raw (table, c, r, TAB_RIGHT, &s);
1733 /* Displays the crosstabulation table. */
1735 display_crosstabulation (void)
1740 for (r = 0; r < n_rows; r++)
1741 table_value_missing (table, nvar - 2, r * num_cells,
1742 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1744 tab_text (table, nvar - 2, n_rows * num_cells,
1745 TAB_LEFT, _("Total"));
1747 /* Put in the actual cells. */
1752 tab_offset (table, nvar - 1, -1);
1753 for (r = 0; r < n_rows; r++)
1756 tab_hline (table, TAL_1, -1, n_cols, 0);
1757 for (c = 0; c < n_cols; c++)
1759 bool mark_missing = false;
1760 double expected_value = row_tot[r] * col_tot[c] / W;
1761 if (cmd.miss == CRS_REPORT
1762 && (var_is_num_user_missing (x->vars[COL_VAR], cols[c].f)
1763 || var_is_num_user_missing (x->vars[ROW_VAR], rows[r].f)))
1764 mark_missing = true;
1765 for (i = 0; i < num_cells; i++)
1776 v = *mp / row_tot[r] * 100.;
1780 v = *mp / col_tot[c] * 100.;
1787 case CRS_CL_EXPECTED:
1790 case CRS_CL_RESIDUAL:
1791 v = *mp - expected_value;
1793 case CRS_CL_SRESIDUAL:
1794 v = (*mp - expected_value) / sqrt (expected_value);
1796 case CRS_CL_ASRESIDUAL:
1797 v = ((*mp - expected_value)
1798 / sqrt (expected_value
1799 * (1. - row_tot[r] / W)
1800 * (1. - col_tot[c] / W)));
1806 format_cell_entry (table, c, i, v, suffix, mark_missing);
1812 tab_offset (table, -1, tab_row (table) + num_cells);
1820 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1821 for (r = 0; r < n_rows; r++)
1824 bool mark_missing = false;
1826 if (cmd.miss == CRS_REPORT
1827 && var_is_num_user_missing (x->vars[ROW_VAR], rows[r].f))
1828 mark_missing = true;
1830 for (i = 0; i < num_cells; i++)
1844 v = row_tot[r] / W * 100.;
1848 v = row_tot[r] / W * 100.;
1851 case CRS_CL_EXPECTED:
1852 case CRS_CL_RESIDUAL:
1853 case CRS_CL_SRESIDUAL:
1854 case CRS_CL_ASRESIDUAL:
1861 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1862 tab_next_row (table);
1867 /* Column totals, grand total. */
1873 tab_hline (table, TAL_1, -1, n_cols, 0);
1874 for (c = 0; c <= n_cols; c++)
1876 double ct = c < n_cols ? col_tot[c] : W;
1877 bool mark_missing = false;
1881 if (cmd.miss == CRS_REPORT && c < n_cols
1882 && var_is_num_user_missing (x->vars[COL_VAR], cols[c].f))
1883 mark_missing = true;
1885 for (i = 0; i < num_cells; i++)
1907 case CRS_CL_EXPECTED:
1908 case CRS_CL_RESIDUAL:
1909 case CRS_CL_SRESIDUAL:
1910 case CRS_CL_ASRESIDUAL:
1916 format_cell_entry (table, c, i, v, suffix, mark_missing);
1921 tab_offset (table, -1, tab_row (table) + last_row);
1924 tab_offset (table, 0, -1);
1927 static void calc_r (double *X, double *Y, double *, double *, double *);
1928 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1930 /* Display chi-square statistics. */
1932 display_chisq (void)
1934 static const char *chisq_stats[N_CHISQ] =
1936 N_("Pearson Chi-Square"),
1937 N_("Likelihood Ratio"),
1938 N_("Fisher's Exact Test"),
1939 N_("Continuity Correction"),
1940 N_("Linear-by-Linear Association"),
1942 double chisq_v[N_CHISQ];
1943 double fisher1, fisher2;
1949 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1951 tab_offset (chisq, nvar - 2, -1);
1953 for (i = 0; i < N_CHISQ; i++)
1955 if ((i != 2 && chisq_v[i] == SYSMIS)
1956 || (i == 2 && fisher1 == SYSMIS))
1960 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1963 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1964 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1965 tab_float (chisq, 3, 0, TAB_RIGHT,
1966 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1971 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1972 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1974 tab_next_row (chisq);
1977 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1978 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1979 tab_next_row (chisq);
1981 tab_offset (chisq, 0, -1);
1984 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1985 double[N_SYMMETRIC]);
1987 /* Display symmetric measures. */
1989 display_symmetric (void)
1991 static const char *categories[] =
1993 N_("Nominal by Nominal"),
1994 N_("Ordinal by Ordinal"),
1995 N_("Interval by Interval"),
1996 N_("Measure of Agreement"),
1999 static const char *stats[N_SYMMETRIC] =
2003 N_("Contingency Coefficient"),
2004 N_("Kendall's tau-b"),
2005 N_("Kendall's tau-c"),
2007 N_("Spearman Correlation"),
2012 static const int stats_categories[N_SYMMETRIC] =
2014 0, 0, 0, 1, 1, 1, 1, 2, 3,
2018 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2021 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2024 tab_offset (sym, nvar - 2, -1);
2026 for (i = 0; i < N_SYMMETRIC; i++)
2028 if (sym_v[i] == SYSMIS)
2031 if (stats_categories[i] != last_cat)
2033 last_cat = stats_categories[i];
2034 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2037 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2038 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2039 if (sym_ase[i] != SYSMIS)
2040 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2041 if (sym_t[i] != SYSMIS)
2042 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2043 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2047 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2048 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2051 tab_offset (sym, 0, -1);
2054 static int calc_risk (double[], double[], double[], union value *);
2056 /* Display risk estimate. */
2061 double risk_v[3], lower[3], upper[3];
2065 if (!calc_risk (risk_v, upper, lower, c))
2068 tab_offset (risk, nvar - 2, -1);
2070 for (i = 0; i < 3; i++)
2072 if (risk_v[i] == SYSMIS)
2078 if (var_is_numeric (x->vars[COL_VAR]))
2079 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2080 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2082 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2083 var_get_name (x->vars[COL_VAR]),
2084 var_get_width (x->vars[COL_VAR]), c[0].s,
2085 var_get_width (x->vars[COL_VAR]), c[1].s);
2089 if (var_is_numeric (x->vars[ROW_VAR]))
2090 sprintf (buf, _("For cohort %s = %g"),
2091 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2093 sprintf (buf, _("For cohort %s = %.*s"),
2094 var_get_name (x->vars[ROW_VAR]),
2095 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2099 tab_text (risk, 0, 0, TAB_LEFT, buf);
2100 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2101 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2102 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2103 tab_next_row (risk);
2106 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2107 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2108 tab_next_row (risk);
2110 tab_offset (risk, 0, -1);
2113 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2114 double[N_DIRECTIONAL]);
2116 /* Display directional measures. */
2118 display_directional (void)
2120 static const char *categories[] =
2122 N_("Nominal by Nominal"),
2123 N_("Ordinal by Ordinal"),
2124 N_("Nominal by Interval"),
2127 static const char *stats[] =
2130 N_("Goodman and Kruskal tau"),
2131 N_("Uncertainty Coefficient"),
2136 static const char *types[] =
2143 static const int stats_categories[N_DIRECTIONAL] =
2145 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2148 static const int stats_stats[N_DIRECTIONAL] =
2150 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2153 static const int stats_types[N_DIRECTIONAL] =
2155 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2158 static const int *stats_lookup[] =
2165 static const char **stats_names[] =
2177 double direct_v[N_DIRECTIONAL];
2178 double direct_ase[N_DIRECTIONAL];
2179 double direct_t[N_DIRECTIONAL];
2183 if (!calc_directional (direct_v, direct_ase, direct_t))
2186 tab_offset (direct, nvar - 2, -1);
2188 for (i = 0; i < N_DIRECTIONAL; i++)
2190 if (direct_v[i] == SYSMIS)
2196 for (j = 0; j < 3; j++)
2197 if (last[j] != stats_lookup[j][i])
2200 tab_hline (direct, TAL_1, j, 6, 0);
2205 int k = last[j] = stats_lookup[j][i];
2210 string = var_get_name (x->vars[0]);
2212 string = var_get_name (x->vars[1]);
2214 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2215 gettext (stats_names[j][k]), string);
2220 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2221 if (direct_ase[i] != SYSMIS)
2222 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2223 if (direct_t[i] != SYSMIS)
2224 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2225 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2226 tab_next_row (direct);
2229 tab_offset (direct, 0, -1);
2232 /* Statistical calculations. */
2234 /* Returns the value of the gamma (factorial) function for an integer
2237 gamma_int (double x)
2242 for (i = 2; i < x; i++)
2247 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2249 static inline double
2250 Pr (int a, int b, int c, int d)
2252 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2253 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2254 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2255 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2256 / gamma_int (a + b + c + d + 1.));
2259 /* Swap the contents of A and B. */
2261 swap (int *a, int *b)
2268 /* Calculate significance for Fisher's exact test as specified in
2269 _SPSS Statistical Algorithms_, Appendix 5. */
2271 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2275 if (MIN (c, d) < MIN (a, b))
2276 swap (&a, &c), swap (&b, &d);
2277 if (MIN (b, d) < MIN (a, c))
2278 swap (&a, &b), swap (&c, &d);
2282 swap (&a, &b), swap (&c, &d);
2284 swap (&a, &c), swap (&b, &d);
2288 for (x = 0; x <= a; x++)
2289 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2291 *fisher2 = *fisher1;
2292 for (x = 1; x <= b; x++)
2293 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2296 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2297 columns with values COLS and N_ROWS rows with values ROWS. Values
2298 in the matrix sum to W. */
2300 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2301 double *fisher1, double *fisher2)
2305 chisq[0] = chisq[1] = 0.;
2306 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2307 *fisher1 = *fisher2 = SYSMIS;
2309 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2311 if (ns_rows <= 1 || ns_cols <= 1)
2313 chisq[0] = chisq[1] = SYSMIS;
2317 for (r = 0; r < n_rows; r++)
2318 for (c = 0; c < n_cols; c++)
2320 const double expected = row_tot[r] * col_tot[c] / W;
2321 const double freq = mat[n_cols * r + c];
2322 const double residual = freq - expected;
2324 chisq[0] += residual * residual / expected;
2326 chisq[1] += freq * log (expected / freq);
2337 /* Calculate Yates and Fisher exact test. */
2338 if (ns_cols == 2 && ns_rows == 2)
2340 double f11, f12, f21, f22;
2346 for (i = j = 0; i < n_cols; i++)
2347 if (col_tot[i] != 0.)
2356 f11 = mat[nz_cols[0]];
2357 f12 = mat[nz_cols[1]];
2358 f21 = mat[nz_cols[0] + n_cols];
2359 f22 = mat[nz_cols[1] + n_cols];
2364 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2367 chisq[3] = (W * x * x
2368 / (f11 + f12) / (f21 + f22)
2369 / (f11 + f21) / (f12 + f22));
2377 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2378 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2381 /* Calculate Mantel-Haenszel. */
2382 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2384 double r, ase_0, ase_1;
2385 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2387 chisq[4] = (W - 1.) * r * r;
2392 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2393 ASE_1, and ase_0 into ASE_0. The row and column values must be
2394 passed in X and Y. */
2396 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2398 double SX, SY, S, T;
2400 double sum_XYf, sum_X2Y2f;
2401 double sum_Xr, sum_X2r;
2402 double sum_Yc, sum_Y2c;
2405 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2406 for (j = 0; j < n_cols; j++)
2408 double fij = mat[j + i * n_cols];
2409 double product = X[i] * Y[j];
2410 double temp = fij * product;
2412 sum_X2Y2f += temp * product;
2415 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2417 sum_Xr += X[i] * row_tot[i];
2418 sum_X2r += X[i] * X[i] * row_tot[i];
2422 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2424 sum_Yc += Y[i] * col_tot[i];
2425 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2429 S = sum_XYf - sum_Xr * sum_Yc / W;
2430 SX = sum_X2r - sum_Xr * sum_Xr / W;
2431 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2434 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2439 for (s = c = 0., i = 0; i < n_rows; i++)
2440 for (j = 0; j < n_cols; j++)
2442 double Xresid, Yresid;
2445 Xresid = X[i] - Xbar;
2446 Yresid = Y[j] - Ybar;
2447 temp = (T * Xresid * Yresid
2449 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2450 y = mat[j + i * n_cols] * temp * temp - c;
2455 *ase_1 = sqrt (s) / (T * T);
2459 static double somers_d_v[3];
2460 static double somers_d_ase[3];
2461 static double somers_d_t[3];
2463 /* Calculate symmetric statistics and their asymptotic standard
2464 errors. Returns 0 if none could be calculated. */
2466 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2467 double t[N_SYMMETRIC])
2469 int q = MIN (ns_rows, ns_cols);
2478 for (i = 0; i < N_SYMMETRIC; i++)
2479 v[i] = ase[i] = t[i] = SYSMIS;
2482 /* Phi, Cramer's V, contingency coefficient. */
2483 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2485 double Xp = 0.; /* Pearson chi-square. */
2490 for (r = 0; r < n_rows; r++)
2491 for (c = 0; c < n_cols; c++)
2493 const double expected = row_tot[r] * col_tot[c] / W;
2494 const double freq = mat[n_cols * r + c];
2495 const double residual = freq - expected;
2497 Xp += residual * residual / expected;
2501 if (cmd.a_statistics[CRS_ST_PHI])
2503 v[0] = sqrt (Xp / W);
2504 v[1] = sqrt (Xp / (W * (q - 1)));
2506 if (cmd.a_statistics[CRS_ST_CC])
2507 v[2] = sqrt (Xp / (Xp + W));
2510 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2511 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2516 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2523 for (r = 0; r < n_rows; r++)
2524 Dr -= row_tot[r] * row_tot[r];
2525 for (c = 0; c < n_cols; c++)
2526 Dc -= col_tot[c] * col_tot[c];
2532 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2533 for (c = 0; c < n_cols; c++)
2537 for (r = 0; r < n_rows; r++)
2538 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2548 for (i = 0; i < n_rows; i++)
2552 for (j = 1; j < n_cols; j++)
2553 Cij += col_tot[j] - cum[j + i * n_cols];
2556 for (j = 1; j < n_cols; j++)
2557 Dij += cum[j + (i - 1) * n_cols];
2561 double fij = mat[j + i * n_cols];
2567 assert (j < n_cols);
2569 Cij -= col_tot[j] - cum[j + i * n_cols];
2570 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2574 Cij += cum[j - 1 + (i - 1) * n_cols];
2575 Dij -= cum[j + (i - 1) * n_cols];
2581 if (cmd.a_statistics[CRS_ST_BTAU])
2582 v[3] = (P - Q) / sqrt (Dr * Dc);
2583 if (cmd.a_statistics[CRS_ST_CTAU])
2584 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2585 if (cmd.a_statistics[CRS_ST_GAMMA])
2586 v[5] = (P - Q) / (P + Q);
2588 /* ASE for tau-b, tau-c, gamma. Calculations could be
2589 eliminated here, at expense of memory. */
2594 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2595 for (i = 0; i < n_rows; i++)
2599 for (j = 1; j < n_cols; j++)
2600 Cij += col_tot[j] - cum[j + i * n_cols];
2603 for (j = 1; j < n_cols; j++)
2604 Dij += cum[j + (i - 1) * n_cols];
2608 double fij = mat[j + i * n_cols];
2610 if (cmd.a_statistics[CRS_ST_BTAU])
2612 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2613 + v[3] * (row_tot[i] * Dc
2614 + col_tot[j] * Dr));
2615 btau_cum += fij * temp * temp;
2619 const double temp = Cij - Dij;
2620 ctau_cum += fij * temp * temp;
2623 if (cmd.a_statistics[CRS_ST_GAMMA])
2625 const double temp = Q * Cij - P * Dij;
2626 gamma_cum += fij * temp * temp;
2629 if (cmd.a_statistics[CRS_ST_D])
2631 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2632 - (P - Q) * (W - row_tot[i]));
2633 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2634 - (Q - P) * (W - col_tot[j]));
2639 assert (j < n_cols);
2641 Cij -= col_tot[j] - cum[j + i * n_cols];
2642 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2646 Cij += cum[j - 1 + (i - 1) * n_cols];
2647 Dij -= cum[j + (i - 1) * n_cols];
2653 btau_var = ((btau_cum
2654 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2656 if (cmd.a_statistics[CRS_ST_BTAU])
2658 ase[3] = sqrt (btau_var);
2659 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2662 if (cmd.a_statistics[CRS_ST_CTAU])
2664 ase[4] = ((2 * q / ((q - 1) * W * W))
2665 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2666 t[4] = v[4] / ase[4];
2668 if (cmd.a_statistics[CRS_ST_GAMMA])
2670 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2671 t[5] = v[5] / (2. / (P + Q)
2672 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2674 if (cmd.a_statistics[CRS_ST_D])
2676 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2677 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2678 somers_d_t[0] = (somers_d_v[0]
2680 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2681 somers_d_v[1] = (P - Q) / Dc;
2682 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2683 somers_d_t[1] = (somers_d_v[1]
2685 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2686 somers_d_v[2] = (P - Q) / Dr;
2687 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2688 somers_d_t[2] = (somers_d_v[2]
2690 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2696 /* Spearman correlation, Pearson's r. */
2697 if (cmd.a_statistics[CRS_ST_CORR])
2699 double *R = local_alloc (sizeof *R * n_rows);
2700 double *C = local_alloc (sizeof *C * n_cols);
2703 double y, t, c = 0., s = 0.;
2708 R[i] = s + (row_tot[i] + 1.) / 2.;
2715 assert (i < n_rows);
2720 double y, t, c = 0., s = 0.;
2725 C[j] = s + (col_tot[j] + 1.) / 2;
2732 assert (j < n_cols);
2736 calc_r (R, C, &v[6], &t[6], &ase[6]);
2742 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2746 /* Cohen's kappa. */
2747 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2749 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2752 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2753 i < ns_rows; i++, j++)
2757 while (col_tot[j] == 0.)
2760 prod = row_tot[i] * col_tot[j];
2761 sum = row_tot[i] + col_tot[j];
2763 sum_fii += mat[j + i * n_cols];
2765 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2766 sum_riciri_ci += prod * sum;
2768 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2769 for (j = 0; j < ns_cols; j++)
2771 double sum = row_tot[i] + col_tot[j];
2772 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2775 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2777 ase[8] = sqrt ((W * W * sum_rici
2778 + sum_rici * sum_rici
2779 - W * sum_riciri_ci)
2780 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2782 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2783 / pow2 (W * W - sum_rici))
2784 + ((2. * (W - sum_fii)
2785 * (2. * sum_fii * sum_rici
2786 - W * sum_fiiri_ci))
2787 / cube (W * W - sum_rici))
2788 + (pow2 (W - sum_fii)
2789 * (W * sum_fijri_ci2 - 4.
2790 * sum_rici * sum_rici)
2791 / pow4 (W * W - sum_rici))));
2793 t[8] = v[8] / ase[8];
2800 /* Calculate risk estimate. */
2802 calc_risk (double *value, double *upper, double *lower, union value *c)
2804 double f11, f12, f21, f22;
2810 for (i = 0; i < 3; i++)
2811 value[i] = upper[i] = lower[i] = SYSMIS;
2814 if (ns_rows != 2 || ns_cols != 2)
2821 for (i = j = 0; i < n_cols; i++)
2822 if (col_tot[i] != 0.)
2831 f11 = mat[nz_cols[0]];
2832 f12 = mat[nz_cols[1]];
2833 f21 = mat[nz_cols[0] + n_cols];
2834 f22 = mat[nz_cols[1] + n_cols];
2836 c[0] = cols[nz_cols[0]];
2837 c[1] = cols[nz_cols[1]];
2840 value[0] = (f11 * f22) / (f12 * f21);
2841 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2842 lower[0] = value[0] * exp (-1.960 * v);
2843 upper[0] = value[0] * exp (1.960 * v);
2845 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2846 v = sqrt ((f12 / (f11 * (f11 + f12)))
2847 + (f22 / (f21 * (f21 + f22))));
2848 lower[1] = value[1] * exp (-1.960 * v);
2849 upper[1] = value[1] * exp (1.960 * v);
2851 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2852 v = sqrt ((f11 / (f12 * (f11 + f12)))
2853 + (f21 / (f22 * (f21 + f22))));
2854 lower[2] = value[2] * exp (-1.960 * v);
2855 upper[2] = value[2] * exp (1.960 * v);
2860 /* Calculate directional measures. */
2862 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2863 double t[N_DIRECTIONAL])
2868 for (i = 0; i < N_DIRECTIONAL; i++)
2869 v[i] = ase[i] = t[i] = SYSMIS;
2873 if (cmd.a_statistics[CRS_ST_LAMBDA])
2875 double *fim = xnmalloc (n_rows, sizeof *fim);
2876 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2877 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2878 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2879 double sum_fim, sum_fmj;
2881 int rm_index, cm_index;
2884 /* Find maximum for each row and their sum. */
2885 for (sum_fim = 0., i = 0; i < n_rows; i++)
2887 double max = mat[i * n_cols];
2890 for (j = 1; j < n_cols; j++)
2891 if (mat[j + i * n_cols] > max)
2893 max = mat[j + i * n_cols];
2897 sum_fim += fim[i] = max;
2898 fim_index[i] = index;
2901 /* Find maximum for each column. */
2902 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2904 double max = mat[j];
2907 for (i = 1; i < n_rows; i++)
2908 if (mat[j + i * n_cols] > max)
2910 max = mat[j + i * n_cols];
2914 sum_fmj += fmj[j] = max;
2915 fmj_index[j] = index;
2918 /* Find maximum row total. */
2921 for (i = 1; i < n_rows; i++)
2922 if (row_tot[i] > rm)
2928 /* Find maximum column total. */
2931 for (j = 1; j < n_cols; j++)
2932 if (col_tot[j] > cm)
2938 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2939 v[1] = (sum_fmj - rm) / (W - rm);
2940 v[2] = (sum_fim - cm) / (W - cm);
2942 /* ASE1 for Y given X. */
2946 for (accum = 0., i = 0; i < n_rows; i++)
2947 for (j = 0; j < n_cols; j++)
2949 const int deltaj = j == cm_index;
2950 accum += (mat[j + i * n_cols]
2951 * pow2 ((j == fim_index[i])
2956 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2959 /* ASE0 for Y given X. */
2963 for (accum = 0., i = 0; i < n_rows; i++)
2964 if (cm_index != fim_index[i])
2965 accum += (mat[i * n_cols + fim_index[i]]
2966 + mat[i * n_cols + cm_index]);
2967 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2970 /* ASE1 for X given Y. */
2974 for (accum = 0., i = 0; i < n_rows; i++)
2975 for (j = 0; j < n_cols; j++)
2977 const int deltaj = i == rm_index;
2978 accum += (mat[j + i * n_cols]
2979 * pow2 ((i == fmj_index[j])
2984 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2987 /* ASE0 for X given Y. */
2991 for (accum = 0., j = 0; j < n_cols; j++)
2992 if (rm_index != fmj_index[j])
2993 accum += (mat[j + n_cols * fmj_index[j]]
2994 + mat[j + n_cols * rm_index]);
2995 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2998 /* Symmetric ASE0 and ASE1. */
3003 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3004 for (j = 0; j < n_cols; j++)
3006 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3007 int temp1 = (i == rm_index) + (j == cm_index);
3008 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3009 accum1 += (mat[j + i * n_cols]
3010 * pow2 (temp0 + (v[0] - 1.) * temp1));
3012 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3013 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3014 / (2. * W - rm - cm));
3023 double sum_fij2_ri, sum_fij2_ci;
3024 double sum_ri2, sum_cj2;
3026 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3027 for (j = 0; j < n_cols; j++)
3029 double temp = pow2 (mat[j + i * n_cols]);
3030 sum_fij2_ri += temp / row_tot[i];
3031 sum_fij2_ci += temp / col_tot[j];
3034 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3035 sum_ri2 += row_tot[i] * row_tot[i];
3037 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3038 sum_cj2 += col_tot[j] * col_tot[j];
3040 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3041 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3045 if (cmd.a_statistics[CRS_ST_UC])
3047 double UX, UY, UXY, P;
3048 double ase1_yx, ase1_xy, ase1_sym;
3051 for (UX = 0., i = 0; i < n_rows; i++)
3052 if (row_tot[i] > 0.)
3053 UX -= row_tot[i] / W * log (row_tot[i] / W);
3055 for (UY = 0., j = 0; j < n_cols; j++)
3056 if (col_tot[j] > 0.)
3057 UY -= col_tot[j] / W * log (col_tot[j] / W);
3059 for (UXY = P = 0., i = 0; i < n_rows; i++)
3060 for (j = 0; j < n_cols; j++)
3062 double entry = mat[j + i * n_cols];
3067 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3068 UXY -= entry / W * log (entry / W);
3071 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3072 for (j = 0; j < n_cols; j++)
3074 double entry = mat[j + i * n_cols];
3079 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3080 + (UX - UXY) * log (col_tot[j] / W));
3081 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3082 + (UY - UXY) * log (row_tot[i] / W));
3083 ase1_sym += entry * pow2 ((UXY
3084 * log (row_tot[i] * col_tot[j] / (W * W)))
3085 - (UX + UY) * log (entry / W));
3088 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3089 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3090 t[5] = v[5] / ((2. / (W * (UX + UY)))
3091 * sqrt (P - pow2 (UX + UY - UXY) / W));
3093 v[6] = (UX + UY - UXY) / UX;
3094 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3095 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3097 v[7] = (UX + UY - UXY) / UY;
3098 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3099 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3103 if (cmd.a_statistics[CRS_ST_D])
3108 calc_symmetric (NULL, NULL, NULL);
3109 for (i = 0; i < 3; i++)
3111 v[8 + i] = somers_d_v[i];
3112 ase[8 + i] = somers_d_ase[i];
3113 t[8 + i] = somers_d_t[i];
3118 if (cmd.a_statistics[CRS_ST_ETA])
3121 double sum_Xr, sum_X2r;
3125 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3127 sum_Xr += rows[i].f * row_tot[i];
3128 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3130 SX = sum_X2r - sum_Xr * sum_Xr / W;
3132 for (SXW = 0., j = 0; j < n_cols; j++)
3136 for (cum = 0., i = 0; i < n_rows; i++)
3138 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3139 cum += rows[i].f * mat[j + i * n_cols];
3142 SXW -= cum * cum / col_tot[j];
3144 v[11] = sqrt (1. - SXW / SX);
3148 double sum_Yc, sum_Y2c;
3152 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3154 sum_Yc += cols[i].f * col_tot[i];
3155 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3157 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3159 for (SYW = 0., i = 0; i < n_rows; i++)
3163 for (cum = 0., j = 0; j < n_cols; j++)
3165 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3166 cum += cols[j].f * mat[j + i * n_cols];
3169 SYW -= cum * cum / row_tot[i];
3171 v[12] = sqrt (1. - SYW / SY);
3178 /* A wrapper around data_out() that limits string output to short
3179 string width and null terminates the result. */
3181 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3183 struct fmt_spec fmt_subst;
3185 /* Limit to short string width. */
3186 if (fmt_is_string (fp->type))
3190 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3191 if (fmt_subst.type == FMT_A)
3192 fmt_subst.w = MIN (8, fmt_subst.w);
3194 fmt_subst.w = MIN (16, fmt_subst.w);
3200 data_out (v, fp, s);
3202 /* Null terminate. */