1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/dictionary.h>
41 #include <data/procedure.h>
42 #include <data/value-labels.h>
43 #include <data/variable.h>
44 #include <language/command.h>
45 #include <language/dictionary/split-file.h>
46 #include <language/lexer/lexer.h>
47 #include <language/lexer/variable-parser.h>
48 #include <libpspp/alloc.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/magic.h>
54 #include <libpspp/message.h>
55 #include <libpspp/message.h>
56 #include <libpspp/misc.h>
57 #include <libpspp/pool.h>
58 #include <libpspp/str.h>
59 #include <output/output.h>
60 #include <output/table.h>
63 #define _(msgid) gettext (msgid)
64 #define N_(msgid) msgid
72 missing=miss:!table/include/report;
73 +write[wr_]=none,cells,all;
74 +format=fmt:!labels/nolabels/novallabs,
77 tabl:!tables/notables,
80 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
82 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
88 /* Number of chi-square statistics. */
91 /* Number of symmetric statistics. */
94 /* Number of directional statistics. */
95 #define N_DIRECTIONAL 13
97 /* A single table entry for general mode. */
100 int table; /* Flattened table number. */
103 double freq; /* Frequency count. */
104 double *data; /* Crosstabulation table for integer mode. */
107 union value values[1]; /* Values. */
110 /* A crosstabulation. */
113 int nvar; /* Number of variables. */
114 double missing; /* Missing cases count. */
115 int ofs; /* Integer mode: Offset into sorted_tab[]. */
116 struct variable *vars[2]; /* At least two variables; sorted by
117 larger indices first. */
120 /* Integer mode variable info. */
123 int min; /* Minimum value. */
124 int max; /* Maximum value + 1. */
125 int count; /* max - min. */
128 static inline struct var_range *
129 get_var_range (struct variable *v)
132 assert (v->aux != NULL);
136 /* Indexes into crosstab.v. */
143 /* General mode crosstabulation table. */
144 static struct hsh_table *gen_tab; /* Hash table. */
145 static int n_sorted_tab; /* Number of entries in sorted_tab. */
146 static struct table_entry **sorted_tab; /* Sorted table. */
148 /* Variables specifies on VARIABLES. */
149 static struct variable **variables;
150 static size_t variables_cnt;
153 static struct crosstab **xtab;
156 /* Integer or general mode? */
165 static int num_cells; /* Number of cells requested. */
166 static int cells[8]; /* Cells requested. */
169 static int write; /* One of WR_* that specifies the WRITE style. */
171 /* Command parsing info. */
172 static struct cmd_crosstabs cmd;
175 static struct pool *pl_tc; /* For table cells. */
176 static struct pool *pl_col; /* For column data. */
178 static int internal_cmd_crosstabs (struct dataset *ds);
179 static void precalc (const struct ccase *, void *, const struct dataset *);
180 static bool calc_general (const struct ccase *, void *, const struct dataset *);
181 static bool calc_integer (const struct ccase *, void *, const struct dataset *);
182 static bool postcalc (void *, const struct dataset *);
183 static void submit (struct tab_table *);
185 static void format_short (char *s, const struct fmt_spec *fp,
186 const union value *v);
188 /* Parse and execute CROSSTABS, then clean up. */
190 cmd_crosstabs (struct dataset *ds)
192 int result = internal_cmd_crosstabs (ds);
195 pool_destroy (pl_tc);
196 pool_destroy (pl_col);
201 /* Parses and executes the CROSSTABS procedure. */
203 internal_cmd_crosstabs (struct dataset *ds)
212 pl_tc = pool_create ();
213 pl_col = pool_create ();
215 if (!parse_crosstabs (ds, &cmd, NULL))
218 mode = variables ? INTEGER : GENERAL;
223 cmd.a_cells[CRS_CL_COUNT] = 1;
229 for (i = 0; i < CRS_CL_count; i++)
234 cmd.a_cells[CRS_CL_COUNT] = 1;
235 cmd.a_cells[CRS_CL_ROW] = 1;
236 cmd.a_cells[CRS_CL_COLUMN] = 1;
237 cmd.a_cells[CRS_CL_TOTAL] = 1;
239 if (cmd.a_cells[CRS_CL_ALL])
241 for (i = 0; i < CRS_CL_count; i++)
243 cmd.a_cells[CRS_CL_ALL] = 0;
245 cmd.a_cells[CRS_CL_NONE] = 0;
247 for (num_cells = i = 0; i < CRS_CL_count; i++)
249 cells[num_cells++] = i;
252 if (cmd.sbc_statistics)
257 for (i = 0; i < CRS_ST_count; i++)
258 if (cmd.a_statistics[i])
261 cmd.a_statistics[CRS_ST_CHISQ] = 1;
262 if (cmd.a_statistics[CRS_ST_ALL])
263 for (i = 0; i < CRS_ST_count; i++)
264 cmd.a_statistics[i] = 1;
268 if (cmd.miss == CRS_REPORT && mode == GENERAL)
270 msg (SE, _("Missing mode REPORT not allowed in general mode. "
271 "Assuming MISSING=TABLE."));
272 cmd.miss = CRS_TABLE;
276 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
277 cmd.a_write[CRS_WR_ALL] = 0;
278 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
280 msg (SE, _("Write mode ALL not allowed in general mode. "
281 "Assuming WRITE=CELLS."));
282 cmd.a_write[CRS_WR_CELLS] = 1;
285 && (cmd.a_write[CRS_WR_NONE]
286 + cmd.a_write[CRS_WR_ALL]
287 + cmd.a_write[CRS_WR_CELLS] == 0))
288 cmd.a_write[CRS_WR_CELLS] = 1;
289 if (cmd.a_write[CRS_WR_CELLS])
290 write = CRS_WR_CELLS;
291 else if (cmd.a_write[CRS_WR_ALL])
296 ok = procedure_with_splits (ds, precalc,
297 mode == GENERAL ? calc_general : calc_integer,
300 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
303 /* Parses the TABLES subcommand. */
305 crs_custom_tables (struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
307 struct var_set *var_set;
309 struct variable ***by = NULL;
310 size_t *by_nvar = NULL;
314 /* Ensure that this is a TABLES subcommand. */
315 if (!lex_match_id ("TABLES")
316 && (token != T_ID || dict_lookup_var (dataset_dict (ds), tokid) == NULL)
321 if (variables != NULL)
322 var_set = var_set_create_from_array (variables, variables_cnt);
324 var_set = var_set_create_from_dict (dataset_dict (ds));
325 assert (var_set != NULL);
329 by = xnrealloc (by, n_by + 1, sizeof *by);
330 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
331 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
332 PV_NO_DUPLICATE | PV_NO_SCRATCH))
334 if (xalloc_oversized (nx, by_nvar[n_by]))
336 msg (SE, _("Too many crosstabulation variables or dimensions."));
342 if (!lex_match (T_BY))
346 lex_error (_("expecting BY"));
355 int *by_iter = xcalloc (n_by, sizeof *by_iter);
358 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
359 for (i = 0; i < nx; i++)
363 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
370 for (i = 0; i < n_by; i++)
371 x->vars[i] = by[i][by_iter[i]];
377 for (i = n_by - 1; i >= 0; i--)
379 if (++by_iter[i] < by_nvar[i])
392 /* All return paths lead here. */
396 for (i = 0; i < n_by; i++)
402 var_set_destroy (var_set);
407 /* Parses the VARIABLES subcommand. */
409 crs_custom_variables (struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
413 msg (SE, _("VARIABLES must be specified before TABLES."));
421 size_t orig_nv = variables_cnt;
426 if (!parse_variables (dataset_dict (ds), &variables, &variables_cnt,
427 (PV_APPEND | PV_NUMERIC
428 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
433 lex_error ("expecting `('");
438 if (!lex_force_int ())
440 min = lex_integer ();
445 if (!lex_force_int ())
447 max = lex_integer ();
450 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
458 lex_error ("expecting `)'");
463 for (i = orig_nv; i < variables_cnt; i++)
465 struct var_range *vr = xmalloc (sizeof *vr);
468 vr->count = max - min + 1;
469 var_attach_aux (variables[i], vr, var_dtor_free);
484 /* Data file processing. */
486 static int compare_table_entry (const void *, const void *, const void *);
487 static unsigned hash_table_entry (const void *, const void *);
489 /* Set up the crosstabulation tables for processing. */
491 precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
493 output_split_file_values (ds, first);
496 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
506 for (i = 0; i < nxtab; i++)
508 struct crosstab *x = xtab[i];
513 x->ofs = n_sorted_tab;
515 for (j = 2; j < x->nvar; j++)
516 count *= get_var_range (x->vars[j - 2])->count;
518 sorted_tab = xnrealloc (sorted_tab,
519 n_sorted_tab + count, sizeof *sorted_tab);
520 v = local_alloc (sizeof *v * x->nvar);
521 for (j = 2; j < x->nvar; j++)
522 v[j] = get_var_range (x->vars[j])->min;
523 for (j = 0; j < count; j++)
525 struct table_entry *te;
528 te = sorted_tab[n_sorted_tab++]
529 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
533 int row_cnt = get_var_range (x->vars[0])->count;
534 int col_cnt = get_var_range (x->vars[1])->count;
535 const int mat_size = row_cnt * col_cnt;
538 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
539 for (m = 0; m < mat_size; m++)
543 for (k = 2; k < x->nvar; k++)
544 te->values[k].f = v[k];
545 for (k = 2; k < x->nvar; k++)
547 struct var_range *vr = get_var_range (x->vars[k]);
548 if (++v[k] >= vr->max)
557 sorted_tab = xnrealloc (sorted_tab,
558 n_sorted_tab + 1, sizeof *sorted_tab);
559 sorted_tab[n_sorted_tab] = NULL;
564 /* Form crosstabulations for general mode. */
566 calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
568 bool bad_warn = true;
571 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
573 /* Flattened current table index. */
576 for (t = 0; t < nxtab; t++)
578 struct crosstab *x = xtab[t];
579 const size_t entry_size = (sizeof (struct table_entry)
580 + sizeof (union value) * (x->nvar - 1));
581 struct table_entry *te = local_alloc (entry_size);
583 /* Construct table entry for the current record and table. */
589 for (j = 0; j < x->nvar; j++)
591 const union value *v = case_data (c, x->vars[j]->fv);
592 const struct missing_values *mv = &x->vars[j]->miss;
593 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
594 || (cmd.miss == CRS_INCLUDE
595 && mv_is_value_system_missing (mv, v)))
597 x->missing += weight;
601 if (x->vars[j]->type == NUMERIC)
602 te->values[j].f = case_num (c, x->vars[j]->fv);
605 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
608 /* Necessary in order to simplify comparisons. */
609 memset (&te->values[j].s[x->vars[j]->width], 0,
610 sizeof (union value) - x->vars[j]->width);
615 /* Add record to hash table. */
617 struct table_entry **tepp
618 = (struct table_entry **) hsh_probe (gen_tab, te);
621 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
624 memcpy (tep, te, entry_size);
629 (*tepp)->u.freq += weight;
640 calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
642 bool bad_warn = true;
645 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
647 /* Flattened current table index. */
650 for (t = 0; t < nxtab; t++)
652 struct crosstab *x = xtab[t];
657 for (i = 0; i < x->nvar; i++)
659 struct variable *const v = x->vars[i];
660 struct var_range *vr = get_var_range (v);
661 double value = case_num (c, v->fv);
663 /* Note that the first test also rules out SYSMIS. */
664 if ((value < vr->min || value >= vr->max)
665 || (cmd.miss == CRS_TABLE
666 && mv_is_num_user_missing (&v->miss, value)))
668 x->missing += weight;
674 ofs += fact * ((int) value - vr->min);
680 struct variable *row_var = x->vars[ROW_VAR];
681 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
683 struct variable *col_var = x->vars[COL_VAR];
684 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
686 const int col_dim = get_var_range (col_var)->count;
688 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
697 /* Compare the table_entry's at A and B and return a strcmp()-type
700 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
702 const struct table_entry *a = a_;
703 const struct table_entry *b = b_;
705 if (a->table > b->table)
707 else if (a->table < b->table)
711 const struct crosstab *x = xtab[a->table];
714 for (i = x->nvar - 1; i >= 0; i--)
715 if (x->vars[i]->type == NUMERIC)
717 const double diffnum = a->values[i].f - b->values[i].f;
720 else if (diffnum > 0)
725 assert (x->vars[i]->type == ALPHA);
727 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
738 /* Calculate a hash value from table_entry A. */
740 hash_table_entry (const void *a_, const void *aux UNUSED)
742 const struct table_entry *a = a_;
747 for (i = 0; i < xtab[a->table]->nvar; i++)
748 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
753 /* Post-data reading calculations. */
755 static struct table_entry **find_pivot_extent (struct table_entry **,
756 int *cnt, int pivot);
757 static void enum_var_values (struct table_entry **entries, int entry_cnt,
759 union value **values, int *value_cnt);
760 static void output_pivot_table (struct table_entry **, struct table_entry **,
761 double **, double **, double **,
762 int *, int *, int *);
763 static void make_summary_table (void);
766 postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
770 n_sorted_tab = hsh_count (gen_tab);
771 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
774 make_summary_table ();
776 /* Identify all the individual crosstabulation tables, and deal with
779 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
780 int pc = n_sorted_tab; /* Pivot count. */
782 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
783 int maxrows = 0, maxcols = 0, maxcells = 0;
787 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
791 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
792 &maxrows, &maxcols, &maxcells);
801 hsh_destroy (gen_tab);
806 static void insert_summary (struct tab_table *, int tab_index, double valid);
808 /* Output a table summarizing the cases processed. */
810 make_summary_table (void)
812 struct tab_table *summary;
814 struct table_entry **pb = sorted_tab, **pe;
815 int pc = n_sorted_tab;
818 summary = tab_create (7, 3 + nxtab, 1);
819 tab_title (summary, _("Summary."));
820 tab_headers (summary, 1, 0, 3, 0);
821 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
822 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
823 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
824 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
825 tab_hline (summary, TAL_1, 1, 6, 1);
826 tab_hline (summary, TAL_1, 1, 6, 2);
827 tab_vline (summary, TAL_1, 3, 1, 1);
828 tab_vline (summary, TAL_1, 5, 1, 1);
832 for (i = 0; i < 3; i++)
834 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
835 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
838 tab_offset (summary, 0, 3);
844 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
848 while (cur_tab < (*pb)->table)
849 insert_summary (summary, cur_tab++, 0.);
852 for (valid = 0.; pb < pe; pb++)
853 valid += (*pb)->u.freq;
856 const struct crosstab *const x = xtab[(*pb)->table];
857 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
858 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
859 const int count = n_cols * n_rows;
861 for (valid = 0.; pb < pe; pb++)
863 const double *data = (*pb)->u.data;
866 for (i = 0; i < count; i++)
870 insert_summary (summary, cur_tab++, valid);
875 while (cur_tab < nxtab)
876 insert_summary (summary, cur_tab++, 0.);
881 /* Inserts a line into T describing the crosstabulation at index
882 TAB_INDEX, which has VALID valid observations. */
884 insert_summary (struct tab_table *t, int tab_index, double valid)
886 struct crosstab *x = xtab[tab_index];
888 tab_hline (t, TAL_1, 0, 6, 0);
890 /* Crosstabulation name. */
892 char *buf = local_alloc (128 * x->nvar);
896 for (i = 0; i < x->nvar; i++)
899 cp = stpcpy (cp, " * ");
902 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
904 tab_text (t, 0, 0, TAB_LEFT, buf);
909 /* Counts and percentages. */
919 for (i = 0; i < 3; i++)
921 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
922 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
933 static struct tab_table *table; /* Crosstabulation table. */
934 static struct tab_table *chisq; /* Chi-square table. */
935 static struct tab_table *sym; /* Symmetric measures table. */
936 static struct tab_table *risk; /* Risk estimate table. */
937 static struct tab_table *direct; /* Directional measures table. */
940 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
942 /* Column values, number of columns. */
943 static union value *cols;
946 /* Row values, number of rows. */
947 static union value *rows;
950 /* Number of statistically interesting columns/rows (columns/rows with
952 static int ns_cols, ns_rows;
954 /* Crosstabulation. */
955 static const struct crosstab *x;
957 /* Number of variables from the crosstabulation to consider. This is
958 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
961 /* Matrix contents. */
962 static double *mat; /* Matrix proper. */
963 static double *row_tot; /* Row totals. */
964 static double *col_tot; /* Column totals. */
965 static double W; /* Grand total. */
967 static void display_dimensions (struct tab_table *, int first_difference,
968 struct table_entry *);
969 static void display_crosstabulation (void);
970 static void display_chisq (void);
971 static void display_symmetric (void);
972 static void display_risk (void);
973 static void display_directional (void);
974 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
975 static void table_value_missing (struct tab_table *table, int c, int r,
976 unsigned char opt, const union value *v,
977 const struct variable *var);
978 static void delete_missing (void);
980 /* Output pivot table beginning at PB and continuing until PE,
981 exclusive. For efficiency, *MATP is a pointer to a matrix that can
982 hold *MAXROWS entries. */
984 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
985 double **matp, double **row_totp, double **col_totp,
986 int *maxrows, int *maxcols, int *maxcells)
989 struct table_entry **tb = pb, **te; /* Table begin, table end. */
990 int tc = pe - pb; /* Table count. */
992 /* Table entry for header comparison. */
993 struct table_entry *cmp = NULL;
995 x = xtab[(*pb)->table];
996 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
998 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1000 /* Crosstabulation table initialization. */
1003 table = tab_create (nvar + n_cols,
1004 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1005 tab_headers (table, nvar - 1, 0, 2, 0);
1007 /* First header line. */
1008 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1009 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1011 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1013 /* Second header line. */
1017 for (i = 2; i < nvar; i++)
1018 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1019 TAB_RIGHT | TAT_TITLE,
1021 ? x->vars[i]->label : x->vars[i]->name));
1022 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1023 x->vars[ROW_VAR]->name);
1024 for (i = 0; i < n_cols; i++)
1025 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1027 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1030 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1031 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1035 char *title = local_alloc (x->nvar * 64 + 128);
1039 if (cmd.pivot == CRS_PIVOT)
1040 for (i = 0; i < nvar; i++)
1043 cp = stpcpy (cp, " by ");
1044 cp = stpcpy (cp, x->vars[i]->name);
1048 cp = spprintf (cp, "%s by %s for",
1049 x->vars[0]->name, x->vars[1]->name);
1050 for (i = 2; i < nvar; i++)
1052 char buf[64], *bufp;
1057 cp = stpcpy (cp, x->vars[i]->name);
1059 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1060 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1062 cp = stpcpy (cp, bufp);
1066 cp = stpcpy (cp, " [");
1067 for (i = 0; i < num_cells; i++)
1075 static const struct tuple cell_names[] =
1077 {CRS_CL_COUNT, N_("count")},
1078 {CRS_CL_ROW, N_("row %")},
1079 {CRS_CL_COLUMN, N_("column %")},
1080 {CRS_CL_TOTAL, N_("total %")},
1081 {CRS_CL_EXPECTED, N_("expected")},
1082 {CRS_CL_RESIDUAL, N_("residual")},
1083 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1084 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1088 const struct tuple *t;
1090 for (t = cell_names; t->value != cells[i]; t++)
1091 assert (t->value != -1);
1093 cp = stpcpy (cp, ", ");
1094 cp = stpcpy (cp, gettext (t->name));
1098 tab_title (table, "%s", title);
1102 tab_offset (table, 0, 2);
1107 /* Chi-square table initialization. */
1108 if (cmd.a_statistics[CRS_ST_CHISQ])
1110 chisq = tab_create (6 + (nvar - 2),
1111 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1112 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1114 tab_title (chisq, _("Chi-square tests."));
1116 tab_offset (chisq, nvar - 2, 0);
1117 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1118 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1119 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1120 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1121 _("Asymp. Sig. (2-sided)"));
1122 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1123 _("Exact. Sig. (2-sided)"));
1124 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1125 _("Exact. Sig. (1-sided)"));
1127 tab_offset (chisq, 0, 1);
1132 /* Symmetric measures. */
1133 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1134 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1135 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1136 || cmd.a_statistics[CRS_ST_KAPPA])
1138 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1139 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1140 tab_title (sym, _("Symmetric measures."));
1142 tab_offset (sym, nvar - 2, 0);
1143 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1144 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1145 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1146 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1147 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1148 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1149 tab_offset (sym, 0, 1);
1154 /* Risk estimate. */
1155 if (cmd.a_statistics[CRS_ST_RISK])
1157 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1158 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1159 tab_title (risk, _("Risk estimate."));
1161 tab_offset (risk, nvar - 2, 0);
1162 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1163 _("95%% Confidence Interval"));
1164 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1165 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1166 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1167 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1168 tab_hline (risk, TAL_1, 2, 3, 1);
1169 tab_vline (risk, TAL_1, 2, 0, 1);
1170 tab_offset (risk, 0, 2);
1175 /* Directional measures. */
1176 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1177 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1179 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1180 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1181 tab_title (direct, _("Directional measures."));
1183 tab_offset (direct, nvar - 2, 0);
1184 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1185 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1186 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1187 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1188 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1189 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1190 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1191 tab_offset (direct, 0, 1);
1198 /* Find pivot subtable if applicable. */
1199 te = find_pivot_extent (tb, &tc, 0);
1203 /* Find all the row variable values. */
1204 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1206 /* Allocate memory space for the column and row totals. */
1207 if (n_rows > *maxrows)
1209 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1210 row_tot = *row_totp;
1213 if (n_cols > *maxcols)
1215 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1216 col_tot = *col_totp;
1220 /* Allocate table space for the matrix. */
1221 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1222 tab_realloc (table, -1,
1223 max (tab_nr (table) + (n_rows + 1) * num_cells,
1224 tab_nr (table) * (pe - pb) / (te - tb)));
1226 if (mode == GENERAL)
1228 /* Allocate memory space for the matrix. */
1229 if (n_cols * n_rows > *maxcells)
1231 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1232 *maxcells = n_cols * n_rows;
1237 /* Build the matrix and calculate column totals. */
1239 union value *cur_col = cols;
1240 union value *cur_row = rows;
1242 double *cp = col_tot;
1243 struct table_entry **p;
1246 for (p = &tb[0]; p < te; p++)
1248 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1252 for (; cur_row < &rows[n_rows]; cur_row++)
1258 mp = &mat[cur_col - cols];
1261 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1268 *cp += *mp = (*p)->u.freq;
1273 /* Zero out the rest of the matrix. */
1274 for (; cur_row < &rows[n_rows]; cur_row++)
1280 if (cur_col < &cols[n_cols])
1282 const int rem_cols = n_cols - (cur_col - cols);
1285 for (c = 0; c < rem_cols; c++)
1287 mp = &mat[cur_col - cols];
1288 for (r = 0; r < n_rows; r++)
1290 for (c = 0; c < rem_cols; c++)
1292 mp += n_cols - rem_cols;
1300 double *tp = col_tot;
1302 assert (mode == INTEGER);
1303 mat = (*tb)->u.data;
1306 /* Calculate column totals. */
1307 for (c = 0; c < n_cols; c++)
1310 double *cp = &mat[c];
1312 for (r = 0; r < n_rows; r++)
1313 cum += cp[r * n_cols];
1321 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1322 ns_cols += *cp != 0.;
1325 /* Calculate row totals. */
1328 double *rp = row_tot;
1331 for (ns_rows = 0, r = n_rows; r--; )
1334 for (c = n_cols; c--; )
1342 /* Calculate grand total. */
1348 if (n_rows < n_cols)
1349 tp = row_tot, n = n_rows;
1351 tp = col_tot, n = n_cols;
1357 /* Find the first variable that differs from the last subtable,
1358 then display the values of the dimensioning variables for
1359 each table that needs it. */
1361 int first_difference = nvar - 1;
1364 for (; ; first_difference--)
1366 assert (first_difference >= 2);
1367 if (memcmp (&cmp->values[first_difference],
1368 &(*tb)->values[first_difference],
1369 sizeof *cmp->values))
1375 display_dimensions (table, first_difference, *tb);
1377 display_dimensions (chisq, first_difference, *tb);
1379 display_dimensions (sym, first_difference, *tb);
1381 display_dimensions (risk, first_difference, *tb);
1383 display_dimensions (direct, first_difference, *tb);
1387 display_crosstabulation ();
1388 if (cmd.miss == CRS_REPORT)
1393 display_symmetric ();
1397 display_directional ();
1408 tab_resize (chisq, 4 + (nvar - 2), -1);
1419 /* Delete missing rows and columns for statistical analysis when
1422 delete_missing (void)
1427 for (r = 0; r < n_rows; r++)
1428 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1432 for (c = 0; c < n_cols; c++)
1433 mat[c + r * n_cols] = 0.;
1441 for (c = 0; c < n_cols; c++)
1442 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1446 for (r = 0; r < n_rows; r++)
1447 mat[c + r * n_cols] = 0.;
1453 /* Prepare table T for submission, and submit it. */
1455 submit (struct tab_table *t)
1462 tab_resize (t, -1, 0);
1463 if (tab_nr (t) == tab_t (t))
1468 tab_offset (t, 0, 0);
1470 for (i = 2; i < nvar; i++)
1471 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1472 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1473 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1474 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1476 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1478 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1479 tab_dim (t, crosstabs_dim);
1483 /* Sets the widths of all the columns and heights of all the rows in
1484 table T for driver D. */
1486 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1490 /* Width of a numerical column. */
1491 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1492 if (cmd.miss == CRS_REPORT)
1493 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1495 /* Set width for header columns. */
1501 w = d->width - c * (t->nc - t->l);
1502 for (i = 0; i <= t->nc; i++)
1506 if (w < d->prop_em_width * 8)
1507 w = d->prop_em_width * 8;
1509 if (w > d->prop_em_width * 15)
1510 w = d->prop_em_width * 15;
1512 for (i = 0; i < t->l; i++)
1516 for (i = t->l; i < t->nc; i++)
1519 for (i = 0; i < t->nr; i++)
1520 t->h[i] = tab_natural_height (t, d, i);
1523 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1524 int *cnt, int pivot);
1525 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1526 int *cnt, int pivot);
1528 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1530 static struct table_entry **
1531 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1533 return (mode == GENERAL
1534 ? find_pivot_extent_general (tp, cnt, pivot)
1535 : find_pivot_extent_integer (tp, cnt, pivot));
1538 /* Find the extent of a region in TP that contains one table. If
1539 PIVOT != 0 that means a set of table entries with identical table
1540 number; otherwise they also have to have the same values for every
1541 dimension after the row and column dimensions. The table that is
1542 searched starts at TP and has length CNT. Returns the first entry
1543 after the last one in the table; sets *CNT to the number of
1544 remaining values. If there are no entries in TP at all, returns
1545 NULL. A yucky interface, admittedly, but it works. */
1546 static struct table_entry **
1547 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1549 struct table_entry *fp = *tp;
1554 x = xtab[(*tp)->table];
1562 if ((*tp)->table != fp->table)
1567 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1574 /* Integer mode correspondent to find_pivot_extent_general(). This
1575 could be optimized somewhat, but I just don't give a crap about
1576 CROSSTABS performance in integer mode, which is just a
1577 CROSSTABS wart as far as I'm concerned.
1579 That said, feel free to send optimization patches to me. */
1580 static struct table_entry **
1581 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1583 struct table_entry *fp = *tp;
1588 x = xtab[(*tp)->table];
1596 if ((*tp)->table != fp->table)
1601 if (memcmp (&(*tp)->values[2], &fp->values[2],
1602 sizeof (union value) * (x->nvar - 2)))
1609 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1610 result. WIDTH_ points to an int which is either 0 for a
1611 numeric value or a string width for a string value. */
1613 compare_value (const void *a_, const void *b_, const void *width_)
1615 const union value *a = a_;
1616 const union value *b = b_;
1617 const int *pwidth = width_;
1618 const int width = *pwidth;
1621 return (a->f < b->f) ? -1 : (a->f > b->f);
1623 return strncmp (a->s, b->s, width);
1626 /* Given an array of ENTRY_CNT table_entry structures starting at
1627 ENTRIES, creates a sorted list of the values that the variable
1628 with index VAR_IDX takes on. The values are returned as a
1629 malloc()'darray stored in *VALUES, with the number of values
1630 stored in *VALUE_CNT.
1633 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1634 union value **values, int *value_cnt)
1636 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1638 if (mode == GENERAL)
1640 int width = v->width;
1643 *values = xnmalloc (entry_cnt, sizeof **values);
1644 for (i = 0; i < entry_cnt; i++)
1645 (*values)[i] = entries[i]->values[var_idx];
1646 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1647 compare_value, &width);
1651 struct var_range *vr = get_var_range (v);
1654 assert (mode == INTEGER);
1655 *values = xnmalloc (vr->count, sizeof **values);
1656 for (i = 0; i < vr->count; i++)
1657 (*values)[i].f = i + vr->min;
1658 *value_cnt = vr->count;
1662 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1663 from V, displayed with print format spec from variable VAR. When
1664 in REPORT missing-value mode, missing values have an M appended. */
1666 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1667 const union value *v, const struct variable *var)
1671 const char *label = val_labs_find (var->val_labs, *v);
1674 tab_text (table, c, r, TAB_LEFT, label);
1678 s.string = tab_alloc (table, var->print.w);
1679 format_short (s.string, &var->print, v);
1680 s.length = strlen (s.string);
1681 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1682 s.string[s.length++] = 'M';
1683 while (s.length && *s.string == ' ')
1688 tab_raw (table, c, r, opt, &s);
1691 /* Draws a line across TABLE at the current row to indicate the most
1692 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1693 that changed, and puts the values that changed into the table. TB
1694 and X must be the corresponding table_entry and crosstab,
1697 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1699 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1701 for (; first_difference >= 2; first_difference--)
1702 table_value_missing (table, nvar - first_difference - 1, 0,
1703 TAB_RIGHT, &tb->values[first_difference],
1704 x->vars[first_difference]);
1707 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1708 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1709 additionally suffixed with a letter `M'. */
1711 format_cell_entry (struct tab_table *table, int c, int r, double value,
1712 char suffix, bool mark_missing)
1714 const struct fmt_spec f = {FMT_F, 10, 1};
1719 s.string = tab_alloc (table, 16);
1721 data_out (s.string, &f, &v);
1722 while (*s.string == ' ')
1728 s.string[s.length++] = suffix;
1730 s.string[s.length++] = 'M';
1732 tab_raw (table, c, r, TAB_RIGHT, &s);
1735 /* Displays the crosstabulation table. */
1737 display_crosstabulation (void)
1742 for (r = 0; r < n_rows; r++)
1743 table_value_missing (table, nvar - 2, r * num_cells,
1744 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1746 tab_text (table, nvar - 2, n_rows * num_cells,
1747 TAB_LEFT, _("Total"));
1749 /* Put in the actual cells. */
1754 tab_offset (table, nvar - 1, -1);
1755 for (r = 0; r < n_rows; r++)
1758 tab_hline (table, TAL_1, -1, n_cols, 0);
1759 for (c = 0; c < n_cols; c++)
1761 bool mark_missing = false;
1762 double expected_value = row_tot[r] * col_tot[c] / W;
1763 if (cmd.miss == CRS_REPORT
1764 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1765 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1767 mark_missing = true;
1768 for (i = 0; i < num_cells; i++)
1779 v = *mp / row_tot[r] * 100.;
1783 v = *mp / col_tot[c] * 100.;
1790 case CRS_CL_EXPECTED:
1793 case CRS_CL_RESIDUAL:
1794 v = *mp - expected_value;
1796 case CRS_CL_SRESIDUAL:
1797 v = (*mp - expected_value) / sqrt (expected_value);
1799 case CRS_CL_ASRESIDUAL:
1800 v = ((*mp - expected_value)
1801 / sqrt (expected_value
1802 * (1. - row_tot[r] / W)
1803 * (1. - col_tot[c] / W)));
1809 format_cell_entry (table, c, i, v, suffix, mark_missing);
1815 tab_offset (table, -1, tab_row (table) + num_cells);
1823 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1824 for (r = 0; r < n_rows; r++)
1827 bool mark_missing = false;
1829 if (cmd.miss == CRS_REPORT
1830 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1831 mark_missing = true;
1833 for (i = 0; i < num_cells; i++)
1847 v = row_tot[r] / W * 100.;
1851 v = row_tot[r] / W * 100.;
1854 case CRS_CL_EXPECTED:
1855 case CRS_CL_RESIDUAL:
1856 case CRS_CL_SRESIDUAL:
1857 case CRS_CL_ASRESIDUAL:
1864 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1865 tab_next_row (table);
1870 /* Column totals, grand total. */
1876 tab_hline (table, TAL_1, -1, n_cols, 0);
1877 for (c = 0; c <= n_cols; c++)
1879 double ct = c < n_cols ? col_tot[c] : W;
1880 bool mark_missing = false;
1884 if (cmd.miss == CRS_REPORT && c < n_cols
1885 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1886 mark_missing = true;
1888 for (i = 0; i < num_cells; i++)
1910 case CRS_CL_EXPECTED:
1911 case CRS_CL_RESIDUAL:
1912 case CRS_CL_SRESIDUAL:
1913 case CRS_CL_ASRESIDUAL:
1919 format_cell_entry (table, c, i, v, suffix, mark_missing);
1924 tab_offset (table, -1, tab_row (table) + last_row);
1927 tab_offset (table, 0, -1);
1930 static void calc_r (double *X, double *Y, double *, double *, double *);
1931 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1933 /* Display chi-square statistics. */
1935 display_chisq (void)
1937 static const char *chisq_stats[N_CHISQ] =
1939 N_("Pearson Chi-Square"),
1940 N_("Likelihood Ratio"),
1941 N_("Fisher's Exact Test"),
1942 N_("Continuity Correction"),
1943 N_("Linear-by-Linear Association"),
1945 double chisq_v[N_CHISQ];
1946 double fisher1, fisher2;
1952 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1954 tab_offset (chisq, nvar - 2, -1);
1956 for (i = 0; i < N_CHISQ; i++)
1958 if ((i != 2 && chisq_v[i] == SYSMIS)
1959 || (i == 2 && fisher1 == SYSMIS))
1963 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1966 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1967 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1968 tab_float (chisq, 3, 0, TAB_RIGHT,
1969 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1974 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1975 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1977 tab_next_row (chisq);
1980 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1981 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1982 tab_next_row (chisq);
1984 tab_offset (chisq, 0, -1);
1987 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1988 double[N_SYMMETRIC]);
1990 /* Display symmetric measures. */
1992 display_symmetric (void)
1994 static const char *categories[] =
1996 N_("Nominal by Nominal"),
1997 N_("Ordinal by Ordinal"),
1998 N_("Interval by Interval"),
1999 N_("Measure of Agreement"),
2002 static const char *stats[N_SYMMETRIC] =
2006 N_("Contingency Coefficient"),
2007 N_("Kendall's tau-b"),
2008 N_("Kendall's tau-c"),
2010 N_("Spearman Correlation"),
2015 static const int stats_categories[N_SYMMETRIC] =
2017 0, 0, 0, 1, 1, 1, 1, 2, 3,
2021 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2024 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2027 tab_offset (sym, nvar - 2, -1);
2029 for (i = 0; i < N_SYMMETRIC; i++)
2031 if (sym_v[i] == SYSMIS)
2034 if (stats_categories[i] != last_cat)
2036 last_cat = stats_categories[i];
2037 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2040 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2041 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2042 if (sym_ase[i] != SYSMIS)
2043 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2044 if (sym_t[i] != SYSMIS)
2045 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2046 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2050 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2051 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2054 tab_offset (sym, 0, -1);
2057 static int calc_risk (double[], double[], double[], union value *);
2059 /* Display risk estimate. */
2064 double risk_v[3], lower[3], upper[3];
2068 if (!calc_risk (risk_v, upper, lower, c))
2071 tab_offset (risk, nvar - 2, -1);
2073 for (i = 0; i < 3; i++)
2075 if (risk_v[i] == SYSMIS)
2081 if (x->vars[COL_VAR]->type == NUMERIC)
2082 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2083 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2085 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2086 x->vars[COL_VAR]->name,
2087 x->vars[COL_VAR]->width, c[0].s,
2088 x->vars[COL_VAR]->width, c[1].s);
2092 if (x->vars[ROW_VAR]->type == NUMERIC)
2093 sprintf (buf, _("For cohort %s = %g"),
2094 x->vars[ROW_VAR]->name, rows[i - 1].f);
2096 sprintf (buf, _("For cohort %s = %.*s"),
2097 x->vars[ROW_VAR]->name,
2098 x->vars[ROW_VAR]->width, rows[i - 1].s);
2102 tab_text (risk, 0, 0, TAB_LEFT, buf);
2103 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2104 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2105 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2106 tab_next_row (risk);
2109 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2110 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2111 tab_next_row (risk);
2113 tab_offset (risk, 0, -1);
2116 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2117 double[N_DIRECTIONAL]);
2119 /* Display directional measures. */
2121 display_directional (void)
2123 static const char *categories[] =
2125 N_("Nominal by Nominal"),
2126 N_("Ordinal by Ordinal"),
2127 N_("Nominal by Interval"),
2130 static const char *stats[] =
2133 N_("Goodman and Kruskal tau"),
2134 N_("Uncertainty Coefficient"),
2139 static const char *types[] =
2146 static const int stats_categories[N_DIRECTIONAL] =
2148 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2151 static const int stats_stats[N_DIRECTIONAL] =
2153 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2156 static const int stats_types[N_DIRECTIONAL] =
2158 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2161 static const int *stats_lookup[] =
2168 static const char **stats_names[] =
2180 double direct_v[N_DIRECTIONAL];
2181 double direct_ase[N_DIRECTIONAL];
2182 double direct_t[N_DIRECTIONAL];
2186 if (!calc_directional (direct_v, direct_ase, direct_t))
2189 tab_offset (direct, nvar - 2, -1);
2191 for (i = 0; i < N_DIRECTIONAL; i++)
2193 if (direct_v[i] == SYSMIS)
2199 for (j = 0; j < 3; j++)
2200 if (last[j] != stats_lookup[j][i])
2203 tab_hline (direct, TAL_1, j, 6, 0);
2208 int k = last[j] = stats_lookup[j][i];
2213 string = x->vars[0]->name;
2215 string = x->vars[1]->name;
2217 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2218 gettext (stats_names[j][k]), string);
2223 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2224 if (direct_ase[i] != SYSMIS)
2225 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2226 if (direct_t[i] != SYSMIS)
2227 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2228 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2229 tab_next_row (direct);
2232 tab_offset (direct, 0, -1);
2235 /* Statistical calculations. */
2237 /* Returns the value of the gamma (factorial) function for an integer
2240 gamma_int (double x)
2245 for (i = 2; i < x; i++)
2250 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2252 static inline double
2253 Pr (int a, int b, int c, int d)
2255 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2256 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2257 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2258 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2259 / gamma_int (a + b + c + d + 1.));
2262 /* Swap the contents of A and B. */
2264 swap (int *a, int *b)
2271 /* Calculate significance for Fisher's exact test as specified in
2272 _SPSS Statistical Algorithms_, Appendix 5. */
2274 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2278 if (min (c, d) < min (a, b))
2279 swap (&a, &c), swap (&b, &d);
2280 if (min (b, d) < min (a, c))
2281 swap (&a, &b), swap (&c, &d);
2285 swap (&a, &b), swap (&c, &d);
2287 swap (&a, &c), swap (&b, &d);
2291 for (x = 0; x <= a; x++)
2292 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2294 *fisher2 = *fisher1;
2295 for (x = 1; x <= b; x++)
2296 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2299 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2300 columns with values COLS and N_ROWS rows with values ROWS. Values
2301 in the matrix sum to W. */
2303 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2304 double *fisher1, double *fisher2)
2308 chisq[0] = chisq[1] = 0.;
2309 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2310 *fisher1 = *fisher2 = SYSMIS;
2312 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2314 if (ns_rows <= 1 || ns_cols <= 1)
2316 chisq[0] = chisq[1] = SYSMIS;
2320 for (r = 0; r < n_rows; r++)
2321 for (c = 0; c < n_cols; c++)
2323 const double expected = row_tot[r] * col_tot[c] / W;
2324 const double freq = mat[n_cols * r + c];
2325 const double residual = freq - expected;
2327 chisq[0] += residual * residual / expected;
2329 chisq[1] += freq * log (expected / freq);
2340 /* Calculate Yates and Fisher exact test. */
2341 if (ns_cols == 2 && ns_rows == 2)
2343 double f11, f12, f21, f22;
2349 for (i = j = 0; i < n_cols; i++)
2350 if (col_tot[i] != 0.)
2359 f11 = mat[nz_cols[0]];
2360 f12 = mat[nz_cols[1]];
2361 f21 = mat[nz_cols[0] + n_cols];
2362 f22 = mat[nz_cols[1] + n_cols];
2367 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2370 chisq[3] = (W * x * x
2371 / (f11 + f12) / (f21 + f22)
2372 / (f11 + f21) / (f12 + f22));
2380 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2381 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2384 /* Calculate Mantel-Haenszel. */
2385 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2387 double r, ase_0, ase_1;
2388 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2390 chisq[4] = (W - 1.) * r * r;
2395 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2396 ASE_1, and ase_0 into ASE_0. The row and column values must be
2397 passed in X and Y. */
2399 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2401 double SX, SY, S, T;
2403 double sum_XYf, sum_X2Y2f;
2404 double sum_Xr, sum_X2r;
2405 double sum_Yc, sum_Y2c;
2408 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2409 for (j = 0; j < n_cols; j++)
2411 double fij = mat[j + i * n_cols];
2412 double product = X[i] * Y[j];
2413 double temp = fij * product;
2415 sum_X2Y2f += temp * product;
2418 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2420 sum_Xr += X[i] * row_tot[i];
2421 sum_X2r += X[i] * X[i] * row_tot[i];
2425 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2427 sum_Yc += Y[i] * col_tot[i];
2428 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2432 S = sum_XYf - sum_Xr * sum_Yc / W;
2433 SX = sum_X2r - sum_Xr * sum_Xr / W;
2434 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2437 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2442 for (s = c = 0., i = 0; i < n_rows; i++)
2443 for (j = 0; j < n_cols; j++)
2445 double Xresid, Yresid;
2448 Xresid = X[i] - Xbar;
2449 Yresid = Y[j] - Ybar;
2450 temp = (T * Xresid * Yresid
2452 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2453 y = mat[j + i * n_cols] * temp * temp - c;
2458 *ase_1 = sqrt (s) / (T * T);
2462 static double somers_d_v[3];
2463 static double somers_d_ase[3];
2464 static double somers_d_t[3];
2466 /* Calculate symmetric statistics and their asymptotic standard
2467 errors. Returns 0 if none could be calculated. */
2469 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2470 double t[N_SYMMETRIC])
2472 int q = min (ns_rows, ns_cols);
2481 for (i = 0; i < N_SYMMETRIC; i++)
2482 v[i] = ase[i] = t[i] = SYSMIS;
2485 /* Phi, Cramer's V, contingency coefficient. */
2486 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2488 double Xp = 0.; /* Pearson chi-square. */
2493 for (r = 0; r < n_rows; r++)
2494 for (c = 0; c < n_cols; c++)
2496 const double expected = row_tot[r] * col_tot[c] / W;
2497 const double freq = mat[n_cols * r + c];
2498 const double residual = freq - expected;
2500 Xp += residual * residual / expected;
2504 if (cmd.a_statistics[CRS_ST_PHI])
2506 v[0] = sqrt (Xp / W);
2507 v[1] = sqrt (Xp / (W * (q - 1)));
2509 if (cmd.a_statistics[CRS_ST_CC])
2510 v[2] = sqrt (Xp / (Xp + W));
2513 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2514 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2519 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2526 for (r = 0; r < n_rows; r++)
2527 Dr -= row_tot[r] * row_tot[r];
2528 for (c = 0; c < n_cols; c++)
2529 Dc -= col_tot[c] * col_tot[c];
2535 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2536 for (c = 0; c < n_cols; c++)
2540 for (r = 0; r < n_rows; r++)
2541 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2551 for (i = 0; i < n_rows; i++)
2555 for (j = 1; j < n_cols; j++)
2556 Cij += col_tot[j] - cum[j + i * n_cols];
2559 for (j = 1; j < n_cols; j++)
2560 Dij += cum[j + (i - 1) * n_cols];
2564 double fij = mat[j + i * n_cols];
2570 assert (j < n_cols);
2572 Cij -= col_tot[j] - cum[j + i * n_cols];
2573 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2577 Cij += cum[j - 1 + (i - 1) * n_cols];
2578 Dij -= cum[j + (i - 1) * n_cols];
2584 if (cmd.a_statistics[CRS_ST_BTAU])
2585 v[3] = (P - Q) / sqrt (Dr * Dc);
2586 if (cmd.a_statistics[CRS_ST_CTAU])
2587 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2588 if (cmd.a_statistics[CRS_ST_GAMMA])
2589 v[5] = (P - Q) / (P + Q);
2591 /* ASE for tau-b, tau-c, gamma. Calculations could be
2592 eliminated here, at expense of memory. */
2597 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2598 for (i = 0; i < n_rows; i++)
2602 for (j = 1; j < n_cols; j++)
2603 Cij += col_tot[j] - cum[j + i * n_cols];
2606 for (j = 1; j < n_cols; j++)
2607 Dij += cum[j + (i - 1) * n_cols];
2611 double fij = mat[j + i * n_cols];
2613 if (cmd.a_statistics[CRS_ST_BTAU])
2615 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2616 + v[3] * (row_tot[i] * Dc
2617 + col_tot[j] * Dr));
2618 btau_cum += fij * temp * temp;
2622 const double temp = Cij - Dij;
2623 ctau_cum += fij * temp * temp;
2626 if (cmd.a_statistics[CRS_ST_GAMMA])
2628 const double temp = Q * Cij - P * Dij;
2629 gamma_cum += fij * temp * temp;
2632 if (cmd.a_statistics[CRS_ST_D])
2634 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2635 - (P - Q) * (W - row_tot[i]));
2636 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2637 - (Q - P) * (W - col_tot[j]));
2642 assert (j < n_cols);
2644 Cij -= col_tot[j] - cum[j + i * n_cols];
2645 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2649 Cij += cum[j - 1 + (i - 1) * n_cols];
2650 Dij -= cum[j + (i - 1) * n_cols];
2656 btau_var = ((btau_cum
2657 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2659 if (cmd.a_statistics[CRS_ST_BTAU])
2661 ase[3] = sqrt (btau_var);
2662 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2665 if (cmd.a_statistics[CRS_ST_CTAU])
2667 ase[4] = ((2 * q / ((q - 1) * W * W))
2668 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2669 t[4] = v[4] / ase[4];
2671 if (cmd.a_statistics[CRS_ST_GAMMA])
2673 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2674 t[5] = v[5] / (2. / (P + Q)
2675 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2677 if (cmd.a_statistics[CRS_ST_D])
2679 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2680 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2681 somers_d_t[0] = (somers_d_v[0]
2683 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2684 somers_d_v[1] = (P - Q) / Dc;
2685 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2686 somers_d_t[1] = (somers_d_v[1]
2688 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2689 somers_d_v[2] = (P - Q) / Dr;
2690 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2691 somers_d_t[2] = (somers_d_v[2]
2693 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2699 /* Spearman correlation, Pearson's r. */
2700 if (cmd.a_statistics[CRS_ST_CORR])
2702 double *R = local_alloc (sizeof *R * n_rows);
2703 double *C = local_alloc (sizeof *C * n_cols);
2706 double y, t, c = 0., s = 0.;
2711 R[i] = s + (row_tot[i] + 1.) / 2.;
2718 assert (i < n_rows);
2723 double y, t, c = 0., s = 0.;
2728 C[j] = s + (col_tot[j] + 1.) / 2;
2735 assert (j < n_cols);
2739 calc_r (R, C, &v[6], &t[6], &ase[6]);
2745 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2749 /* Cohen's kappa. */
2750 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2752 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2755 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2756 i < ns_rows; i++, j++)
2760 while (col_tot[j] == 0.)
2763 prod = row_tot[i] * col_tot[j];
2764 sum = row_tot[i] + col_tot[j];
2766 sum_fii += mat[j + i * n_cols];
2768 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2769 sum_riciri_ci += prod * sum;
2771 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2772 for (j = 0; j < ns_cols; j++)
2774 double sum = row_tot[i] + col_tot[j];
2775 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2778 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2780 ase[8] = sqrt ((W * W * sum_rici
2781 + sum_rici * sum_rici
2782 - W * sum_riciri_ci)
2783 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2785 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2786 / pow2 (W * W - sum_rici))
2787 + ((2. * (W - sum_fii)
2788 * (2. * sum_fii * sum_rici
2789 - W * sum_fiiri_ci))
2790 / cube (W * W - sum_rici))
2791 + (pow2 (W - sum_fii)
2792 * (W * sum_fijri_ci2 - 4.
2793 * sum_rici * sum_rici)
2794 / pow4 (W * W - sum_rici))));
2796 t[8] = v[8] / ase[8];
2803 /* Calculate risk estimate. */
2805 calc_risk (double *value, double *upper, double *lower, union value *c)
2807 double f11, f12, f21, f22;
2813 for (i = 0; i < 3; i++)
2814 value[i] = upper[i] = lower[i] = SYSMIS;
2817 if (ns_rows != 2 || ns_cols != 2)
2824 for (i = j = 0; i < n_cols; i++)
2825 if (col_tot[i] != 0.)
2834 f11 = mat[nz_cols[0]];
2835 f12 = mat[nz_cols[1]];
2836 f21 = mat[nz_cols[0] + n_cols];
2837 f22 = mat[nz_cols[1] + n_cols];
2839 c[0] = cols[nz_cols[0]];
2840 c[1] = cols[nz_cols[1]];
2843 value[0] = (f11 * f22) / (f12 * f21);
2844 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2845 lower[0] = value[0] * exp (-1.960 * v);
2846 upper[0] = value[0] * exp (1.960 * v);
2848 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2849 v = sqrt ((f12 / (f11 * (f11 + f12)))
2850 + (f22 / (f21 * (f21 + f22))));
2851 lower[1] = value[1] * exp (-1.960 * v);
2852 upper[1] = value[1] * exp (1.960 * v);
2854 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2855 v = sqrt ((f11 / (f12 * (f11 + f12)))
2856 + (f21 / (f22 * (f21 + f22))));
2857 lower[2] = value[2] * exp (-1.960 * v);
2858 upper[2] = value[2] * exp (1.960 * v);
2863 /* Calculate directional measures. */
2865 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2866 double t[N_DIRECTIONAL])
2871 for (i = 0; i < N_DIRECTIONAL; i++)
2872 v[i] = ase[i] = t[i] = SYSMIS;
2876 if (cmd.a_statistics[CRS_ST_LAMBDA])
2878 double *fim = xnmalloc (n_rows, sizeof *fim);
2879 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2880 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2881 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2882 double sum_fim, sum_fmj;
2884 int rm_index, cm_index;
2887 /* Find maximum for each row and their sum. */
2888 for (sum_fim = 0., i = 0; i < n_rows; i++)
2890 double max = mat[i * n_cols];
2893 for (j = 1; j < n_cols; j++)
2894 if (mat[j + i * n_cols] > max)
2896 max = mat[j + i * n_cols];
2900 sum_fim += fim[i] = max;
2901 fim_index[i] = index;
2904 /* Find maximum for each column. */
2905 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2907 double max = mat[j];
2910 for (i = 1; i < n_rows; i++)
2911 if (mat[j + i * n_cols] > max)
2913 max = mat[j + i * n_cols];
2917 sum_fmj += fmj[j] = max;
2918 fmj_index[j] = index;
2921 /* Find maximum row total. */
2924 for (i = 1; i < n_rows; i++)
2925 if (row_tot[i] > rm)
2931 /* Find maximum column total. */
2934 for (j = 1; j < n_cols; j++)
2935 if (col_tot[j] > cm)
2941 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2942 v[1] = (sum_fmj - rm) / (W - rm);
2943 v[2] = (sum_fim - cm) / (W - cm);
2945 /* ASE1 for Y given X. */
2949 for (accum = 0., i = 0; i < n_rows; i++)
2950 for (j = 0; j < n_cols; j++)
2952 const int deltaj = j == cm_index;
2953 accum += (mat[j + i * n_cols]
2954 * pow2 ((j == fim_index[i])
2959 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2962 /* ASE0 for Y given X. */
2966 for (accum = 0., i = 0; i < n_rows; i++)
2967 if (cm_index != fim_index[i])
2968 accum += (mat[i * n_cols + fim_index[i]]
2969 + mat[i * n_cols + cm_index]);
2970 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2973 /* ASE1 for X given Y. */
2977 for (accum = 0., i = 0; i < n_rows; i++)
2978 for (j = 0; j < n_cols; j++)
2980 const int deltaj = i == rm_index;
2981 accum += (mat[j + i * n_cols]
2982 * pow2 ((i == fmj_index[j])
2987 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2990 /* ASE0 for X given Y. */
2994 for (accum = 0., j = 0; j < n_cols; j++)
2995 if (rm_index != fmj_index[j])
2996 accum += (mat[j + n_cols * fmj_index[j]]
2997 + mat[j + n_cols * rm_index]);
2998 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3001 /* Symmetric ASE0 and ASE1. */
3006 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3007 for (j = 0; j < n_cols; j++)
3009 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3010 int temp1 = (i == rm_index) + (j == cm_index);
3011 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3012 accum1 += (mat[j + i * n_cols]
3013 * pow2 (temp0 + (v[0] - 1.) * temp1));
3015 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3016 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3017 / (2. * W - rm - cm));
3026 double sum_fij2_ri, sum_fij2_ci;
3027 double sum_ri2, sum_cj2;
3029 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3030 for (j = 0; j < n_cols; j++)
3032 double temp = pow2 (mat[j + i * n_cols]);
3033 sum_fij2_ri += temp / row_tot[i];
3034 sum_fij2_ci += temp / col_tot[j];
3037 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3038 sum_ri2 += row_tot[i] * row_tot[i];
3040 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3041 sum_cj2 += col_tot[j] * col_tot[j];
3043 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3044 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3048 if (cmd.a_statistics[CRS_ST_UC])
3050 double UX, UY, UXY, P;
3051 double ase1_yx, ase1_xy, ase1_sym;
3054 for (UX = 0., i = 0; i < n_rows; i++)
3055 if (row_tot[i] > 0.)
3056 UX -= row_tot[i] / W * log (row_tot[i] / W);
3058 for (UY = 0., j = 0; j < n_cols; j++)
3059 if (col_tot[j] > 0.)
3060 UY -= col_tot[j] / W * log (col_tot[j] / W);
3062 for (UXY = P = 0., i = 0; i < n_rows; i++)
3063 for (j = 0; j < n_cols; j++)
3065 double entry = mat[j + i * n_cols];
3070 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3071 UXY -= entry / W * log (entry / W);
3074 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3075 for (j = 0; j < n_cols; j++)
3077 double entry = mat[j + i * n_cols];
3082 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3083 + (UX - UXY) * log (col_tot[j] / W));
3084 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3085 + (UY - UXY) * log (row_tot[i] / W));
3086 ase1_sym += entry * pow2 ((UXY
3087 * log (row_tot[i] * col_tot[j] / (W * W)))
3088 - (UX + UY) * log (entry / W));
3091 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3092 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3093 t[5] = v[5] / ((2. / (W * (UX + UY)))
3094 * sqrt (P - pow2 (UX + UY - UXY) / W));
3096 v[6] = (UX + UY - UXY) / UX;
3097 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3098 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3100 v[7] = (UX + UY - UXY) / UY;
3101 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3102 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3106 if (cmd.a_statistics[CRS_ST_D])
3111 calc_symmetric (NULL, NULL, NULL);
3112 for (i = 0; i < 3; i++)
3114 v[8 + i] = somers_d_v[i];
3115 ase[8 + i] = somers_d_ase[i];
3116 t[8 + i] = somers_d_t[i];
3121 if (cmd.a_statistics[CRS_ST_ETA])
3124 double sum_Xr, sum_X2r;
3128 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3130 sum_Xr += rows[i].f * row_tot[i];
3131 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3133 SX = sum_X2r - sum_Xr * sum_Xr / W;
3135 for (SXW = 0., j = 0; j < n_cols; j++)
3139 for (cum = 0., i = 0; i < n_rows; i++)
3141 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3142 cum += rows[i].f * mat[j + i * n_cols];
3145 SXW -= cum * cum / col_tot[j];
3147 v[11] = sqrt (1. - SXW / SX);
3151 double sum_Yc, sum_Y2c;
3155 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3157 sum_Yc += cols[i].f * col_tot[i];
3158 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3160 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3162 for (SYW = 0., i = 0; i < n_rows; i++)
3166 for (cum = 0., j = 0; j < n_cols; j++)
3168 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3169 cum += cols[j].f * mat[j + i * n_cols];
3172 SYW -= cum * cum / row_tot[i];
3174 v[12] = sqrt (1. - SYW / SY);
3181 /* A wrapper around data_out() that limits string output to short
3182 string width and null terminates the result. */
3184 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3186 struct fmt_spec fmt_subst;
3188 /* Limit to short string width. */
3189 if (formats[fp->type].cat & FCAT_STRING)
3193 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3194 if (fmt_subst.type == FMT_A)
3195 fmt_subst.w = min (8, fmt_subst.w);
3197 fmt_subst.w = min (16, fmt_subst.w);
3203 data_out (s, fp, v);
3205 /* Null terminate. */