1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/dictionary.h>
41 #include <data/procedure.h>
42 #include <data/value-labels.h>
43 #include <data/variable.h>
44 #include <language/command.h>
45 #include <language/dictionary/split-file.h>
46 #include <language/lexer/lexer.h>
47 #include <libpspp/alloc.h>
48 #include <libpspp/array.h>
49 #include <libpspp/compiler.h>
50 #include <libpspp/hash.h>
51 #include <libpspp/magic.h>
52 #include <libpspp/message.h>
53 #include <libpspp/message.h>
54 #include <libpspp/misc.h>
55 #include <libpspp/pool.h>
56 #include <libpspp/str.h>
57 #include <output/output.h>
58 #include <output/table.h>
61 #define _(msgid) gettext (msgid)
62 #define N_(msgid) msgid
70 +missing=miss:!table/include/report;
71 +write[wr_]=none,cells,all;
72 +format=fmt:!labels/nolabels/novallabs,
75 tabl:!tables/notables,
78 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
80 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
86 /* Number of chi-square statistics. */
89 /* Number of symmetric statistics. */
92 /* Number of directional statistics. */
93 #define N_DIRECTIONAL 13
95 /* A single table entry for general mode. */
98 int table; /* Flattened table number. */
101 double freq; /* Frequency count. */
102 double *data; /* Crosstabulation table for integer mode. */
105 union value values[1]; /* Values. */
108 /* A crosstabulation. */
111 int nvar; /* Number of variables. */
112 double missing; /* Missing cases count. */
113 int ofs; /* Integer mode: Offset into sorted_tab[]. */
114 struct variable *vars[2]; /* At least two variables; sorted by
115 larger indices first. */
118 /* Integer mode variable info. */
121 int min; /* Minimum value. */
122 int max; /* Maximum value + 1. */
123 int count; /* max - min. */
126 static inline struct var_range *
127 get_var_range (struct variable *v)
130 assert (v->aux != NULL);
134 /* Indexes into crosstab.v. */
141 /* General mode crosstabulation table. */
142 static struct hsh_table *gen_tab; /* Hash table. */
143 static int n_sorted_tab; /* Number of entries in sorted_tab. */
144 static struct table_entry **sorted_tab; /* Sorted table. */
146 /* Variables specifies on VARIABLES. */
147 static struct variable **variables;
148 static size_t variables_cnt;
151 static struct crosstab **xtab;
154 /* Integer or general mode? */
163 static int num_cells; /* Number of cells requested. */
164 static int cells[8]; /* Cells requested. */
167 static int write; /* One of WR_* that specifies the WRITE style. */
169 /* Command parsing info. */
170 static struct cmd_crosstabs cmd;
173 static struct pool *pl_tc; /* For table cells. */
174 static struct pool *pl_col; /* For column data. */
176 static int internal_cmd_crosstabs (void);
177 static void precalc (const struct ccase *, void *);
178 static bool calc_general (const struct ccase *, void *);
179 static bool calc_integer (const struct ccase *, void *);
180 static void postcalc (void *);
181 static void submit (struct tab_table *);
183 static void format_short (char *s, const struct fmt_spec *fp,
184 const union value *v);
186 /* Parse and execute CROSSTABS, then clean up. */
190 int result = internal_cmd_crosstabs ();
193 pool_destroy (pl_tc);
194 pool_destroy (pl_col);
199 /* Parses and executes the CROSSTABS procedure. */
201 internal_cmd_crosstabs (void)
210 pl_tc = pool_create ();
211 pl_col = pool_create ();
213 if (!parse_crosstabs (&cmd))
216 mode = variables ? INTEGER : GENERAL;
221 cmd.a_cells[CRS_CL_COUNT] = 1;
227 for (i = 0; i < CRS_CL_count; i++)
232 cmd.a_cells[CRS_CL_COUNT] = 1;
233 cmd.a_cells[CRS_CL_ROW] = 1;
234 cmd.a_cells[CRS_CL_COLUMN] = 1;
235 cmd.a_cells[CRS_CL_TOTAL] = 1;
237 if (cmd.a_cells[CRS_CL_ALL])
239 for (i = 0; i < CRS_CL_count; i++)
241 cmd.a_cells[CRS_CL_ALL] = 0;
243 cmd.a_cells[CRS_CL_NONE] = 0;
245 for (num_cells = i = 0; i < CRS_CL_count; i++)
247 cells[num_cells++] = i;
250 if (cmd.sbc_statistics)
255 for (i = 0; i < CRS_ST_count; i++)
256 if (cmd.a_statistics[i])
259 cmd.a_statistics[CRS_ST_CHISQ] = 1;
260 if (cmd.a_statistics[CRS_ST_ALL])
261 for (i = 0; i < CRS_ST_count; i++)
262 cmd.a_statistics[i] = 1;
266 if (cmd.miss == CRS_REPORT && mode == GENERAL)
268 msg (SE, _("Missing mode REPORT not allowed in general mode. "
269 "Assuming MISSING=TABLE."));
270 cmd.miss = CRS_TABLE;
274 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
275 cmd.a_write[CRS_WR_ALL] = 0;
276 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
278 msg (SE, _("Write mode ALL not allowed in general mode. "
279 "Assuming WRITE=CELLS."));
280 cmd.a_write[CRS_WR_CELLS] = 1;
283 && (cmd.a_write[CRS_WR_NONE]
284 + cmd.a_write[CRS_WR_ALL]
285 + cmd.a_write[CRS_WR_CELLS] == 0))
286 cmd.a_write[CRS_WR_CELLS] = 1;
287 if (cmd.a_write[CRS_WR_CELLS])
288 write = CRS_WR_CELLS;
289 else if (cmd.a_write[CRS_WR_ALL])
294 ok = procedure_with_splits (precalc,
295 mode == GENERAL ? calc_general : calc_integer,
298 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
301 /* Parses the TABLES subcommand. */
303 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
305 struct var_set *var_set;
307 struct variable ***by = NULL;
308 size_t *by_nvar = NULL;
312 /* Ensure that this is a TABLES subcommand. */
313 if (!lex_match_id ("TABLES")
314 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
319 if (variables != NULL)
320 var_set = var_set_create_from_array (variables, variables_cnt);
322 var_set = var_set_create_from_dict (default_dict);
323 assert (var_set != NULL);
327 by = xnrealloc (by, n_by + 1, sizeof *by);
328 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
329 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
330 PV_NO_DUPLICATE | PV_NO_SCRATCH))
332 if (xalloc_oversized (nx, by_nvar[n_by]))
334 msg (SE, _("Too many crosstabulation variables or dimensions."));
340 if (!lex_match (T_BY))
344 lex_error (_("expecting BY"));
353 int *by_iter = xcalloc (n_by, sizeof *by_iter);
356 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
357 for (i = 0; i < nx; i++)
361 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
368 for (i = 0; i < n_by; i++)
369 x->vars[i] = by[i][by_iter[i]];
375 for (i = n_by - 1; i >= 0; i--)
377 if (++by_iter[i] < by_nvar[i])
390 /* All return paths lead here. */
394 for (i = 0; i < n_by; i++)
400 var_set_destroy (var_set);
405 /* Parses the VARIABLES subcommand. */
407 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
411 msg (SE, _("VARIABLES must be specified before TABLES."));
419 size_t orig_nv = variables_cnt;
424 if (!parse_variables (default_dict, &variables, &variables_cnt,
425 (PV_APPEND | PV_NUMERIC
426 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
431 lex_error ("expecting `('");
436 if (!lex_force_int ())
438 min = lex_integer ();
443 if (!lex_force_int ())
445 max = lex_integer ();
448 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
456 lex_error ("expecting `)'");
461 for (i = orig_nv; i < variables_cnt; i++)
463 struct var_range *vr = xmalloc (sizeof *vr);
466 vr->count = max - min + 1;
467 var_attach_aux (variables[i], vr, var_dtor_free);
482 /* Data file processing. */
484 static int compare_table_entry (const void *, const void *, void *);
485 static unsigned hash_table_entry (const void *, void *);
487 /* Set up the crosstabulation tables for processing. */
489 precalc (const struct ccase *first, void *aux UNUSED)
491 output_split_file_values (first);
494 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
504 for (i = 0; i < nxtab; i++)
506 struct crosstab *x = xtab[i];
511 x->ofs = n_sorted_tab;
513 for (j = 2; j < x->nvar; j++)
514 count *= get_var_range (x->vars[j - 2])->count;
516 sorted_tab = xnrealloc (sorted_tab,
517 n_sorted_tab + count, sizeof *sorted_tab);
518 v = local_alloc (sizeof *v * x->nvar);
519 for (j = 2; j < x->nvar; j++)
520 v[j] = get_var_range (x->vars[j])->min;
521 for (j = 0; j < count; j++)
523 struct table_entry *te;
526 te = sorted_tab[n_sorted_tab++]
527 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
531 int row_cnt = get_var_range (x->vars[0])->count;
532 int col_cnt = get_var_range (x->vars[1])->count;
533 const int mat_size = row_cnt * col_cnt;
536 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
537 for (m = 0; m < mat_size; m++)
541 for (k = 2; k < x->nvar; k++)
542 te->values[k].f = v[k];
543 for (k = 2; k < x->nvar; k++)
545 struct var_range *vr = get_var_range (x->vars[k]);
546 if (++v[k] >= vr->max)
555 sorted_tab = xnrealloc (sorted_tab,
556 n_sorted_tab + 1, sizeof *sorted_tab);
557 sorted_tab[n_sorted_tab] = NULL;
561 /* Form crosstabulations for general mode. */
563 calc_general (const struct ccase *c, void *aux UNUSED)
568 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
570 /* Flattened current table index. */
573 for (t = 0; t < nxtab; t++)
575 struct crosstab *x = xtab[t];
576 const size_t entry_size = (sizeof (struct table_entry)
577 + sizeof (union value) * (x->nvar - 1));
578 struct table_entry *te = local_alloc (entry_size);
580 /* Construct table entry for the current record and table. */
586 for (j = 0; j < x->nvar; j++)
588 const union value *v = case_data (c, x->vars[j]->fv);
589 const struct missing_values *mv = &x->vars[j]->miss;
590 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
591 || (cmd.miss == CRS_INCLUDE
592 && mv_is_value_system_missing (mv, v)))
594 x->missing += weight;
598 if (x->vars[j]->type == NUMERIC)
599 te->values[j].f = case_num (c, x->vars[j]->fv);
602 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
605 /* Necessary in order to simplify comparisons. */
606 memset (&te->values[j].s[x->vars[j]->width], 0,
607 sizeof (union value) - x->vars[j]->width);
612 /* Add record to hash table. */
614 struct table_entry **tepp
615 = (struct table_entry **) hsh_probe (gen_tab, te);
618 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
621 memcpy (tep, te, entry_size);
626 (*tepp)->u.freq += weight;
637 calc_integer (const struct ccase *c, void *aux UNUSED)
642 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
644 /* Flattened current table index. */
647 for (t = 0; t < nxtab; t++)
649 struct crosstab *x = xtab[t];
654 for (i = 0; i < x->nvar; i++)
656 struct variable *const v = x->vars[i];
657 struct var_range *vr = get_var_range (v);
658 double value = case_num (c, v->fv);
660 /* Note that the first test also rules out SYSMIS. */
661 if ((value < vr->min || value >= vr->max)
662 || (cmd.miss == CRS_TABLE
663 && mv_is_num_user_missing (&v->miss, value)))
665 x->missing += weight;
671 ofs += fact * ((int) value - vr->min);
677 struct variable *row_var = x->vars[ROW_VAR];
678 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
680 struct variable *col_var = x->vars[COL_VAR];
681 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
683 const int col_dim = get_var_range (col_var)->count;
685 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
694 /* Compare the table_entry's at A and B and return a strcmp()-type
697 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
699 const struct table_entry *a = a_;
700 const struct table_entry *b = b_;
702 if (a->table > b->table)
704 else if (a->table < b->table)
708 const struct crosstab *x = xtab[a->table];
711 for (i = x->nvar - 1; i >= 0; i--)
712 if (x->vars[i]->type == NUMERIC)
714 const double diffnum = a->values[i].f - b->values[i].f;
717 else if (diffnum > 0)
722 assert (x->vars[i]->type == ALPHA);
724 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
735 /* Calculate a hash value from table_entry A. */
737 hash_table_entry (const void *a_, void *foo UNUSED)
739 const struct table_entry *a = a_;
744 for (i = 0; i < xtab[a->table]->nvar; i++)
745 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
750 /* Post-data reading calculations. */
752 static struct table_entry **find_pivot_extent (struct table_entry **,
753 int *cnt, int pivot);
754 static void enum_var_values (struct table_entry **entries, int entry_cnt,
756 union value **values, int *value_cnt);
757 static void output_pivot_table (struct table_entry **, struct table_entry **,
758 double **, double **, double **,
759 int *, int *, int *);
760 static void make_summary_table (void);
763 postcalc (void *aux UNUSED)
767 n_sorted_tab = hsh_count (gen_tab);
768 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
771 make_summary_table ();
773 /* Identify all the individual crosstabulation tables, and deal with
776 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
777 int pc = n_sorted_tab; /* Pivot count. */
779 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
780 int maxrows = 0, maxcols = 0, maxcells = 0;
784 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
788 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
789 &maxrows, &maxcols, &maxcells);
798 hsh_destroy (gen_tab);
801 static void insert_summary (struct tab_table *, int tab_index, double valid);
803 /* Output a table summarizing the cases processed. */
805 make_summary_table (void)
807 struct tab_table *summary;
809 struct table_entry **pb = sorted_tab, **pe;
810 int pc = n_sorted_tab;
813 summary = tab_create (7, 3 + nxtab, 1);
814 tab_title (summary, _("Summary."));
815 tab_headers (summary, 1, 0, 3, 0);
816 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
817 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
818 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
819 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
820 tab_hline (summary, TAL_1, 1, 6, 1);
821 tab_hline (summary, TAL_1, 1, 6, 2);
822 tab_vline (summary, TAL_1, 3, 1, 1);
823 tab_vline (summary, TAL_1, 5, 1, 1);
827 for (i = 0; i < 3; i++)
829 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
830 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
833 tab_offset (summary, 0, 3);
839 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
843 while (cur_tab < (*pb)->table)
844 insert_summary (summary, cur_tab++, 0.);
847 for (valid = 0.; pb < pe; pb++)
848 valid += (*pb)->u.freq;
851 const struct crosstab *const x = xtab[(*pb)->table];
852 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
853 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
854 const int count = n_cols * n_rows;
856 for (valid = 0.; pb < pe; pb++)
858 const double *data = (*pb)->u.data;
861 for (i = 0; i < count; i++)
865 insert_summary (summary, cur_tab++, valid);
870 while (cur_tab < nxtab)
871 insert_summary (summary, cur_tab++, 0.);
876 /* Inserts a line into T describing the crosstabulation at index
877 TAB_INDEX, which has VALID valid observations. */
879 insert_summary (struct tab_table *t, int tab_index, double valid)
881 struct crosstab *x = xtab[tab_index];
883 tab_hline (t, TAL_1, 0, 6, 0);
885 /* Crosstabulation name. */
887 char *buf = local_alloc (128 * x->nvar);
891 for (i = 0; i < x->nvar; i++)
894 cp = stpcpy (cp, " * ");
897 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
899 tab_text (t, 0, 0, TAB_LEFT, buf);
904 /* Counts and percentages. */
914 for (i = 0; i < 3; i++)
916 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
917 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
928 static struct tab_table *table; /* Crosstabulation table. */
929 static struct tab_table *chisq; /* Chi-square table. */
930 static struct tab_table *sym; /* Symmetric measures table. */
931 static struct tab_table *risk; /* Risk estimate table. */
932 static struct tab_table *direct; /* Directional measures table. */
935 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
937 /* Column values, number of columns. */
938 static union value *cols;
941 /* Row values, number of rows. */
942 static union value *rows;
945 /* Number of statistically interesting columns/rows (columns/rows with
947 static int ns_cols, ns_rows;
949 /* Crosstabulation. */
950 static struct crosstab *x;
952 /* Number of variables from the crosstabulation to consider. This is
953 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
956 /* Matrix contents. */
957 static double *mat; /* Matrix proper. */
958 static double *row_tot; /* Row totals. */
959 static double *col_tot; /* Column totals. */
960 static double W; /* Grand total. */
962 static void display_dimensions (struct tab_table *, int first_difference,
963 struct table_entry *);
964 static void display_crosstabulation (void);
965 static void display_chisq (void);
966 static void display_symmetric (void);
967 static void display_risk (void);
968 static void display_directional (void);
969 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
970 static void table_value_missing (struct tab_table *table, int c, int r,
971 unsigned char opt, const union value *v,
972 const struct variable *var);
973 static void delete_missing (void);
975 /* Output pivot table beginning at PB and continuing until PE,
976 exclusive. For efficiency, *MATP is a pointer to a matrix that can
977 hold *MAXROWS entries. */
979 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
980 double **matp, double **row_totp, double **col_totp,
981 int *maxrows, int *maxcols, int *maxcells)
984 struct table_entry **tb = pb, **te; /* Table begin, table end. */
985 int tc = pe - pb; /* Table count. */
987 /* Table entry for header comparison. */
988 struct table_entry *cmp = NULL;
990 x = xtab[(*pb)->table];
991 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
993 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
995 /* Crosstabulation table initialization. */
998 table = tab_create (nvar + n_cols,
999 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1000 tab_headers (table, nvar - 1, 0, 2, 0);
1002 /* First header line. */
1003 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1004 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1006 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1008 /* Second header line. */
1012 for (i = 2; i < nvar; i++)
1013 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1014 TAB_RIGHT | TAT_TITLE,
1016 ? x->vars[i]->label : x->vars[i]->name));
1017 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1018 x->vars[ROW_VAR]->name);
1019 for (i = 0; i < n_cols; i++)
1020 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1022 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1025 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1026 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1030 char *title = local_alloc (x->nvar * 64 + 128);
1034 if (cmd.pivot == CRS_PIVOT)
1035 for (i = 0; i < nvar; i++)
1038 cp = stpcpy (cp, " by ");
1039 cp = stpcpy (cp, x->vars[i]->name);
1043 cp = spprintf (cp, "%s by %s for",
1044 x->vars[0]->name, x->vars[1]->name);
1045 for (i = 2; i < nvar; i++)
1047 char buf[64], *bufp;
1052 cp = stpcpy (cp, x->vars[i]->name);
1054 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1055 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1057 cp = stpcpy (cp, bufp);
1061 cp = stpcpy (cp, " [");
1062 for (i = 0; i < num_cells; i++)
1070 static const struct tuple cell_names[] =
1072 {CRS_CL_COUNT, N_("count")},
1073 {CRS_CL_ROW, N_("row %")},
1074 {CRS_CL_COLUMN, N_("column %")},
1075 {CRS_CL_TOTAL, N_("total %")},
1076 {CRS_CL_EXPECTED, N_("expected")},
1077 {CRS_CL_RESIDUAL, N_("residual")},
1078 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1079 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1083 const struct tuple *t;
1085 for (t = cell_names; t->value != cells[i]; t++)
1086 assert (t->value != -1);
1088 cp = stpcpy (cp, ", ");
1089 cp = stpcpy (cp, gettext (t->name));
1093 tab_title (table, "%s", title);
1097 tab_offset (table, 0, 2);
1102 /* Chi-square table initialization. */
1103 if (cmd.a_statistics[CRS_ST_CHISQ])
1105 chisq = tab_create (6 + (nvar - 2),
1106 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1107 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1109 tab_title (chisq, _("Chi-square tests."));
1111 tab_offset (chisq, nvar - 2, 0);
1112 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1113 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1114 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1115 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1116 _("Asymp. Sig. (2-sided)"));
1117 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1118 _("Exact. Sig. (2-sided)"));
1119 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1120 _("Exact. Sig. (1-sided)"));
1122 tab_offset (chisq, 0, 1);
1127 /* Symmetric measures. */
1128 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1129 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1130 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1131 || cmd.a_statistics[CRS_ST_KAPPA])
1133 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1134 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1135 tab_title (sym, _("Symmetric measures."));
1137 tab_offset (sym, nvar - 2, 0);
1138 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1139 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1140 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1141 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1142 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1143 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1144 tab_offset (sym, 0, 1);
1149 /* Risk estimate. */
1150 if (cmd.a_statistics[CRS_ST_RISK])
1152 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1153 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1154 tab_title (risk, _("Risk estimate."));
1156 tab_offset (risk, nvar - 2, 0);
1157 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1158 _("95%% Confidence Interval"));
1159 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1160 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1161 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1162 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1163 tab_hline (risk, TAL_1, 2, 3, 1);
1164 tab_vline (risk, TAL_1, 2, 0, 1);
1165 tab_offset (risk, 0, 2);
1170 /* Directional measures. */
1171 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1172 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1174 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1175 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1176 tab_title (direct, _("Directional measures."));
1178 tab_offset (direct, nvar - 2, 0);
1179 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1180 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1181 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1182 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1183 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1184 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1185 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1186 tab_offset (direct, 0, 1);
1193 /* Find pivot subtable if applicable. */
1194 te = find_pivot_extent (tb, &tc, 0);
1198 /* Find all the row variable values. */
1199 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1201 /* Allocate memory space for the column and row totals. */
1202 if (n_rows > *maxrows)
1204 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1205 row_tot = *row_totp;
1208 if (n_cols > *maxcols)
1210 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1211 col_tot = *col_totp;
1215 /* Allocate table space for the matrix. */
1216 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1217 tab_realloc (table, -1,
1218 max (tab_nr (table) + (n_rows + 1) * num_cells,
1219 tab_nr (table) * (pe - pb) / (te - tb)));
1221 if (mode == GENERAL)
1223 /* Allocate memory space for the matrix. */
1224 if (n_cols * n_rows > *maxcells)
1226 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1227 *maxcells = n_cols * n_rows;
1232 /* Build the matrix and calculate column totals. */
1234 union value *cur_col = cols;
1235 union value *cur_row = rows;
1237 double *cp = col_tot;
1238 struct table_entry **p;
1241 for (p = &tb[0]; p < te; p++)
1243 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1247 for (; cur_row < &rows[n_rows]; cur_row++)
1253 mp = &mat[cur_col - cols];
1256 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1263 *cp += *mp = (*p)->u.freq;
1268 /* Zero out the rest of the matrix. */
1269 for (; cur_row < &rows[n_rows]; cur_row++)
1275 if (cur_col < &cols[n_cols])
1277 const int rem_cols = n_cols - (cur_col - cols);
1280 for (c = 0; c < rem_cols; c++)
1282 mp = &mat[cur_col - cols];
1283 for (r = 0; r < n_rows; r++)
1285 for (c = 0; c < rem_cols; c++)
1287 mp += n_cols - rem_cols;
1295 double *tp = col_tot;
1297 assert (mode == INTEGER);
1298 mat = (*tb)->u.data;
1301 /* Calculate column totals. */
1302 for (c = 0; c < n_cols; c++)
1305 double *cp = &mat[c];
1307 for (r = 0; r < n_rows; r++)
1308 cum += cp[r * n_cols];
1316 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1317 ns_cols += *cp != 0.;
1320 /* Calculate row totals. */
1323 double *rp = row_tot;
1326 for (ns_rows = 0, r = n_rows; r--; )
1329 for (c = n_cols; c--; )
1337 /* Calculate grand total. */
1343 if (n_rows < n_cols)
1344 tp = row_tot, n = n_rows;
1346 tp = col_tot, n = n_cols;
1352 /* Find the first variable that differs from the last subtable,
1353 then display the values of the dimensioning variables for
1354 each table that needs it. */
1356 int first_difference = nvar - 1;
1359 for (; ; first_difference--)
1361 assert (first_difference >= 2);
1362 if (memcmp (&cmp->values[first_difference],
1363 &(*tb)->values[first_difference],
1364 sizeof *cmp->values))
1370 display_dimensions (table, first_difference, *tb);
1372 display_dimensions (chisq, first_difference, *tb);
1374 display_dimensions (sym, first_difference, *tb);
1376 display_dimensions (risk, first_difference, *tb);
1378 display_dimensions (direct, first_difference, *tb);
1382 display_crosstabulation ();
1383 if (cmd.miss == CRS_REPORT)
1388 display_symmetric ();
1392 display_directional ();
1403 tab_resize (chisq, 4 + (nvar - 2), -1);
1414 /* Delete missing rows and columns for statistical analysis when
1417 delete_missing (void)
1422 for (r = 0; r < n_rows; r++)
1423 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1427 for (c = 0; c < n_cols; c++)
1428 mat[c + r * n_cols] = 0.;
1436 for (c = 0; c < n_cols; c++)
1437 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1441 for (r = 0; r < n_rows; r++)
1442 mat[c + r * n_cols] = 0.;
1448 /* Prepare table T for submission, and submit it. */
1450 submit (struct tab_table *t)
1457 tab_resize (t, -1, 0);
1458 if (tab_nr (t) == tab_t (t))
1463 tab_offset (t, 0, 0);
1465 for (i = 2; i < nvar; i++)
1466 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1467 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1468 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1469 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1471 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1473 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1474 tab_dim (t, crosstabs_dim);
1478 /* Sets the widths of all the columns and heights of all the rows in
1479 table T for driver D. */
1481 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1485 /* Width of a numerical column. */
1486 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1487 if (cmd.miss == CRS_REPORT)
1488 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1490 /* Set width for header columns. */
1496 w = d->width - c * (t->nc - t->l);
1497 for (i = 0; i <= t->nc; i++)
1501 if (w < d->prop_em_width * 8)
1502 w = d->prop_em_width * 8;
1504 if (w > d->prop_em_width * 15)
1505 w = d->prop_em_width * 15;
1507 for (i = 0; i < t->l; i++)
1511 for (i = t->l; i < t->nc; i++)
1514 for (i = 0; i < t->nr; i++)
1515 t->h[i] = tab_natural_height (t, d, i);
1518 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1519 int *cnt, int pivot);
1520 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1521 int *cnt, int pivot);
1523 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1525 static struct table_entry **
1526 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1528 return (mode == GENERAL
1529 ? find_pivot_extent_general (tp, cnt, pivot)
1530 : find_pivot_extent_integer (tp, cnt, pivot));
1533 /* Find the extent of a region in TP that contains one table. If
1534 PIVOT != 0 that means a set of table entries with identical table
1535 number; otherwise they also have to have the same values for every
1536 dimension after the row and column dimensions. The table that is
1537 searched starts at TP and has length CNT. Returns the first entry
1538 after the last one in the table; sets *CNT to the number of
1539 remaining values. If there are no entries in TP at all, returns
1540 NULL. A yucky interface, admittedly, but it works. */
1541 static struct table_entry **
1542 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1544 struct table_entry *fp = *tp;
1549 x = xtab[(*tp)->table];
1557 if ((*tp)->table != fp->table)
1562 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1569 /* Integer mode correspondent to find_pivot_extent_general(). This
1570 could be optimized somewhat, but I just don't give a crap about
1571 CROSSTABS performance in integer mode, which is just a
1572 CROSSTABS wart as far as I'm concerned.
1574 That said, feel free to send optimization patches to me. */
1575 static struct table_entry **
1576 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1578 struct table_entry *fp = *tp;
1583 x = xtab[(*tp)->table];
1591 if ((*tp)->table != fp->table)
1596 if (memcmp (&(*tp)->values[2], &fp->values[2],
1597 sizeof (union value) * (x->nvar - 2)))
1604 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1605 result. WIDTH_ points to an int which is either 0 for a
1606 numeric value or a string width for a string value. */
1608 compare_value (const void *a_, const void *b_, void *width_)
1610 const union value *a = a_;
1611 const union value *b = b_;
1612 const int *pwidth = width_;
1613 const int width = *pwidth;
1616 return (a->f < b->f) ? -1 : (a->f > b->f);
1618 return strncmp (a->s, b->s, width);
1621 /* Given an array of ENTRY_CNT table_entry structures starting at
1622 ENTRIES, creates a sorted list of the values that the variable
1623 with index VAR_IDX takes on. The values are returned as a
1624 malloc()'darray stored in *VALUES, with the number of values
1625 stored in *VALUE_CNT.
1628 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1629 union value **values, int *value_cnt)
1631 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1633 if (mode == GENERAL)
1635 int width = v->width;
1638 *values = xnmalloc (entry_cnt, sizeof **values);
1639 for (i = 0; i < entry_cnt; i++)
1640 (*values)[i] = entries[i]->values[var_idx];
1641 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1642 compare_value, &width);
1646 struct var_range *vr = get_var_range (v);
1649 assert (mode == INTEGER);
1650 *values = xnmalloc (vr->count, sizeof **values);
1651 for (i = 0; i < vr->count; i++)
1652 (*values)[i].f = i + vr->min;
1653 *value_cnt = vr->count;
1657 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1658 from V, displayed with print format spec from variable VAR. When
1659 in REPORT missing-value mode, missing values have an M appended. */
1661 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1662 const union value *v, const struct variable *var)
1664 struct fixed_string s;
1666 const char *label = val_labs_find (var->val_labs, *v);
1669 tab_text (table, c, r, TAB_LEFT, label);
1673 s.string = tab_alloc (table, var->print.w);
1674 format_short (s.string, &var->print, v);
1675 s.length = strlen (s.string);
1676 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1677 s.string[s.length++] = 'M';
1678 while (s.length && *s.string == ' ')
1683 tab_raw (table, c, r, opt, &s);
1686 /* Draws a line across TABLE at the current row to indicate the most
1687 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1688 that changed, and puts the values that changed into the table. TB
1689 and X must be the corresponding table_entry and crosstab,
1692 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1694 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1696 for (; first_difference >= 2; first_difference--)
1697 table_value_missing (table, nvar - first_difference - 1, 0,
1698 TAB_RIGHT, &tb->values[first_difference],
1699 x->vars[first_difference]);
1702 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1703 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1704 additionally suffixed with a letter `M'. */
1706 format_cell_entry (struct tab_table *table, int c, int r, double value,
1707 char suffix, int mark_missing)
1709 const struct fmt_spec f = {FMT_F, 10, 1};
1711 struct fixed_string s;
1714 s.string = tab_alloc (table, 16);
1716 data_out (s.string, &f, &v);
1717 while (*s.string == ' ')
1723 s.string[s.length++] = suffix;
1725 s.string[s.length++] = 'M';
1727 tab_raw (table, c, r, TAB_RIGHT, &s);
1730 /* Displays the crosstabulation table. */
1732 display_crosstabulation (void)
1737 for (r = 0; r < n_rows; r++)
1738 table_value_missing (table, nvar - 2, r * num_cells,
1739 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1741 tab_text (table, nvar - 2, n_rows * num_cells,
1742 TAB_LEFT, _("Total"));
1744 /* Put in the actual cells. */
1749 tab_offset (table, nvar - 1, -1);
1750 for (r = 0; r < n_rows; r++)
1753 tab_hline (table, TAL_1, -1, n_cols, 0);
1754 for (c = 0; c < n_cols; c++)
1756 int mark_missing = 0;
1757 double expected_value = row_tot[r] * col_tot[c] / W;
1758 if (cmd.miss == CRS_REPORT
1759 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1760 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1763 for (i = 0; i < num_cells; i++)
1774 v = *mp / row_tot[r] * 100.;
1778 v = *mp / col_tot[c] * 100.;
1785 case CRS_CL_EXPECTED:
1788 case CRS_CL_RESIDUAL:
1789 v = *mp - expected_value;
1791 case CRS_CL_SRESIDUAL:
1792 v = (*mp - expected_value) / sqrt (expected_value);
1794 case CRS_CL_ASRESIDUAL:
1795 v = ((*mp - expected_value)
1796 / sqrt (expected_value
1797 * (1. - row_tot[r] / W)
1798 * (1. - col_tot[c] / W)));
1805 format_cell_entry (table, c, i, v, suffix, mark_missing);
1811 tab_offset (table, -1, tab_row (table) + num_cells);
1819 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1820 for (r = 0; r < n_rows; r++)
1823 int mark_missing = 0;
1825 if (cmd.miss == CRS_REPORT
1826 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1829 for (i = 0; i < num_cells; i++)
1843 v = row_tot[r] / W * 100.;
1847 v = row_tot[r] / W * 100.;
1850 case CRS_CL_EXPECTED:
1851 case CRS_CL_RESIDUAL:
1852 case CRS_CL_SRESIDUAL:
1853 case CRS_CL_ASRESIDUAL:
1861 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1862 tab_next_row (table);
1867 /* Column totals, grand total. */
1873 tab_hline (table, TAL_1, -1, n_cols, 0);
1874 for (c = 0; c <= n_cols; c++)
1876 double ct = c < n_cols ? col_tot[c] : W;
1877 int mark_missing = 0;
1881 if (cmd.miss == CRS_REPORT && c < n_cols
1882 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1885 for (i = 0; i < num_cells; i++)
1907 case CRS_CL_EXPECTED:
1908 case CRS_CL_RESIDUAL:
1909 case CRS_CL_SRESIDUAL:
1910 case CRS_CL_ASRESIDUAL:
1917 format_cell_entry (table, c, i, v, suffix, mark_missing);
1922 tab_offset (table, -1, tab_row (table) + last_row);
1925 tab_offset (table, 0, -1);
1928 static void calc_r (double *X, double *Y, double *, double *, double *);
1929 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1931 /* Display chi-square statistics. */
1933 display_chisq (void)
1935 static const char *chisq_stats[N_CHISQ] =
1937 N_("Pearson Chi-Square"),
1938 N_("Likelihood Ratio"),
1939 N_("Fisher's Exact Test"),
1940 N_("Continuity Correction"),
1941 N_("Linear-by-Linear Association"),
1943 double chisq_v[N_CHISQ];
1944 double fisher1, fisher2;
1950 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1952 tab_offset (chisq, nvar - 2, -1);
1954 for (i = 0; i < N_CHISQ; i++)
1956 if ((i != 2 && chisq_v[i] == SYSMIS)
1957 || (i == 2 && fisher1 == SYSMIS))
1961 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1964 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1965 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1966 tab_float (chisq, 3, 0, TAB_RIGHT,
1967 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1972 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1973 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1975 tab_next_row (chisq);
1978 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1979 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1980 tab_next_row (chisq);
1982 tab_offset (chisq, 0, -1);
1985 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1986 double[N_SYMMETRIC]);
1988 /* Display symmetric measures. */
1990 display_symmetric (void)
1992 static const char *categories[] =
1994 N_("Nominal by Nominal"),
1995 N_("Ordinal by Ordinal"),
1996 N_("Interval by Interval"),
1997 N_("Measure of Agreement"),
2000 static const char *stats[N_SYMMETRIC] =
2004 N_("Contingency Coefficient"),
2005 N_("Kendall's tau-b"),
2006 N_("Kendall's tau-c"),
2008 N_("Spearman Correlation"),
2013 static const int stats_categories[N_SYMMETRIC] =
2015 0, 0, 0, 1, 1, 1, 1, 2, 3,
2019 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2022 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2025 tab_offset (sym, nvar - 2, -1);
2027 for (i = 0; i < N_SYMMETRIC; i++)
2029 if (sym_v[i] == SYSMIS)
2032 if (stats_categories[i] != last_cat)
2034 last_cat = stats_categories[i];
2035 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2038 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2039 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2040 if (sym_ase[i] != SYSMIS)
2041 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2042 if (sym_t[i] != SYSMIS)
2043 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2044 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2048 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2049 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2052 tab_offset (sym, 0, -1);
2055 static int calc_risk (double[], double[], double[], union value *);
2057 /* Display risk estimate. */
2062 double risk_v[3], lower[3], upper[3];
2066 if (!calc_risk (risk_v, upper, lower, c))
2069 tab_offset (risk, nvar - 2, -1);
2071 for (i = 0; i < 3; i++)
2073 if (risk_v[i] == SYSMIS)
2079 if (x->vars[COL_VAR]->type == NUMERIC)
2080 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2081 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2083 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2084 x->vars[COL_VAR]->name,
2085 x->vars[COL_VAR]->width, c[0].s,
2086 x->vars[COL_VAR]->width, c[1].s);
2090 if (x->vars[ROW_VAR]->type == NUMERIC)
2091 sprintf (buf, _("For cohort %s = %g"),
2092 x->vars[ROW_VAR]->name, rows[i - 1].f);
2094 sprintf (buf, _("For cohort %s = %.*s"),
2095 x->vars[ROW_VAR]->name,
2096 x->vars[ROW_VAR]->width, rows[i - 1].s);
2100 tab_text (risk, 0, 0, TAB_LEFT, buf);
2101 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2102 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2103 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2104 tab_next_row (risk);
2107 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2108 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2109 tab_next_row (risk);
2111 tab_offset (risk, 0, -1);
2114 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2115 double[N_DIRECTIONAL]);
2117 /* Display directional measures. */
2119 display_directional (void)
2121 static const char *categories[] =
2123 N_("Nominal by Nominal"),
2124 N_("Ordinal by Ordinal"),
2125 N_("Nominal by Interval"),
2128 static const char *stats[] =
2131 N_("Goodman and Kruskal tau"),
2132 N_("Uncertainty Coefficient"),
2137 static const char *types[] =
2144 static const int stats_categories[N_DIRECTIONAL] =
2146 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2149 static const int stats_stats[N_DIRECTIONAL] =
2151 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2154 static const int stats_types[N_DIRECTIONAL] =
2156 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2159 static const int *stats_lookup[] =
2166 static const char **stats_names[] =
2178 double direct_v[N_DIRECTIONAL];
2179 double direct_ase[N_DIRECTIONAL];
2180 double direct_t[N_DIRECTIONAL];
2184 if (!calc_directional (direct_v, direct_ase, direct_t))
2187 tab_offset (direct, nvar - 2, -1);
2189 for (i = 0; i < N_DIRECTIONAL; i++)
2191 if (direct_v[i] == SYSMIS)
2197 for (j = 0; j < 3; j++)
2198 if (last[j] != stats_lookup[j][i])
2201 tab_hline (direct, TAL_1, j, 6, 0);
2206 int k = last[j] = stats_lookup[j][i];
2211 string = x->vars[0]->name;
2213 string = x->vars[1]->name;
2215 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2216 gettext (stats_names[j][k]), string);
2221 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2222 if (direct_ase[i] != SYSMIS)
2223 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2224 if (direct_t[i] != SYSMIS)
2225 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2226 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2227 tab_next_row (direct);
2230 tab_offset (direct, 0, -1);
2233 /* Statistical calculations. */
2235 /* Returns the value of the gamma (factorial) function for an integer
2238 gamma_int (double x)
2243 for (i = 2; i < x; i++)
2248 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2250 static inline double
2251 Pr (int a, int b, int c, int d)
2253 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2254 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2255 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2256 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2257 / gamma_int (a + b + c + d + 1.));
2260 /* Swap the contents of A and B. */
2262 swap (int *a, int *b)
2269 /* Calculate significance for Fisher's exact test as specified in
2270 _SPSS Statistical Algorithms_, Appendix 5. */
2272 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2276 if (min (c, d) < min (a, b))
2277 swap (&a, &c), swap (&b, &d);
2278 if (min (b, d) < min (a, c))
2279 swap (&a, &b), swap (&c, &d);
2283 swap (&a, &b), swap (&c, &d);
2285 swap (&a, &c), swap (&b, &d);
2289 for (x = 0; x <= a; x++)
2290 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2292 *fisher2 = *fisher1;
2293 for (x = 1; x <= b; x++)
2294 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2297 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2298 columns with values COLS and N_ROWS rows with values ROWS. Values
2299 in the matrix sum to W. */
2301 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2302 double *fisher1, double *fisher2)
2306 chisq[0] = chisq[1] = 0.;
2307 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2308 *fisher1 = *fisher2 = SYSMIS;
2310 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2312 if (ns_rows <= 1 || ns_cols <= 1)
2314 chisq[0] = chisq[1] = SYSMIS;
2318 for (r = 0; r < n_rows; r++)
2319 for (c = 0; c < n_cols; c++)
2321 const double expected = row_tot[r] * col_tot[c] / W;
2322 const double freq = mat[n_cols * r + c];
2323 const double residual = freq - expected;
2325 chisq[0] += residual * residual / expected;
2327 chisq[1] += freq * log (expected / freq);
2338 /* Calculate Yates and Fisher exact test. */
2339 if (ns_cols == 2 && ns_rows == 2)
2341 double f11, f12, f21, f22;
2347 for (i = j = 0; i < n_cols; i++)
2348 if (col_tot[i] != 0.)
2357 f11 = mat[nz_cols[0]];
2358 f12 = mat[nz_cols[1]];
2359 f21 = mat[nz_cols[0] + n_cols];
2360 f22 = mat[nz_cols[1] + n_cols];
2365 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2368 chisq[3] = (W * x * x
2369 / (f11 + f12) / (f21 + f22)
2370 / (f11 + f21) / (f12 + f22));
2378 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2379 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2382 /* Calculate Mantel-Haenszel. */
2383 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2385 double r, ase_0, ase_1;
2386 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2388 chisq[4] = (W - 1.) * r * r;
2393 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2394 ASE_1, and ase_0 into ASE_0. The row and column values must be
2395 passed in X and Y. */
2397 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2399 double SX, SY, S, T;
2401 double sum_XYf, sum_X2Y2f;
2402 double sum_Xr, sum_X2r;
2403 double sum_Yc, sum_Y2c;
2406 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2407 for (j = 0; j < n_cols; j++)
2409 double fij = mat[j + i * n_cols];
2410 double product = X[i] * Y[j];
2411 double temp = fij * product;
2413 sum_X2Y2f += temp * product;
2416 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2418 sum_Xr += X[i] * row_tot[i];
2419 sum_X2r += X[i] * X[i] * row_tot[i];
2423 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2425 sum_Yc += Y[i] * col_tot[i];
2426 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2430 S = sum_XYf - sum_Xr * sum_Yc / W;
2431 SX = sum_X2r - sum_Xr * sum_Xr / W;
2432 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2435 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2440 for (s = c = 0., i = 0; i < n_rows; i++)
2441 for (j = 0; j < n_cols; j++)
2443 double Xresid, Yresid;
2446 Xresid = X[i] - Xbar;
2447 Yresid = Y[j] - Ybar;
2448 temp = (T * Xresid * Yresid
2450 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2451 y = mat[j + i * n_cols] * temp * temp - c;
2456 *ase_1 = sqrt (s) / (T * T);
2460 static double somers_d_v[3];
2461 static double somers_d_ase[3];
2462 static double somers_d_t[3];
2464 /* Calculate symmetric statistics and their asymptotic standard
2465 errors. Returns 0 if none could be calculated. */
2467 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2468 double t[N_SYMMETRIC])
2470 int q = min (ns_rows, ns_cols);
2479 for (i = 0; i < N_SYMMETRIC; i++)
2480 v[i] = ase[i] = t[i] = SYSMIS;
2483 /* Phi, Cramer's V, contingency coefficient. */
2484 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2486 double Xp = 0.; /* Pearson chi-square. */
2491 for (r = 0; r < n_rows; r++)
2492 for (c = 0; c < n_cols; c++)
2494 const double expected = row_tot[r] * col_tot[c] / W;
2495 const double freq = mat[n_cols * r + c];
2496 const double residual = freq - expected;
2498 Xp += residual * residual / expected;
2502 if (cmd.a_statistics[CRS_ST_PHI])
2504 v[0] = sqrt (Xp / W);
2505 v[1] = sqrt (Xp / (W * (q - 1)));
2507 if (cmd.a_statistics[CRS_ST_CC])
2508 v[2] = sqrt (Xp / (Xp + W));
2511 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2512 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2517 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2524 for (r = 0; r < n_rows; r++)
2525 Dr -= row_tot[r] * row_tot[r];
2526 for (c = 0; c < n_cols; c++)
2527 Dc -= col_tot[c] * col_tot[c];
2533 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2534 for (c = 0; c < n_cols; c++)
2538 for (r = 0; r < n_rows; r++)
2539 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2549 for (i = 0; i < n_rows; i++)
2553 for (j = 1; j < n_cols; j++)
2554 Cij += col_tot[j] - cum[j + i * n_cols];
2557 for (j = 1; j < n_cols; j++)
2558 Dij += cum[j + (i - 1) * n_cols];
2562 double fij = mat[j + i * n_cols];
2568 assert (j < n_cols);
2570 Cij -= col_tot[j] - cum[j + i * n_cols];
2571 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2575 Cij += cum[j - 1 + (i - 1) * n_cols];
2576 Dij -= cum[j + (i - 1) * n_cols];
2582 if (cmd.a_statistics[CRS_ST_BTAU])
2583 v[3] = (P - Q) / sqrt (Dr * Dc);
2584 if (cmd.a_statistics[CRS_ST_CTAU])
2585 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2586 if (cmd.a_statistics[CRS_ST_GAMMA])
2587 v[5] = (P - Q) / (P + Q);
2589 /* ASE for tau-b, tau-c, gamma. Calculations could be
2590 eliminated here, at expense of memory. */
2595 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2596 for (i = 0; i < n_rows; i++)
2600 for (j = 1; j < n_cols; j++)
2601 Cij += col_tot[j] - cum[j + i * n_cols];
2604 for (j = 1; j < n_cols; j++)
2605 Dij += cum[j + (i - 1) * n_cols];
2609 double fij = mat[j + i * n_cols];
2611 if (cmd.a_statistics[CRS_ST_BTAU])
2613 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2614 + v[3] * (row_tot[i] * Dc
2615 + col_tot[j] * Dr));
2616 btau_cum += fij * temp * temp;
2620 const double temp = Cij - Dij;
2621 ctau_cum += fij * temp * temp;
2624 if (cmd.a_statistics[CRS_ST_GAMMA])
2626 const double temp = Q * Cij - P * Dij;
2627 gamma_cum += fij * temp * temp;
2630 if (cmd.a_statistics[CRS_ST_D])
2632 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2633 - (P - Q) * (W - row_tot[i]));
2634 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2635 - (Q - P) * (W - col_tot[j]));
2640 assert (j < n_cols);
2642 Cij -= col_tot[j] - cum[j + i * n_cols];
2643 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2647 Cij += cum[j - 1 + (i - 1) * n_cols];
2648 Dij -= cum[j + (i - 1) * n_cols];
2654 btau_var = ((btau_cum
2655 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2657 if (cmd.a_statistics[CRS_ST_BTAU])
2659 ase[3] = sqrt (btau_var);
2660 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2663 if (cmd.a_statistics[CRS_ST_CTAU])
2665 ase[4] = ((2 * q / ((q - 1) * W * W))
2666 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2667 t[4] = v[4] / ase[4];
2669 if (cmd.a_statistics[CRS_ST_GAMMA])
2671 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2672 t[5] = v[5] / (2. / (P + Q)
2673 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2675 if (cmd.a_statistics[CRS_ST_D])
2677 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2678 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2679 somers_d_t[0] = (somers_d_v[0]
2681 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2682 somers_d_v[1] = (P - Q) / Dc;
2683 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2684 somers_d_t[1] = (somers_d_v[1]
2686 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2687 somers_d_v[2] = (P - Q) / Dr;
2688 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2689 somers_d_t[2] = (somers_d_v[2]
2691 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2697 /* Spearman correlation, Pearson's r. */
2698 if (cmd.a_statistics[CRS_ST_CORR])
2700 double *R = local_alloc (sizeof *R * n_rows);
2701 double *C = local_alloc (sizeof *C * n_cols);
2704 double y, t, c = 0., s = 0.;
2709 R[i] = s + (row_tot[i] + 1.) / 2.;
2716 assert (i < n_rows);
2721 double y, t, c = 0., s = 0.;
2726 C[j] = s + (col_tot[j] + 1.) / 2;
2733 assert (j < n_cols);
2737 calc_r (R, C, &v[6], &t[6], &ase[6]);
2743 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2747 /* Cohen's kappa. */
2748 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2750 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2753 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2754 i < ns_rows; i++, j++)
2758 while (col_tot[j] == 0.)
2761 prod = row_tot[i] * col_tot[j];
2762 sum = row_tot[i] + col_tot[j];
2764 sum_fii += mat[j + i * n_cols];
2766 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2767 sum_riciri_ci += prod * sum;
2769 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2770 for (j = 0; j < ns_cols; j++)
2772 double sum = row_tot[i] + col_tot[j];
2773 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2776 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2778 ase[8] = sqrt ((W * W * sum_rici
2779 + sum_rici * sum_rici
2780 - W * sum_riciri_ci)
2781 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2783 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2784 / pow2 (W * W - sum_rici))
2785 + ((2. * (W - sum_fii)
2786 * (2. * sum_fii * sum_rici
2787 - W * sum_fiiri_ci))
2788 / cube (W * W - sum_rici))
2789 + (pow2 (W - sum_fii)
2790 * (W * sum_fijri_ci2 - 4.
2791 * sum_rici * sum_rici)
2792 / pow4 (W * W - sum_rici))));
2794 t[8] = v[8] / ase[8];
2801 /* Calculate risk estimate. */
2803 calc_risk (double *value, double *upper, double *lower, union value *c)
2805 double f11, f12, f21, f22;
2811 for (i = 0; i < 3; i++)
2812 value[i] = upper[i] = lower[i] = SYSMIS;
2815 if (ns_rows != 2 || ns_cols != 2)
2822 for (i = j = 0; i < n_cols; i++)
2823 if (col_tot[i] != 0.)
2832 f11 = mat[nz_cols[0]];
2833 f12 = mat[nz_cols[1]];
2834 f21 = mat[nz_cols[0] + n_cols];
2835 f22 = mat[nz_cols[1] + n_cols];
2837 c[0] = cols[nz_cols[0]];
2838 c[1] = cols[nz_cols[1]];
2841 value[0] = (f11 * f22) / (f12 * f21);
2842 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2843 lower[0] = value[0] * exp (-1.960 * v);
2844 upper[0] = value[0] * exp (1.960 * v);
2846 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2847 v = sqrt ((f12 / (f11 * (f11 + f12)))
2848 + (f22 / (f21 * (f21 + f22))));
2849 lower[1] = value[1] * exp (-1.960 * v);
2850 upper[1] = value[1] * exp (1.960 * v);
2852 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2853 v = sqrt ((f11 / (f12 * (f11 + f12)))
2854 + (f21 / (f22 * (f21 + f22))));
2855 lower[2] = value[2] * exp (-1.960 * v);
2856 upper[2] = value[2] * exp (1.960 * v);
2861 /* Calculate directional measures. */
2863 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2864 double t[N_DIRECTIONAL])
2869 for (i = 0; i < N_DIRECTIONAL; i++)
2870 v[i] = ase[i] = t[i] = SYSMIS;
2874 if (cmd.a_statistics[CRS_ST_LAMBDA])
2876 double *fim = xnmalloc (n_rows, sizeof *fim);
2877 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2878 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2879 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2880 double sum_fim, sum_fmj;
2882 int rm_index, cm_index;
2885 /* Find maximum for each row and their sum. */
2886 for (sum_fim = 0., i = 0; i < n_rows; i++)
2888 double max = mat[i * n_cols];
2891 for (j = 1; j < n_cols; j++)
2892 if (mat[j + i * n_cols] > max)
2894 max = mat[j + i * n_cols];
2898 sum_fim += fim[i] = max;
2899 fim_index[i] = index;
2902 /* Find maximum for each column. */
2903 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2905 double max = mat[j];
2908 for (i = 1; i < n_rows; i++)
2909 if (mat[j + i * n_cols] > max)
2911 max = mat[j + i * n_cols];
2915 sum_fmj += fmj[j] = max;
2916 fmj_index[j] = index;
2919 /* Find maximum row total. */
2922 for (i = 1; i < n_rows; i++)
2923 if (row_tot[i] > rm)
2929 /* Find maximum column total. */
2932 for (j = 1; j < n_cols; j++)
2933 if (col_tot[j] > cm)
2939 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2940 v[1] = (sum_fmj - rm) / (W - rm);
2941 v[2] = (sum_fim - cm) / (W - cm);
2943 /* ASE1 for Y given X. */
2947 for (accum = 0., i = 0; i < n_rows; i++)
2948 for (j = 0; j < n_cols; j++)
2950 const int deltaj = j == cm_index;
2951 accum += (mat[j + i * n_cols]
2952 * pow2 ((j == fim_index[i])
2957 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2960 /* ASE0 for Y given X. */
2964 for (accum = 0., i = 0; i < n_rows; i++)
2965 if (cm_index != fim_index[i])
2966 accum += (mat[i * n_cols + fim_index[i]]
2967 + mat[i * n_cols + cm_index]);
2968 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2971 /* ASE1 for X given Y. */
2975 for (accum = 0., i = 0; i < n_rows; i++)
2976 for (j = 0; j < n_cols; j++)
2978 const int deltaj = i == rm_index;
2979 accum += (mat[j + i * n_cols]
2980 * pow2 ((i == fmj_index[j])
2985 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2988 /* ASE0 for X given Y. */
2992 for (accum = 0., j = 0; j < n_cols; j++)
2993 if (rm_index != fmj_index[j])
2994 accum += (mat[j + n_cols * fmj_index[j]]
2995 + mat[j + n_cols * rm_index]);
2996 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2999 /* Symmetric ASE0 and ASE1. */
3004 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3005 for (j = 0; j < n_cols; j++)
3007 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3008 int temp1 = (i == rm_index) + (j == cm_index);
3009 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3010 accum1 += (mat[j + i * n_cols]
3011 * pow2 (temp0 + (v[0] - 1.) * temp1));
3013 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3014 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3015 / (2. * W - rm - cm));
3024 double sum_fij2_ri, sum_fij2_ci;
3025 double sum_ri2, sum_cj2;
3027 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3028 for (j = 0; j < n_cols; j++)
3030 double temp = pow2 (mat[j + i * n_cols]);
3031 sum_fij2_ri += temp / row_tot[i];
3032 sum_fij2_ci += temp / col_tot[j];
3035 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3036 sum_ri2 += row_tot[i] * row_tot[i];
3038 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3039 sum_cj2 += col_tot[j] * col_tot[j];
3041 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3042 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3046 if (cmd.a_statistics[CRS_ST_UC])
3048 double UX, UY, UXY, P;
3049 double ase1_yx, ase1_xy, ase1_sym;
3052 for (UX = 0., i = 0; i < n_rows; i++)
3053 if (row_tot[i] > 0.)
3054 UX -= row_tot[i] / W * log (row_tot[i] / W);
3056 for (UY = 0., j = 0; j < n_cols; j++)
3057 if (col_tot[j] > 0.)
3058 UY -= col_tot[j] / W * log (col_tot[j] / W);
3060 for (UXY = P = 0., i = 0; i < n_rows; i++)
3061 for (j = 0; j < n_cols; j++)
3063 double entry = mat[j + i * n_cols];
3068 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3069 UXY -= entry / W * log (entry / W);
3072 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3073 for (j = 0; j < n_cols; j++)
3075 double entry = mat[j + i * n_cols];
3080 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3081 + (UX - UXY) * log (col_tot[j] / W));
3082 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3083 + (UY - UXY) * log (row_tot[i] / W));
3084 ase1_sym += entry * pow2 ((UXY
3085 * log (row_tot[i] * col_tot[j] / (W * W)))
3086 - (UX + UY) * log (entry / W));
3089 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3090 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3091 t[5] = v[5] / ((2. / (W * (UX + UY)))
3092 * sqrt (P - pow2 (UX + UY - UXY) / W));
3094 v[6] = (UX + UY - UXY) / UX;
3095 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3096 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3098 v[7] = (UX + UY - UXY) / UY;
3099 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3100 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3104 if (cmd.a_statistics[CRS_ST_D])
3109 calc_symmetric (NULL, NULL, NULL);
3110 for (i = 0; i < 3; i++)
3112 v[8 + i] = somers_d_v[i];
3113 ase[8 + i] = somers_d_ase[i];
3114 t[8 + i] = somers_d_t[i];
3119 if (cmd.a_statistics[CRS_ST_ETA])
3122 double sum_Xr, sum_X2r;
3126 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3128 sum_Xr += rows[i].f * row_tot[i];
3129 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3131 SX = sum_X2r - sum_Xr * sum_Xr / W;
3133 for (SXW = 0., j = 0; j < n_cols; j++)
3137 for (cum = 0., i = 0; i < n_rows; i++)
3139 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3140 cum += rows[i].f * mat[j + i * n_cols];
3143 SXW -= cum * cum / col_tot[j];
3145 v[11] = sqrt (1. - SXW / SX);
3149 double sum_Yc, sum_Y2c;
3153 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3155 sum_Yc += cols[i].f * col_tot[i];
3156 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3158 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3160 for (SYW = 0., i = 0; i < n_rows; i++)
3164 for (cum = 0., j = 0; j < n_cols; j++)
3166 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3167 cum += cols[j].f * mat[j + i * n_cols];
3170 SYW -= cum * cum / row_tot[i];
3172 v[12] = sqrt (1. - SYW / SY);
3179 /* A wrapper around data_out() that limits string output to short
3180 string width and null terminates the result. */
3182 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3184 struct fmt_spec fmt_subst;
3186 /* Limit to short string width. */
3187 if (formats[fp->type].cat & FCAT_STRING)
3191 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3192 if (fmt_subst.type == FMT_A)
3193 fmt_subst.w = min (8, fmt_subst.w);
3195 fmt_subst.w = min (16, fmt_subst.w);
3201 data_out (s, fp, v);
3203 /* Null terminate. */