1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
33 #include <libpspp/message.h>
37 #include <gsl/gsl_cdf.h>
38 #include <libpspp/array.h>
39 #include <libpspp/alloc.h>
40 #include <data/case.h>
41 #include <data/dictionary.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/pool.h>
44 #include <language/command.h>
45 #include <libpspp/compiler.h>
46 #include <language/lexer/lexer.h>
47 #include <libpspp/message.h>
48 #include <libpspp/magic.h>
49 #include <libpspp/misc.h>
50 #include <output/output.h>
51 #include <libpspp/str.h>
52 #include <output/table.h>
53 #include <data/value-labels.h>
54 #include <data/variable.h>
55 #include <procedure.h>
58 #define _(msgid) gettext (msgid)
59 #define N_(msgid) msgid
63 #include <libpspp/debug-print.h>
69 +missing=miss:!table/include/report;
70 +write[wr_]=none,cells,all;
71 +format=fmt:!labels/nolabels/novallabs,
74 tabl:!tables/notables,
77 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
79 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
85 /* Number of chi-square statistics. */
88 /* Number of symmetric statistics. */
91 /* Number of directional statistics. */
92 #define N_DIRECTIONAL 13
94 /* A single table entry for general mode. */
97 int table; /* Flattened table number. */
100 double freq; /* Frequency count. */
101 double *data; /* Crosstabulation table for integer mode. */
104 union value values[1]; /* Values. */
107 /* A crosstabulation. */
110 int nvar; /* Number of variables. */
111 double missing; /* Missing cases count. */
112 int ofs; /* Integer mode: Offset into sorted_tab[]. */
113 struct variable *vars[2]; /* At least two variables; sorted by
114 larger indices first. */
117 /* Integer mode variable info. */
120 int min; /* Minimum value. */
121 int max; /* Maximum value + 1. */
122 int count; /* max - min. */
125 static inline struct var_range *
126 get_var_range (struct variable *v)
129 assert (v->aux != NULL);
133 /* Indexes into crosstab.v. */
140 /* General mode crosstabulation table. */
141 static struct hsh_table *gen_tab; /* Hash table. */
142 static int n_sorted_tab; /* Number of entries in sorted_tab. */
143 static struct table_entry **sorted_tab; /* Sorted table. */
145 /* Variables specifies on VARIABLES. */
146 static struct variable **variables;
147 static size_t variables_cnt;
150 static struct crosstab **xtab;
153 /* Integer or general mode? */
162 static int num_cells; /* Number of cells requested. */
163 static int cells[8]; /* Cells requested. */
166 static int write; /* One of WR_* that specifies the WRITE style. */
168 /* Command parsing info. */
169 static struct cmd_crosstabs cmd;
172 static struct pool *pl_tc; /* For table cells. */
173 static struct pool *pl_col; /* For column data. */
175 static int internal_cmd_crosstabs (void);
176 static void precalc (void *);
177 static bool calc_general (struct ccase *, void *);
178 static bool calc_integer (struct ccase *, void *);
179 static void postcalc (void *);
180 static void submit (struct tab_table *);
182 static void format_short (char *s, const struct fmt_spec *fp,
183 const union value *v);
185 /* Parse and execute CROSSTABS, then clean up. */
189 int result = internal_cmd_crosstabs ();
192 pool_destroy (pl_tc);
193 pool_destroy (pl_col);
198 /* Parses and executes the CROSSTABS procedure. */
200 internal_cmd_crosstabs (void)
209 pl_tc = pool_create ();
210 pl_col = pool_create ();
212 if (!parse_crosstabs (&cmd))
215 mode = variables ? INTEGER : GENERAL;
220 cmd.a_cells[CRS_CL_COUNT] = 1;
226 for (i = 0; i < CRS_CL_count; i++)
231 cmd.a_cells[CRS_CL_COUNT] = 1;
232 cmd.a_cells[CRS_CL_ROW] = 1;
233 cmd.a_cells[CRS_CL_COLUMN] = 1;
234 cmd.a_cells[CRS_CL_TOTAL] = 1;
236 if (cmd.a_cells[CRS_CL_ALL])
238 for (i = 0; i < CRS_CL_count; i++)
240 cmd.a_cells[CRS_CL_ALL] = 0;
242 cmd.a_cells[CRS_CL_NONE] = 0;
244 for (num_cells = i = 0; i < CRS_CL_count; i++)
246 cells[num_cells++] = i;
249 if (cmd.sbc_statistics)
254 for (i = 0; i < CRS_ST_count; i++)
255 if (cmd.a_statistics[i])
258 cmd.a_statistics[CRS_ST_CHISQ] = 1;
259 if (cmd.a_statistics[CRS_ST_ALL])
260 for (i = 0; i < CRS_ST_count; i++)
261 cmd.a_statistics[i] = 1;
265 if (cmd.miss == CRS_REPORT && mode == GENERAL)
267 msg (SE, _("Missing mode REPORT not allowed in general mode. "
268 "Assuming MISSING=TABLE."));
269 cmd.miss = CRS_TABLE;
273 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
274 cmd.a_write[CRS_WR_ALL] = 0;
275 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
277 msg (SE, _("Write mode ALL not allowed in general mode. "
278 "Assuming WRITE=CELLS."));
279 cmd.a_write[CRS_WR_CELLS] = 1;
282 && (cmd.a_write[CRS_WR_NONE]
283 + cmd.a_write[CRS_WR_ALL]
284 + cmd.a_write[CRS_WR_CELLS] == 0))
285 cmd.a_write[CRS_WR_CELLS] = 1;
286 if (cmd.a_write[CRS_WR_CELLS])
287 write = CRS_WR_CELLS;
288 else if (cmd.a_write[CRS_WR_ALL])
293 ok = procedure_with_splits (precalc,
294 mode == GENERAL ? calc_general : calc_integer,
297 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
300 /* Parses the TABLES subcommand. */
302 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
304 struct var_set *var_set;
306 struct variable ***by = NULL;
307 size_t *by_nvar = NULL;
311 /* Ensure that this is a TABLES subcommand. */
312 if (!lex_match_id ("TABLES")
313 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
318 if (variables != NULL)
319 var_set = var_set_create_from_array (variables, variables_cnt);
321 var_set = var_set_create_from_dict (default_dict);
322 assert (var_set != NULL);
326 by = xnrealloc (by, n_by + 1, sizeof *by);
327 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
328 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
329 PV_NO_DUPLICATE | PV_NO_SCRATCH))
331 if (xalloc_oversized (nx, by_nvar[n_by]))
333 msg (SE, _("Too many crosstabulation variables or dimensions."));
339 if (!lex_match (T_BY))
343 lex_error (_("expecting BY"));
352 int *by_iter = xcalloc (n_by, sizeof *by_iter);
355 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
356 for (i = 0; i < nx; i++)
360 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
367 for (i = 0; i < n_by; i++)
368 x->vars[i] = by[i][by_iter[i]];
374 for (i = n_by - 1; i >= 0; i--)
376 if (++by_iter[i] < by_nvar[i])
389 /* All return paths lead here. */
393 for (i = 0; i < n_by; i++)
399 var_set_destroy (var_set);
404 /* Parses the VARIABLES subcommand. */
406 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
410 msg (SE, _("VARIABLES must be specified before TABLES."));
418 size_t orig_nv = variables_cnt;
423 if (!parse_variables (default_dict, &variables, &variables_cnt,
424 (PV_APPEND | PV_NUMERIC
425 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
430 lex_error ("expecting `('");
435 if (!lex_force_int ())
437 min = lex_integer ();
442 if (!lex_force_int ())
444 max = lex_integer ();
447 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
455 lex_error ("expecting `)'");
460 for (i = orig_nv; i < variables_cnt; i++)
462 struct var_range *vr = xmalloc (sizeof *vr);
465 vr->count = max - min + 1;
466 var_attach_aux (variables[i], vr, var_dtor_free);
481 /* Data file processing. */
483 static int compare_table_entry (const void *, const void *, void *);
484 static unsigned hash_table_entry (const void *, void *);
486 /* Set up the crosstabulation tables for processing. */
488 precalc (void *aux UNUSED)
492 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
502 for (i = 0; i < nxtab; i++)
504 struct crosstab *x = xtab[i];
509 x->ofs = n_sorted_tab;
511 for (j = 2; j < x->nvar; j++)
512 count *= get_var_range (x->vars[j - 2])->count;
514 sorted_tab = xnrealloc (sorted_tab,
515 n_sorted_tab + count, sizeof *sorted_tab);
516 v = local_alloc (sizeof *v * x->nvar);
517 for (j = 2; j < x->nvar; j++)
518 v[j] = get_var_range (x->vars[j])->min;
519 for (j = 0; j < count; j++)
521 struct table_entry *te;
524 te = sorted_tab[n_sorted_tab++]
525 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
529 int row_cnt = get_var_range (x->vars[0])->count;
530 int col_cnt = get_var_range (x->vars[1])->count;
531 const int mat_size = row_cnt * col_cnt;
534 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
535 for (m = 0; m < mat_size; m++)
539 for (k = 2; k < x->nvar; k++)
540 te->values[k].f = v[k];
541 for (k = 2; k < x->nvar; k++)
543 struct var_range *vr = get_var_range (x->vars[k]);
544 if (++v[k] >= vr->max)
553 sorted_tab = xnrealloc (sorted_tab,
554 n_sorted_tab + 1, sizeof *sorted_tab);
555 sorted_tab[n_sorted_tab] = NULL;
559 /* Form crosstabulations for general mode. */
561 calc_general (struct ccase *c, void *aux UNUSED)
566 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
568 /* Flattened current table index. */
571 for (t = 0; t < nxtab; t++)
573 struct crosstab *x = xtab[t];
574 const size_t entry_size = (sizeof (struct table_entry)
575 + sizeof (union value) * (x->nvar - 1));
576 struct table_entry *te = local_alloc (entry_size);
578 /* Construct table entry for the current record and table. */
584 for (j = 0; j < x->nvar; j++)
586 const union value *v = case_data (c, x->vars[j]->fv);
587 const struct missing_values *mv = &x->vars[j]->miss;
588 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
589 || (cmd.miss == CRS_INCLUDE
590 && mv_is_value_system_missing (mv, v)))
592 x->missing += weight;
596 if (x->vars[j]->type == NUMERIC)
597 te->values[j].f = case_num (c, x->vars[j]->fv);
600 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
603 /* Necessary in order to simplify comparisons. */
604 memset (&te->values[j].s[x->vars[j]->width], 0,
605 sizeof (union value) - x->vars[j]->width);
610 /* Add record to hash table. */
612 struct table_entry **tepp
613 = (struct table_entry **) hsh_probe (gen_tab, te);
616 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
619 memcpy (tep, te, entry_size);
624 (*tepp)->u.freq += weight;
635 calc_integer (struct ccase *c, void *aux UNUSED)
640 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
642 /* Flattened current table index. */
645 for (t = 0; t < nxtab; t++)
647 struct crosstab *x = xtab[t];
652 for (i = 0; i < x->nvar; i++)
654 struct variable *const v = x->vars[i];
655 struct var_range *vr = get_var_range (v);
656 double value = case_num (c, v->fv);
658 /* Note that the first test also rules out SYSMIS. */
659 if ((value < vr->min || value >= vr->max)
660 || (cmd.miss == CRS_TABLE
661 && mv_is_num_user_missing (&v->miss, value)))
663 x->missing += weight;
669 ofs += fact * ((int) value - vr->min);
675 struct variable *row_var = x->vars[ROW_VAR];
676 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
678 struct variable *col_var = x->vars[COL_VAR];
679 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
681 const int col_dim = get_var_range (col_var)->count;
683 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
692 /* Compare the table_entry's at A and B and return a strcmp()-type
695 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
697 const struct table_entry *a = a_;
698 const struct table_entry *b = b_;
700 if (a->table > b->table)
702 else if (a->table < b->table)
706 const struct crosstab *x = xtab[a->table];
709 for (i = x->nvar - 1; i >= 0; i--)
710 if (x->vars[i]->type == NUMERIC)
712 const double diffnum = a->values[i].f - b->values[i].f;
715 else if (diffnum > 0)
720 assert (x->vars[i]->type == ALPHA);
722 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
733 /* Calculate a hash value from table_entry A. */
735 hash_table_entry (const void *a_, void *foo UNUSED)
737 const struct table_entry *a = a_;
742 for (i = 0; i < xtab[a->table]->nvar; i++)
743 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
748 /* Post-data reading calculations. */
750 static struct table_entry **find_pivot_extent (struct table_entry **,
751 int *cnt, int pivot);
752 static void enum_var_values (struct table_entry **entries, int entry_cnt,
754 union value **values, int *value_cnt);
755 static void output_pivot_table (struct table_entry **, struct table_entry **,
756 double **, double **, double **,
757 int *, int *, int *);
758 static void make_summary_table (void);
761 postcalc (void *aux UNUSED)
765 n_sorted_tab = hsh_count (gen_tab);
766 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
769 make_summary_table ();
771 /* Identify all the individual crosstabulation tables, and deal with
774 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
775 int pc = n_sorted_tab; /* Pivot count. */
777 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
778 int maxrows = 0, maxcols = 0, maxcells = 0;
782 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
786 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
787 &maxrows, &maxcols, &maxcells);
796 hsh_destroy (gen_tab);
799 static void insert_summary (struct tab_table *, int tab_index, double valid);
801 /* Output a table summarizing the cases processed. */
803 make_summary_table (void)
805 struct tab_table *summary;
807 struct table_entry **pb = sorted_tab, **pe;
808 int pc = n_sorted_tab;
811 summary = tab_create (7, 3 + nxtab, 1);
812 tab_title (summary, 0, _("Summary."));
813 tab_headers (summary, 1, 0, 3, 0);
814 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
815 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
816 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
817 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
818 tab_hline (summary, TAL_1, 1, 6, 1);
819 tab_hline (summary, TAL_1, 1, 6, 2);
820 tab_vline (summary, TAL_1, 3, 1, 1);
821 tab_vline (summary, TAL_1, 5, 1, 1);
825 for (i = 0; i < 3; i++)
827 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
828 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
831 tab_offset (summary, 0, 3);
837 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
841 while (cur_tab < (*pb)->table)
842 insert_summary (summary, cur_tab++, 0.);
845 for (valid = 0.; pb < pe; pb++)
846 valid += (*pb)->u.freq;
849 const struct crosstab *const x = xtab[(*pb)->table];
850 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
851 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
852 const int count = n_cols * n_rows;
854 for (valid = 0.; pb < pe; pb++)
856 const double *data = (*pb)->u.data;
859 for (i = 0; i < count; i++)
863 insert_summary (summary, cur_tab++, valid);
868 while (cur_tab < nxtab)
869 insert_summary (summary, cur_tab++, 0.);
874 /* Inserts a line into T describing the crosstabulation at index
875 TAB_INDEX, which has VALID valid observations. */
877 insert_summary (struct tab_table *t, int tab_index, double valid)
879 struct crosstab *x = xtab[tab_index];
881 tab_hline (t, TAL_1, 0, 6, 0);
883 /* Crosstabulation name. */
885 char *buf = local_alloc (128 * x->nvar);
889 for (i = 0; i < x->nvar; i++)
892 cp = stpcpy (cp, " * ");
895 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
897 tab_text (t, 0, 0, TAB_LEFT, buf);
902 /* Counts and percentages. */
912 for (i = 0; i < 3; i++)
914 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
915 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
926 static struct tab_table *table; /* Crosstabulation table. */
927 static struct tab_table *chisq; /* Chi-square table. */
928 static struct tab_table *sym; /* Symmetric measures table. */
929 static struct tab_table *risk; /* Risk estimate table. */
930 static struct tab_table *direct; /* Directional measures table. */
933 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
935 /* Column values, number of columns. */
936 static union value *cols;
939 /* Row values, number of rows. */
940 static union value *rows;
943 /* Number of statistically interesting columns/rows (columns/rows with
945 static int ns_cols, ns_rows;
947 /* Crosstabulation. */
948 static struct crosstab *x;
950 /* Number of variables from the crosstabulation to consider. This is
951 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
954 /* Matrix contents. */
955 static double *mat; /* Matrix proper. */
956 static double *row_tot; /* Row totals. */
957 static double *col_tot; /* Column totals. */
958 static double W; /* Grand total. */
960 static void display_dimensions (struct tab_table *, int first_difference,
961 struct table_entry *);
962 static void display_crosstabulation (void);
963 static void display_chisq (void);
964 static void display_symmetric (void);
965 static void display_risk (void);
966 static void display_directional (void);
967 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
968 static void table_value_missing (struct tab_table *table, int c, int r,
969 unsigned char opt, const union value *v,
970 const struct variable *var);
971 static void delete_missing (void);
973 /* Output pivot table beginning at PB and continuing until PE,
974 exclusive. For efficiency, *MATP is a pointer to a matrix that can
975 hold *MAXROWS entries. */
977 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
978 double **matp, double **row_totp, double **col_totp,
979 int *maxrows, int *maxcols, int *maxcells)
982 struct table_entry **tb = pb, **te; /* Table begin, table end. */
983 int tc = pe - pb; /* Table count. */
985 /* Table entry for header comparison. */
986 struct table_entry *cmp = NULL;
988 x = xtab[(*pb)->table];
989 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
991 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
993 /* Crosstabulation table initialization. */
996 table = tab_create (nvar + n_cols,
997 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
998 tab_headers (table, nvar - 1, 0, 2, 0);
1000 /* First header line. */
1001 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1002 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1004 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1006 /* Second header line. */
1010 for (i = 2; i < nvar; i++)
1011 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1012 TAB_RIGHT | TAT_TITLE,
1014 ? x->vars[i]->label : x->vars[i]->name));
1015 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1016 x->vars[ROW_VAR]->name);
1017 for (i = 0; i < n_cols; i++)
1018 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1020 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1023 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1024 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1028 char *title = local_alloc (x->nvar * 64 + 128);
1032 if (cmd.pivot == CRS_PIVOT)
1033 for (i = 0; i < nvar; i++)
1036 cp = stpcpy (cp, " by ");
1037 cp = stpcpy (cp, x->vars[i]->name);
1041 cp = spprintf (cp, "%s by %s for",
1042 x->vars[0]->name, x->vars[1]->name);
1043 for (i = 2; i < nvar; i++)
1045 char buf[64], *bufp;
1050 cp = stpcpy (cp, x->vars[i]->name);
1052 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1053 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1055 cp = stpcpy (cp, bufp);
1059 cp = stpcpy (cp, " [");
1060 for (i = 0; i < num_cells; i++)
1068 static const struct tuple cell_names[] =
1070 {CRS_CL_COUNT, N_("count")},
1071 {CRS_CL_ROW, N_("row %")},
1072 {CRS_CL_COLUMN, N_("column %")},
1073 {CRS_CL_TOTAL, N_("total %")},
1074 {CRS_CL_EXPECTED, N_("expected")},
1075 {CRS_CL_RESIDUAL, N_("residual")},
1076 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1077 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1081 const struct tuple *t;
1083 for (t = cell_names; t->value != cells[i]; t++)
1084 assert (t->value != -1);
1086 cp = stpcpy (cp, ", ");
1087 cp = stpcpy (cp, gettext (t->name));
1091 tab_title (table, 0, title);
1095 tab_offset (table, 0, 2);
1100 /* Chi-square table initialization. */
1101 if (cmd.a_statistics[CRS_ST_CHISQ])
1103 chisq = tab_create (6 + (nvar - 2),
1104 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1105 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1107 tab_title (chisq, 0, "Chi-square tests.");
1109 tab_offset (chisq, nvar - 2, 0);
1110 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1111 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1112 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1113 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1114 _("Asymp. Sig. (2-sided)"));
1115 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1116 _("Exact. Sig. (2-sided)"));
1117 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1118 _("Exact. Sig. (1-sided)"));
1120 tab_offset (chisq, 0, 1);
1125 /* Symmetric measures. */
1126 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1127 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1128 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1129 || cmd.a_statistics[CRS_ST_KAPPA])
1131 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1132 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1133 tab_title (sym, 0, "Symmetric measures.");
1135 tab_offset (sym, nvar - 2, 0);
1136 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1137 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1138 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1139 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1140 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1141 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1142 tab_offset (sym, 0, 1);
1147 /* Risk estimate. */
1148 if (cmd.a_statistics[CRS_ST_RISK])
1150 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1151 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1152 tab_title (risk, 0, "Risk estimate.");
1154 tab_offset (risk, nvar - 2, 0);
1155 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1156 _("95%% Confidence Interval"));
1157 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1158 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1159 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1160 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1161 tab_hline (risk, TAL_1, 2, 3, 1);
1162 tab_vline (risk, TAL_1, 2, 0, 1);
1163 tab_offset (risk, 0, 2);
1168 /* Directional measures. */
1169 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1170 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1172 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1173 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1174 tab_title (direct, 0, "Directional measures.");
1176 tab_offset (direct, nvar - 2, 0);
1177 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1178 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1179 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1180 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1181 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1182 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1183 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1184 tab_offset (direct, 0, 1);
1191 /* Find pivot subtable if applicable. */
1192 te = find_pivot_extent (tb, &tc, 0);
1196 /* Find all the row variable values. */
1197 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1199 /* Allocate memory space for the column and row totals. */
1200 if (n_rows > *maxrows)
1202 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1203 row_tot = *row_totp;
1206 if (n_cols > *maxcols)
1208 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1209 col_tot = *col_totp;
1213 /* Allocate table space for the matrix. */
1214 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1215 tab_realloc (table, -1,
1216 max (tab_nr (table) + (n_rows + 1) * num_cells,
1217 tab_nr (table) * (pe - pb) / (te - tb)));
1219 if (mode == GENERAL)
1221 /* Allocate memory space for the matrix. */
1222 if (n_cols * n_rows > *maxcells)
1224 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1225 *maxcells = n_cols * n_rows;
1230 /* Build the matrix and calculate column totals. */
1232 union value *cur_col = cols;
1233 union value *cur_row = rows;
1235 double *cp = col_tot;
1236 struct table_entry **p;
1239 for (p = &tb[0]; p < te; p++)
1241 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1245 for (; cur_row < &rows[n_rows]; cur_row++)
1251 mp = &mat[cur_col - cols];
1254 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1261 *cp += *mp = (*p)->u.freq;
1266 /* Zero out the rest of the matrix. */
1267 for (; cur_row < &rows[n_rows]; cur_row++)
1273 if (cur_col < &cols[n_cols])
1275 const int rem_cols = n_cols - (cur_col - cols);
1278 for (c = 0; c < rem_cols; c++)
1280 mp = &mat[cur_col - cols];
1281 for (r = 0; r < n_rows; r++)
1283 for (c = 0; c < rem_cols; c++)
1285 mp += n_cols - rem_cols;
1293 double *tp = col_tot;
1295 assert (mode == INTEGER);
1296 mat = (*tb)->u.data;
1299 /* Calculate column totals. */
1300 for (c = 0; c < n_cols; c++)
1303 double *cp = &mat[c];
1305 for (r = 0; r < n_rows; r++)
1306 cum += cp[r * n_cols];
1314 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1315 ns_cols += *cp != 0.;
1318 /* Calculate row totals. */
1321 double *rp = row_tot;
1324 for (ns_rows = 0, r = n_rows; r--; )
1327 for (c = n_cols; c--; )
1335 /* Calculate grand total. */
1341 if (n_rows < n_cols)
1342 tp = row_tot, n = n_rows;
1344 tp = col_tot, n = n_cols;
1350 /* Find the first variable that differs from the last subtable,
1351 then display the values of the dimensioning variables for
1352 each table that needs it. */
1354 int first_difference = nvar - 1;
1357 for (; ; first_difference--)
1359 assert (first_difference >= 2);
1360 if (memcmp (&cmp->values[first_difference],
1361 &(*tb)->values[first_difference],
1362 sizeof *cmp->values))
1368 display_dimensions (table, first_difference, *tb);
1370 display_dimensions (chisq, first_difference, *tb);
1372 display_dimensions (sym, first_difference, *tb);
1374 display_dimensions (risk, first_difference, *tb);
1376 display_dimensions (direct, first_difference, *tb);
1380 display_crosstabulation ();
1381 if (cmd.miss == CRS_REPORT)
1386 display_symmetric ();
1390 display_directional ();
1401 tab_resize (chisq, 4 + (nvar - 2), -1);
1412 /* Delete missing rows and columns for statistical analysis when
1415 delete_missing (void)
1420 for (r = 0; r < n_rows; r++)
1421 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1425 for (c = 0; c < n_cols; c++)
1426 mat[c + r * n_cols] = 0.;
1434 for (c = 0; c < n_cols; c++)
1435 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1439 for (r = 0; r < n_rows; r++)
1440 mat[c + r * n_cols] = 0.;
1446 /* Prepare table T for submission, and submit it. */
1448 submit (struct tab_table *t)
1455 tab_resize (t, -1, 0);
1456 if (tab_nr (t) == tab_t (t))
1461 tab_offset (t, 0, 0);
1463 for (i = 2; i < nvar; i++)
1464 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1465 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1466 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1467 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1469 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1471 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1472 tab_dim (t, crosstabs_dim);
1476 /* Sets the widths of all the columns and heights of all the rows in
1477 table T for driver D. */
1479 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1483 /* Width of a numerical column. */
1484 int c = outp_string_width (d, "0.000000");
1485 if (cmd.miss == CRS_REPORT)
1486 c += outp_string_width (d, "M");
1488 /* Set width for header columns. */
1491 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1493 if (w < d->prop_em_width * 8)
1494 w = d->prop_em_width * 8;
1496 if (w > d->prop_em_width * 15)
1497 w = d->prop_em_width * 15;
1499 for (i = 0; i < t->l; i++)
1503 for (i = t->l; i < t->nc; i++)
1506 for (i = 0; i < t->nr; i++)
1507 t->h[i] = tab_natural_height (t, d, i);
1510 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1511 int *cnt, int pivot);
1512 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1513 int *cnt, int pivot);
1515 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1517 static struct table_entry **
1518 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1520 return (mode == GENERAL
1521 ? find_pivot_extent_general (tp, cnt, pivot)
1522 : find_pivot_extent_integer (tp, cnt, pivot));
1525 /* Find the extent of a region in TP that contains one table. If
1526 PIVOT != 0 that means a set of table entries with identical table
1527 number; otherwise they also have to have the same values for every
1528 dimension after the row and column dimensions. The table that is
1529 searched starts at TP and has length CNT. Returns the first entry
1530 after the last one in the table; sets *CNT to the number of
1531 remaining values. If there are no entries in TP at all, returns
1532 NULL. A yucky interface, admittedly, but it works. */
1533 static struct table_entry **
1534 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1536 struct table_entry *fp = *tp;
1541 x = xtab[(*tp)->table];
1549 if ((*tp)->table != fp->table)
1554 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1561 /* Integer mode correspondent to find_pivot_extent_general(). This
1562 could be optimized somewhat, but I just don't give a crap about
1563 CROSSTABS performance in integer mode, which is just a
1564 CROSSTABS wart as far as I'm concerned.
1566 That said, feel free to send optimization patches to me. */
1567 static struct table_entry **
1568 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1570 struct table_entry *fp = *tp;
1575 x = xtab[(*tp)->table];
1583 if ((*tp)->table != fp->table)
1588 if (memcmp (&(*tp)->values[2], &fp->values[2],
1589 sizeof (union value) * (x->nvar - 2)))
1596 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1597 result. WIDTH_ points to an int which is either 0 for a
1598 numeric value or a string width for a string value. */
1600 compare_value (const void *a_, const void *b_, void *width_)
1602 const union value *a = a_;
1603 const union value *b = b_;
1604 const int *pwidth = width_;
1605 const int width = *pwidth;
1608 return (a->f < b->f) ? -1 : (a->f > b->f);
1610 return strncmp (a->s, b->s, width);
1613 /* Given an array of ENTRY_CNT table_entry structures starting at
1614 ENTRIES, creates a sorted list of the values that the variable
1615 with index VAR_IDX takes on. The values are returned as a
1616 malloc()'darray stored in *VALUES, with the number of values
1617 stored in *VALUE_CNT.
1620 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1621 union value **values, int *value_cnt)
1623 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1625 if (mode == GENERAL)
1627 int width = v->width;
1630 *values = xnmalloc (entry_cnt, sizeof **values);
1631 for (i = 0; i < entry_cnt; i++)
1632 (*values)[i] = entries[i]->values[var_idx];
1633 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1634 compare_value, &width);
1638 struct var_range *vr = get_var_range (v);
1641 assert (mode == INTEGER);
1642 *values = xnmalloc (vr->count, sizeof **values);
1643 for (i = 0; i < vr->count; i++)
1644 (*values)[i].f = i + vr->min;
1645 *value_cnt = vr->count;
1649 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1650 from V, displayed with print format spec from variable VAR. When
1651 in REPORT missing-value mode, missing values have an M appended. */
1653 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1654 const union value *v, const struct variable *var)
1656 struct fixed_string s;
1658 const char *label = val_labs_find (var->val_labs, *v);
1661 tab_text (table, c, r, TAB_LEFT, label);
1665 s.string = tab_alloc (table, var->print.w);
1666 format_short (s.string, &var->print, v);
1667 s.length = strlen (s.string);
1668 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1669 s.string[s.length++] = 'M';
1670 while (s.length && *s.string == ' ')
1675 tab_raw (table, c, r, opt, &s);
1678 /* Draws a line across TABLE at the current row to indicate the most
1679 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1680 that changed, and puts the values that changed into the table. TB
1681 and X must be the corresponding table_entry and crosstab,
1684 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1686 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1688 for (; first_difference >= 2; first_difference--)
1689 table_value_missing (table, nvar - first_difference - 1, 0,
1690 TAB_RIGHT, &tb->values[first_difference],
1691 x->vars[first_difference]);
1694 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1695 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1696 additionally suffixed with a letter `M'. */
1698 format_cell_entry (struct tab_table *table, int c, int r, double value,
1699 char suffix, int mark_missing)
1701 const struct fmt_spec f = {FMT_F, 10, 1};
1703 struct fixed_string s;
1706 s.string = tab_alloc (table, 16);
1708 data_out (s.string, &f, &v);
1709 while (*s.string == ' ')
1715 s.string[s.length++] = suffix;
1717 s.string[s.length++] = 'M';
1719 tab_raw (table, c, r, TAB_RIGHT, &s);
1722 /* Displays the crosstabulation table. */
1724 display_crosstabulation (void)
1729 for (r = 0; r < n_rows; r++)
1730 table_value_missing (table, nvar - 2, r * num_cells,
1731 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1733 tab_text (table, nvar - 2, n_rows * num_cells,
1734 TAB_LEFT, _("Total"));
1736 /* Put in the actual cells. */
1741 tab_offset (table, nvar - 1, -1);
1742 for (r = 0; r < n_rows; r++)
1745 tab_hline (table, TAL_1, -1, n_cols, 0);
1746 for (c = 0; c < n_cols; c++)
1748 int mark_missing = 0;
1749 double expected_value = row_tot[r] * col_tot[c] / W;
1750 if (cmd.miss == CRS_REPORT
1751 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1752 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1755 for (i = 0; i < num_cells; i++)
1766 v = *mp / row_tot[r] * 100.;
1770 v = *mp / col_tot[c] * 100.;
1777 case CRS_CL_EXPECTED:
1780 case CRS_CL_RESIDUAL:
1781 v = *mp - expected_value;
1783 case CRS_CL_SRESIDUAL:
1784 v = (*mp - expected_value) / sqrt (expected_value);
1786 case CRS_CL_ASRESIDUAL:
1787 v = ((*mp - expected_value)
1788 / sqrt (expected_value
1789 * (1. - row_tot[r] / W)
1790 * (1. - col_tot[c] / W)));
1797 format_cell_entry (table, c, i, v, suffix, mark_missing);
1803 tab_offset (table, -1, tab_row (table) + num_cells);
1811 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1812 for (r = 0; r < n_rows; r++)
1815 int mark_missing = 0;
1817 if (cmd.miss == CRS_REPORT
1818 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1821 for (i = 0; i < num_cells; i++)
1835 v = row_tot[r] / W * 100.;
1839 v = row_tot[r] / W * 100.;
1842 case CRS_CL_EXPECTED:
1843 case CRS_CL_RESIDUAL:
1844 case CRS_CL_SRESIDUAL:
1845 case CRS_CL_ASRESIDUAL:
1853 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1854 tab_next_row (table);
1859 /* Column totals, grand total. */
1865 tab_hline (table, TAL_1, -1, n_cols, 0);
1866 for (c = 0; c <= n_cols; c++)
1868 double ct = c < n_cols ? col_tot[c] : W;
1869 int mark_missing = 0;
1873 if (cmd.miss == CRS_REPORT && c < n_cols
1874 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1877 for (i = 0; i < num_cells; i++)
1899 case CRS_CL_EXPECTED:
1900 case CRS_CL_RESIDUAL:
1901 case CRS_CL_SRESIDUAL:
1902 case CRS_CL_ASRESIDUAL:
1909 format_cell_entry (table, c, i, v, suffix, mark_missing);
1914 tab_offset (table, -1, tab_row (table) + last_row);
1917 tab_offset (table, 0, -1);
1920 static void calc_r (double *X, double *Y, double *, double *, double *);
1921 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1923 /* Display chi-square statistics. */
1925 display_chisq (void)
1927 static const char *chisq_stats[N_CHISQ] =
1929 N_("Pearson Chi-Square"),
1930 N_("Likelihood Ratio"),
1931 N_("Fisher's Exact Test"),
1932 N_("Continuity Correction"),
1933 N_("Linear-by-Linear Association"),
1935 double chisq_v[N_CHISQ];
1936 double fisher1, fisher2;
1942 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1944 tab_offset (chisq, nvar - 2, -1);
1946 for (i = 0; i < N_CHISQ; i++)
1948 if ((i != 2 && chisq_v[i] == SYSMIS)
1949 || (i == 2 && fisher1 == SYSMIS))
1953 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1956 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1957 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1958 tab_float (chisq, 3, 0, TAB_RIGHT,
1959 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1964 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1965 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1967 tab_next_row (chisq);
1970 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1971 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1972 tab_next_row (chisq);
1974 tab_offset (chisq, 0, -1);
1977 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1978 double[N_SYMMETRIC]);
1980 /* Display symmetric measures. */
1982 display_symmetric (void)
1984 static const char *categories[] =
1986 N_("Nominal by Nominal"),
1987 N_("Ordinal by Ordinal"),
1988 N_("Interval by Interval"),
1989 N_("Measure of Agreement"),
1992 static const char *stats[N_SYMMETRIC] =
1996 N_("Contingency Coefficient"),
1997 N_("Kendall's tau-b"),
1998 N_("Kendall's tau-c"),
2000 N_("Spearman Correlation"),
2005 static const int stats_categories[N_SYMMETRIC] =
2007 0, 0, 0, 1, 1, 1, 1, 2, 3,
2011 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2014 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2017 tab_offset (sym, nvar - 2, -1);
2019 for (i = 0; i < N_SYMMETRIC; i++)
2021 if (sym_v[i] == SYSMIS)
2024 if (stats_categories[i] != last_cat)
2026 last_cat = stats_categories[i];
2027 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2030 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2031 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2032 if (sym_ase[i] != SYSMIS)
2033 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2034 if (sym_t[i] != SYSMIS)
2035 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2036 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2040 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2041 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2044 tab_offset (sym, 0, -1);
2047 static int calc_risk (double[], double[], double[], union value *);
2049 /* Display risk estimate. */
2054 double risk_v[3], lower[3], upper[3];
2058 if (!calc_risk (risk_v, upper, lower, c))
2061 tab_offset (risk, nvar - 2, -1);
2063 for (i = 0; i < 3; i++)
2065 if (risk_v[i] == SYSMIS)
2071 if (x->vars[COL_VAR]->type == NUMERIC)
2072 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2073 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2075 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2076 x->vars[COL_VAR]->name,
2077 x->vars[COL_VAR]->width, c[0].s,
2078 x->vars[COL_VAR]->width, c[1].s);
2082 if (x->vars[ROW_VAR]->type == NUMERIC)
2083 sprintf (buf, _("For cohort %s = %g"),
2084 x->vars[ROW_VAR]->name, rows[i - 1].f);
2086 sprintf (buf, _("For cohort %s = %.*s"),
2087 x->vars[ROW_VAR]->name,
2088 x->vars[ROW_VAR]->width, rows[i - 1].s);
2092 tab_text (risk, 0, 0, TAB_LEFT, buf);
2093 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2094 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2095 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2096 tab_next_row (risk);
2099 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2100 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2101 tab_next_row (risk);
2103 tab_offset (risk, 0, -1);
2106 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2107 double[N_DIRECTIONAL]);
2109 /* Display directional measures. */
2111 display_directional (void)
2113 static const char *categories[] =
2115 N_("Nominal by Nominal"),
2116 N_("Ordinal by Ordinal"),
2117 N_("Nominal by Interval"),
2120 static const char *stats[] =
2123 N_("Goodman and Kruskal tau"),
2124 N_("Uncertainty Coefficient"),
2129 static const char *types[] =
2136 static const int stats_categories[N_DIRECTIONAL] =
2138 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2141 static const int stats_stats[N_DIRECTIONAL] =
2143 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2146 static const int stats_types[N_DIRECTIONAL] =
2148 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2151 static const int *stats_lookup[] =
2158 static const char **stats_names[] =
2170 double direct_v[N_DIRECTIONAL];
2171 double direct_ase[N_DIRECTIONAL];
2172 double direct_t[N_DIRECTIONAL];
2176 if (!calc_directional (direct_v, direct_ase, direct_t))
2179 tab_offset (direct, nvar - 2, -1);
2181 for (i = 0; i < N_DIRECTIONAL; i++)
2183 if (direct_v[i] == SYSMIS)
2189 for (j = 0; j < 3; j++)
2190 if (last[j] != stats_lookup[j][i])
2193 tab_hline (direct, TAL_1, j, 6, 0);
2198 int k = last[j] = stats_lookup[j][i];
2203 string = x->vars[0]->name;
2205 string = x->vars[1]->name;
2207 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2208 gettext (stats_names[j][k]), string);
2213 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2214 if (direct_ase[i] != SYSMIS)
2215 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2216 if (direct_t[i] != SYSMIS)
2217 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2218 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2219 tab_next_row (direct);
2222 tab_offset (direct, 0, -1);
2225 /* Statistical calculations. */
2227 /* Returns the value of the gamma (factorial) function for an integer
2230 gamma_int (double x)
2235 for (i = 2; i < x; i++)
2240 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2242 static inline double
2243 Pr (int a, int b, int c, int d)
2245 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2246 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2247 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2248 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2249 / gamma_int (a + b + c + d + 1.));
2252 /* Swap the contents of A and B. */
2254 swap (int *a, int *b)
2261 /* Calculate significance for Fisher's exact test as specified in
2262 _SPSS Statistical Algorithms_, Appendix 5. */
2264 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2268 if (min (c, d) < min (a, b))
2269 swap (&a, &c), swap (&b, &d);
2270 if (min (b, d) < min (a, c))
2271 swap (&a, &b), swap (&c, &d);
2275 swap (&a, &b), swap (&c, &d);
2277 swap (&a, &c), swap (&b, &d);
2281 for (x = 0; x <= a; x++)
2282 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2284 *fisher2 = *fisher1;
2285 for (x = 1; x <= b; x++)
2286 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2289 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2290 columns with values COLS and N_ROWS rows with values ROWS. Values
2291 in the matrix sum to W. */
2293 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2294 double *fisher1, double *fisher2)
2298 chisq[0] = chisq[1] = 0.;
2299 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2300 *fisher1 = *fisher2 = SYSMIS;
2302 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2304 if (ns_rows <= 1 || ns_cols <= 1)
2306 chisq[0] = chisq[1] = SYSMIS;
2310 for (r = 0; r < n_rows; r++)
2311 for (c = 0; c < n_cols; c++)
2313 const double expected = row_tot[r] * col_tot[c] / W;
2314 const double freq = mat[n_cols * r + c];
2315 const double residual = freq - expected;
2317 chisq[0] += residual * residual / expected;
2319 chisq[1] += freq * log (expected / freq);
2330 /* Calculate Yates and Fisher exact test. */
2331 if (ns_cols == 2 && ns_rows == 2)
2333 double f11, f12, f21, f22;
2339 for (i = j = 0; i < n_cols; i++)
2340 if (col_tot[i] != 0.)
2349 f11 = mat[nz_cols[0]];
2350 f12 = mat[nz_cols[1]];
2351 f21 = mat[nz_cols[0] + n_cols];
2352 f22 = mat[nz_cols[1] + n_cols];
2357 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2360 chisq[3] = (W * x * x
2361 / (f11 + f12) / (f21 + f22)
2362 / (f11 + f21) / (f12 + f22));
2370 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2371 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2374 /* Calculate Mantel-Haenszel. */
2375 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2377 double r, ase_0, ase_1;
2378 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2380 chisq[4] = (W - 1.) * r * r;
2385 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2386 ASE_1, and ase_0 into ASE_0. The row and column values must be
2387 passed in X and Y. */
2389 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2391 double SX, SY, S, T;
2393 double sum_XYf, sum_X2Y2f;
2394 double sum_Xr, sum_X2r;
2395 double sum_Yc, sum_Y2c;
2398 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2399 for (j = 0; j < n_cols; j++)
2401 double fij = mat[j + i * n_cols];
2402 double product = X[i] * Y[j];
2403 double temp = fij * product;
2405 sum_X2Y2f += temp * product;
2408 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2410 sum_Xr += X[i] * row_tot[i];
2411 sum_X2r += X[i] * X[i] * row_tot[i];
2415 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2417 sum_Yc += Y[i] * col_tot[i];
2418 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2422 S = sum_XYf - sum_Xr * sum_Yc / W;
2423 SX = sum_X2r - sum_Xr * sum_Xr / W;
2424 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2427 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2432 for (s = c = 0., i = 0; i < n_rows; i++)
2433 for (j = 0; j < n_cols; j++)
2435 double Xresid, Yresid;
2438 Xresid = X[i] - Xbar;
2439 Yresid = Y[j] - Ybar;
2440 temp = (T * Xresid * Yresid
2442 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2443 y = mat[j + i * n_cols] * temp * temp - c;
2448 *ase_1 = sqrt (s) / (T * T);
2452 static double somers_d_v[3];
2453 static double somers_d_ase[3];
2454 static double somers_d_t[3];
2456 /* Calculate symmetric statistics and their asymptotic standard
2457 errors. Returns 0 if none could be calculated. */
2459 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2460 double t[N_SYMMETRIC])
2462 int q = min (ns_rows, ns_cols);
2471 for (i = 0; i < N_SYMMETRIC; i++)
2472 v[i] = ase[i] = t[i] = SYSMIS;
2475 /* Phi, Cramer's V, contingency coefficient. */
2476 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2478 double Xp = 0.; /* Pearson chi-square. */
2483 for (r = 0; r < n_rows; r++)
2484 for (c = 0; c < n_cols; c++)
2486 const double expected = row_tot[r] * col_tot[c] / W;
2487 const double freq = mat[n_cols * r + c];
2488 const double residual = freq - expected;
2490 Xp += residual * residual / expected;
2494 if (cmd.a_statistics[CRS_ST_PHI])
2496 v[0] = sqrt (Xp / W);
2497 v[1] = sqrt (Xp / (W * (q - 1)));
2499 if (cmd.a_statistics[CRS_ST_CC])
2500 v[2] = sqrt (Xp / (Xp + W));
2503 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2504 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2509 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2516 for (r = 0; r < n_rows; r++)
2517 Dr -= row_tot[r] * row_tot[r];
2518 for (c = 0; c < n_cols; c++)
2519 Dc -= col_tot[c] * col_tot[c];
2525 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2526 for (c = 0; c < n_cols; c++)
2530 for (r = 0; r < n_rows; r++)
2531 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2541 for (i = 0; i < n_rows; i++)
2545 for (j = 1; j < n_cols; j++)
2546 Cij += col_tot[j] - cum[j + i * n_cols];
2549 for (j = 1; j < n_cols; j++)
2550 Dij += cum[j + (i - 1) * n_cols];
2554 double fij = mat[j + i * n_cols];
2560 assert (j < n_cols);
2562 Cij -= col_tot[j] - cum[j + i * n_cols];
2563 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2567 Cij += cum[j - 1 + (i - 1) * n_cols];
2568 Dij -= cum[j + (i - 1) * n_cols];
2574 if (cmd.a_statistics[CRS_ST_BTAU])
2575 v[3] = (P - Q) / sqrt (Dr * Dc);
2576 if (cmd.a_statistics[CRS_ST_CTAU])
2577 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2578 if (cmd.a_statistics[CRS_ST_GAMMA])
2579 v[5] = (P - Q) / (P + Q);
2581 /* ASE for tau-b, tau-c, gamma. Calculations could be
2582 eliminated here, at expense of memory. */
2587 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2588 for (i = 0; i < n_rows; i++)
2592 for (j = 1; j < n_cols; j++)
2593 Cij += col_tot[j] - cum[j + i * n_cols];
2596 for (j = 1; j < n_cols; j++)
2597 Dij += cum[j + (i - 1) * n_cols];
2601 double fij = mat[j + i * n_cols];
2603 if (cmd.a_statistics[CRS_ST_BTAU])
2605 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2606 + v[3] * (row_tot[i] * Dc
2607 + col_tot[j] * Dr));
2608 btau_cum += fij * temp * temp;
2612 const double temp = Cij - Dij;
2613 ctau_cum += fij * temp * temp;
2616 if (cmd.a_statistics[CRS_ST_GAMMA])
2618 const double temp = Q * Cij - P * Dij;
2619 gamma_cum += fij * temp * temp;
2622 if (cmd.a_statistics[CRS_ST_D])
2624 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2625 - (P - Q) * (W - row_tot[i]));
2626 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2627 - (Q - P) * (W - col_tot[j]));
2632 assert (j < n_cols);
2634 Cij -= col_tot[j] - cum[j + i * n_cols];
2635 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2639 Cij += cum[j - 1 + (i - 1) * n_cols];
2640 Dij -= cum[j + (i - 1) * n_cols];
2646 btau_var = ((btau_cum
2647 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2649 if (cmd.a_statistics[CRS_ST_BTAU])
2651 ase[3] = sqrt (btau_var);
2652 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2655 if (cmd.a_statistics[CRS_ST_CTAU])
2657 ase[4] = ((2 * q / ((q - 1) * W * W))
2658 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2659 t[4] = v[4] / ase[4];
2661 if (cmd.a_statistics[CRS_ST_GAMMA])
2663 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2664 t[5] = v[5] / (2. / (P + Q)
2665 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2667 if (cmd.a_statistics[CRS_ST_D])
2669 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2670 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2671 somers_d_t[0] = (somers_d_v[0]
2673 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2674 somers_d_v[1] = (P - Q) / Dc;
2675 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2676 somers_d_t[1] = (somers_d_v[1]
2678 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2679 somers_d_v[2] = (P - Q) / Dr;
2680 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2681 somers_d_t[2] = (somers_d_v[2]
2683 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2689 /* Spearman correlation, Pearson's r. */
2690 if (cmd.a_statistics[CRS_ST_CORR])
2692 double *R = local_alloc (sizeof *R * n_rows);
2693 double *C = local_alloc (sizeof *C * n_cols);
2696 double y, t, c = 0., s = 0.;
2701 R[i] = s + (row_tot[i] + 1.) / 2.;
2708 assert (i < n_rows);
2713 double y, t, c = 0., s = 0.;
2718 C[j] = s + (col_tot[j] + 1.) / 2;
2725 assert (j < n_cols);
2729 calc_r (R, C, &v[6], &t[6], &ase[6]);
2735 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2739 /* Cohen's kappa. */
2740 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2742 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2745 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2746 i < ns_rows; i++, j++)
2750 while (col_tot[j] == 0.)
2753 prod = row_tot[i] * col_tot[j];
2754 sum = row_tot[i] + col_tot[j];
2756 sum_fii += mat[j + i * n_cols];
2758 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2759 sum_riciri_ci += prod * sum;
2761 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2762 for (j = 0; j < ns_cols; j++)
2764 double sum = row_tot[i] + col_tot[j];
2765 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2768 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2770 ase[8] = sqrt ((W * W * sum_rici
2771 + sum_rici * sum_rici
2772 - W * sum_riciri_ci)
2773 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2775 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2776 / pow2 (W * W - sum_rici))
2777 + ((2. * (W - sum_fii)
2778 * (2. * sum_fii * sum_rici
2779 - W * sum_fiiri_ci))
2780 / cube (W * W - sum_rici))
2781 + (pow2 (W - sum_fii)
2782 * (W * sum_fijri_ci2 - 4.
2783 * sum_rici * sum_rici)
2784 / pow4 (W * W - sum_rici))));
2786 t[8] = v[8] / ase[8];
2793 /* Calculate risk estimate. */
2795 calc_risk (double *value, double *upper, double *lower, union value *c)
2797 double f11, f12, f21, f22;
2803 for (i = 0; i < 3; i++)
2804 value[i] = upper[i] = lower[i] = SYSMIS;
2807 if (ns_rows != 2 || ns_cols != 2)
2814 for (i = j = 0; i < n_cols; i++)
2815 if (col_tot[i] != 0.)
2824 f11 = mat[nz_cols[0]];
2825 f12 = mat[nz_cols[1]];
2826 f21 = mat[nz_cols[0] + n_cols];
2827 f22 = mat[nz_cols[1] + n_cols];
2829 c[0] = cols[nz_cols[0]];
2830 c[1] = cols[nz_cols[1]];
2833 value[0] = (f11 * f22) / (f12 * f21);
2834 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2835 lower[0] = value[0] * exp (-1.960 * v);
2836 upper[0] = value[0] * exp (1.960 * v);
2838 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2839 v = sqrt ((f12 / (f11 * (f11 + f12)))
2840 + (f22 / (f21 * (f21 + f22))));
2841 lower[1] = value[1] * exp (-1.960 * v);
2842 upper[1] = value[1] * exp (1.960 * v);
2844 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2845 v = sqrt ((f11 / (f12 * (f11 + f12)))
2846 + (f21 / (f22 * (f21 + f22))));
2847 lower[2] = value[2] * exp (-1.960 * v);
2848 upper[2] = value[2] * exp (1.960 * v);
2853 /* Calculate directional measures. */
2855 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2856 double t[N_DIRECTIONAL])
2861 for (i = 0; i < N_DIRECTIONAL; i++)
2862 v[i] = ase[i] = t[i] = SYSMIS;
2866 if (cmd.a_statistics[CRS_ST_LAMBDA])
2868 double *fim = xnmalloc (n_rows, sizeof *fim);
2869 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2870 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2871 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2872 double sum_fim, sum_fmj;
2874 int rm_index, cm_index;
2877 /* Find maximum for each row and their sum. */
2878 for (sum_fim = 0., i = 0; i < n_rows; i++)
2880 double max = mat[i * n_cols];
2883 for (j = 1; j < n_cols; j++)
2884 if (mat[j + i * n_cols] > max)
2886 max = mat[j + i * n_cols];
2890 sum_fim += fim[i] = max;
2891 fim_index[i] = index;
2894 /* Find maximum for each column. */
2895 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2897 double max = mat[j];
2900 for (i = 1; i < n_rows; i++)
2901 if (mat[j + i * n_cols] > max)
2903 max = mat[j + i * n_cols];
2907 sum_fmj += fmj[j] = max;
2908 fmj_index[j] = index;
2911 /* Find maximum row total. */
2914 for (i = 1; i < n_rows; i++)
2915 if (row_tot[i] > rm)
2921 /* Find maximum column total. */
2924 for (j = 1; j < n_cols; j++)
2925 if (col_tot[j] > cm)
2931 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2932 v[1] = (sum_fmj - rm) / (W - rm);
2933 v[2] = (sum_fim - cm) / (W - cm);
2935 /* ASE1 for Y given X. */
2939 for (accum = 0., i = 0; i < n_rows; i++)
2940 for (j = 0; j < n_cols; j++)
2942 const int deltaj = j == cm_index;
2943 accum += (mat[j + i * n_cols]
2944 * pow2 ((j == fim_index[i])
2949 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2952 /* ASE0 for Y given X. */
2956 for (accum = 0., i = 0; i < n_rows; i++)
2957 if (cm_index != fim_index[i])
2958 accum += (mat[i * n_cols + fim_index[i]]
2959 + mat[i * n_cols + cm_index]);
2960 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2963 /* ASE1 for X given Y. */
2967 for (accum = 0., i = 0; i < n_rows; i++)
2968 for (j = 0; j < n_cols; j++)
2970 const int deltaj = i == rm_index;
2971 accum += (mat[j + i * n_cols]
2972 * pow2 ((i == fmj_index[j])
2977 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2980 /* ASE0 for X given Y. */
2984 for (accum = 0., j = 0; j < n_cols; j++)
2985 if (rm_index != fmj_index[j])
2986 accum += (mat[j + n_cols * fmj_index[j]]
2987 + mat[j + n_cols * rm_index]);
2988 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2991 /* Symmetric ASE0 and ASE1. */
2996 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
2997 for (j = 0; j < n_cols; j++)
2999 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3000 int temp1 = (i == rm_index) + (j == cm_index);
3001 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3002 accum1 += (mat[j + i * n_cols]
3003 * pow2 (temp0 + (v[0] - 1.) * temp1));
3005 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3006 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3007 / (2. * W - rm - cm));
3016 double sum_fij2_ri, sum_fij2_ci;
3017 double sum_ri2, sum_cj2;
3019 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3020 for (j = 0; j < n_cols; j++)
3022 double temp = pow2 (mat[j + i * n_cols]);
3023 sum_fij2_ri += temp / row_tot[i];
3024 sum_fij2_ci += temp / col_tot[j];
3027 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3028 sum_ri2 += row_tot[i] * row_tot[i];
3030 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3031 sum_cj2 += col_tot[j] * col_tot[j];
3033 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3034 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3038 if (cmd.a_statistics[CRS_ST_UC])
3040 double UX, UY, UXY, P;
3041 double ase1_yx, ase1_xy, ase1_sym;
3044 for (UX = 0., i = 0; i < n_rows; i++)
3045 if (row_tot[i] > 0.)
3046 UX -= row_tot[i] / W * log (row_tot[i] / W);
3048 for (UY = 0., j = 0; j < n_cols; j++)
3049 if (col_tot[j] > 0.)
3050 UY -= col_tot[j] / W * log (col_tot[j] / W);
3052 for (UXY = P = 0., i = 0; i < n_rows; i++)
3053 for (j = 0; j < n_cols; j++)
3055 double entry = mat[j + i * n_cols];
3060 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3061 UXY -= entry / W * log (entry / W);
3064 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3065 for (j = 0; j < n_cols; j++)
3067 double entry = mat[j + i * n_cols];
3072 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3073 + (UX - UXY) * log (col_tot[j] / W));
3074 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3075 + (UY - UXY) * log (row_tot[i] / W));
3076 ase1_sym += entry * pow2 ((UXY
3077 * log (row_tot[i] * col_tot[j] / (W * W)))
3078 - (UX + UY) * log (entry / W));
3081 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3082 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3083 t[5] = v[5] / ((2. / (W * (UX + UY)))
3084 * sqrt (P - pow2 (UX + UY - UXY) / W));
3086 v[6] = (UX + UY - UXY) / UX;
3087 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3088 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3090 v[7] = (UX + UY - UXY) / UY;
3091 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3092 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3096 if (cmd.a_statistics[CRS_ST_D])
3101 calc_symmetric (NULL, NULL, NULL);
3102 for (i = 0; i < 3; i++)
3104 v[8 + i] = somers_d_v[i];
3105 ase[8 + i] = somers_d_ase[i];
3106 t[8 + i] = somers_d_t[i];
3111 if (cmd.a_statistics[CRS_ST_ETA])
3114 double sum_Xr, sum_X2r;
3118 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3120 sum_Xr += rows[i].f * row_tot[i];
3121 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3123 SX = sum_X2r - sum_Xr * sum_Xr / W;
3125 for (SXW = 0., j = 0; j < n_cols; j++)
3129 for (cum = 0., i = 0; i < n_rows; i++)
3131 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3132 cum += rows[i].f * mat[j + i * n_cols];
3135 SXW -= cum * cum / col_tot[j];
3137 v[11] = sqrt (1. - SXW / SX);
3141 double sum_Yc, sum_Y2c;
3145 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3147 sum_Yc += cols[i].f * col_tot[i];
3148 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3150 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3152 for (SYW = 0., i = 0; i < n_rows; i++)
3156 for (cum = 0., j = 0; j < n_cols; j++)
3158 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3159 cum += cols[j].f * mat[j + i * n_cols];
3162 SYW -= cum * cum / row_tot[i];
3164 v[12] = sqrt (1. - SYW / SY);
3171 /* A wrapper around data_out() that limits string output to short
3172 string width and null terminates the result. */
3174 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3176 struct fmt_spec fmt_subst;
3178 /* Limit to short string width. */
3179 if (formats[fp->type].cat & FCAT_STRING)
3183 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3184 if (fmt_subst.type == FMT_A)
3185 fmt_subst.w = min (8, fmt_subst.w);
3187 fmt_subst.w = min (16, fmt_subst.w);
3193 data_out (s, fp, v);
3195 /* Null terminate. */