1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include <gsl/gsl_cdf.h>
38 #include "algorithm.h"
41 #include "dictionary.h"
52 #include "value-labels.h"
57 #define _(msgid) gettext (msgid)
58 #define N_(msgid) msgid
62 #include "debug-print.h"
68 +missing=miss:!table/include/report;
69 +write[wr_]=none,cells,all;
70 +format=fmt:!labels/nolabels/novallabs,
73 tabl:!tables/notables,
76 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
78 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
84 /* Number of chi-square statistics. */
87 /* Number of symmetric statistics. */
90 /* Number of directional statistics. */
91 #define N_DIRECTIONAL 13
93 /* A single table entry for general mode. */
96 int table; /* Flattened table number. */
99 double freq; /* Frequency count. */
100 double *data; /* Crosstabulation table for integer mode. */
103 union value values[1]; /* Values. */
106 /* A crosstabulation. */
109 int nvar; /* Number of variables. */
110 double missing; /* Missing cases count. */
111 int ofs; /* Integer mode: Offset into sorted_tab[]. */
112 struct variable *vars[2]; /* At least two variables; sorted by
113 larger indices first. */
116 /* Integer mode variable info. */
119 int min; /* Minimum value. */
120 int max; /* Maximum value + 1. */
121 int count; /* max - min. */
124 static inline struct var_range *
125 get_var_range (struct variable *v)
128 assert (v->aux != NULL);
132 /* Indexes into crosstab.v. */
139 /* General mode crosstabulation table. */
140 static struct hsh_table *gen_tab; /* Hash table. */
141 static int n_sorted_tab; /* Number of entries in sorted_tab. */
142 static struct table_entry **sorted_tab; /* Sorted table. */
144 /* Variables specifies on VARIABLES. */
145 static struct variable **variables;
146 static size_t variables_cnt;
149 static struct crosstab **xtab;
152 /* Integer or general mode? */
161 static int num_cells; /* Number of cells requested. */
162 static int cells[8]; /* Cells requested. */
165 static int write; /* One of WR_* that specifies the WRITE style. */
167 /* Command parsing info. */
168 static struct cmd_crosstabs cmd;
171 static struct pool *pl_tc; /* For table cells. */
172 static struct pool *pl_col; /* For column data. */
174 static int internal_cmd_crosstabs (void);
175 static void precalc (void *);
176 static int calc_general (struct ccase *, void *);
177 static int calc_integer (struct ccase *, void *);
178 static void postcalc (void *);
179 static void submit (struct tab_table *);
181 static void format_short (char *s, const struct fmt_spec *fp,
182 const union value *v);
184 /* Parse and execute CROSSTABS, then clean up. */
188 int result = internal_cmd_crosstabs ();
191 pool_destroy (pl_tc);
192 pool_destroy (pl_col);
197 /* Parses and executes the CROSSTABS procedure. */
199 internal_cmd_crosstabs (void)
207 pl_tc = pool_create ();
208 pl_col = pool_create ();
210 if (!parse_crosstabs (&cmd))
213 mode = variables ? INTEGER : GENERAL;
218 cmd.a_cells[CRS_CL_COUNT] = 1;
224 for (i = 0; i < CRS_CL_count; i++)
229 cmd.a_cells[CRS_CL_COUNT] = 1;
230 cmd.a_cells[CRS_CL_ROW] = 1;
231 cmd.a_cells[CRS_CL_COLUMN] = 1;
232 cmd.a_cells[CRS_CL_TOTAL] = 1;
234 if (cmd.a_cells[CRS_CL_ALL])
236 for (i = 0; i < CRS_CL_count; i++)
238 cmd.a_cells[CRS_CL_ALL] = 0;
240 cmd.a_cells[CRS_CL_NONE] = 0;
242 for (num_cells = i = 0; i < CRS_CL_count; i++)
244 cells[num_cells++] = i;
247 if (cmd.sbc_statistics)
252 for (i = 0; i < CRS_ST_count; i++)
253 if (cmd.a_statistics[i])
256 cmd.a_statistics[CRS_ST_CHISQ] = 1;
257 if (cmd.a_statistics[CRS_ST_ALL])
258 for (i = 0; i < CRS_ST_count; i++)
259 cmd.a_statistics[i] = 1;
263 if (cmd.miss == CRS_REPORT && mode == GENERAL)
265 msg (SE, _("Missing mode REPORT not allowed in general mode. "
266 "Assuming MISSING=TABLE."));
267 cmd.miss = CRS_TABLE;
271 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
272 cmd.a_write[CRS_WR_ALL] = 0;
273 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
275 msg (SE, _("Write mode ALL not allowed in general mode. "
276 "Assuming WRITE=CELLS."));
277 cmd.a_write[CRS_WR_CELLS] = 1;
280 && (cmd.a_write[CRS_WR_NONE]
281 + cmd.a_write[CRS_WR_ALL]
282 + cmd.a_write[CRS_WR_CELLS] == 0))
283 cmd.a_write[CRS_WR_CELLS] = 1;
284 if (cmd.a_write[CRS_WR_CELLS])
285 write = CRS_WR_CELLS;
286 else if (cmd.a_write[CRS_WR_ALL])
291 procedure_with_splits (precalc,
292 mode == GENERAL ? calc_general : calc_integer,
298 /* Parses the TABLES subcommand. */
300 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
302 struct var_set *var_set;
304 struct variable ***by = NULL;
309 /* Ensure that this is a TABLES subcommand. */
310 if (!lex_match_id ("TABLES")
311 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
316 if (variables != NULL)
317 var_set = var_set_create_from_array (variables, variables_cnt);
319 var_set = var_set_create_from_dict (default_dict);
320 assert (var_set != NULL);
324 by = xrealloc (by, sizeof *by * (n_by + 1));
325 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
326 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
327 PV_NO_DUPLICATE | PV_NO_SCRATCH))
332 if (!lex_match (T_BY))
336 lex_error (_("expecting BY"));
345 int *by_iter = xcalloc (n_by, sizeof *by_iter);
348 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
349 for (i = 0; i < nx; i++)
353 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
360 for (i = 0; i < n_by; i++)
361 x->vars[i] = by[i][by_iter[i]];
367 for (i = n_by - 1; i >= 0; i--)
369 if (++by_iter[i] < by_nvar[i])
382 /* All return paths lead here. */
386 for (i = 0; i < n_by; i++)
392 var_set_destroy (var_set);
397 /* Parses the VARIABLES subcommand. */
399 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
403 msg (SE, _("VARIABLES must be specified before TABLES."));
411 int orig_nv = variables_cnt;
416 if (!parse_variables (default_dict, &variables, &variables_cnt,
417 (PV_APPEND | PV_NUMERIC
418 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
423 lex_error ("expecting `('");
428 if (!lex_force_int ())
430 min = lex_integer ();
435 if (!lex_force_int ())
437 max = lex_integer ();
440 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
448 lex_error ("expecting `)'");
453 for (i = orig_nv; i < variables_cnt; i++)
455 struct var_range *vr = xmalloc (sizeof *vr);
458 vr->count = max - min + 1;
459 var_attach_aux (variables[i], vr, var_dtor_free);
474 /* Data file processing. */
476 static int compare_table_entry (const void *, const void *, void *);
477 static unsigned hash_table_entry (const void *, void *);
479 /* Set up the crosstabulation tables for processing. */
481 precalc (void *aux UNUSED)
485 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
495 for (i = 0; i < nxtab; i++)
497 struct crosstab *x = xtab[i];
502 x->ofs = n_sorted_tab;
504 for (j = 2; j < x->nvar; j++)
505 count *= get_var_range (x->vars[j - 2])->count;
507 sorted_tab = xrealloc (sorted_tab,
508 sizeof *sorted_tab * (n_sorted_tab + count));
509 v = local_alloc (sizeof *v * x->nvar);
510 for (j = 2; j < x->nvar; j++)
511 v[j] = get_var_range (x->vars[j])->min;
512 for (j = 0; j < count; j++)
514 struct table_entry *te;
517 te = sorted_tab[n_sorted_tab++]
518 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
522 int row_cnt = get_var_range (x->vars[0])->count;
523 int col_cnt = get_var_range (x->vars[1])->count;
524 const int mat_size = row_cnt * col_cnt;
527 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
528 for (m = 0; m < mat_size; m++)
532 for (k = 2; k < x->nvar; k++)
533 te->values[k].f = v[k];
534 for (k = 2; k < x->nvar; k++)
536 struct var_range *vr = get_var_range (x->vars[k]);
537 if (++v[k] >= vr->max)
546 sorted_tab = xrealloc (sorted_tab,
547 sizeof *sorted_tab * (n_sorted_tab + 1));
548 sorted_tab[n_sorted_tab] = NULL;
552 /* Form crosstabulations for general mode. */
554 calc_general (struct ccase *c, void *aux UNUSED)
559 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
561 /* Flattened current table index. */
564 for (t = 0; t < nxtab; t++)
566 struct crosstab *x = xtab[t];
567 const size_t entry_size = (sizeof (struct table_entry)
568 + sizeof (union value) * (x->nvar - 1));
569 struct table_entry *te = local_alloc (entry_size);
571 /* Construct table entry for the current record and table. */
577 for (j = 0; j < x->nvar; j++)
579 const union value *v = case_data (c, x->vars[j]->fv);
580 const struct missing_values *mv = &x->vars[j]->miss;
581 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
582 || (cmd.miss == CRS_INCLUDE
583 && mv_is_value_system_missing (mv, v)))
585 x->missing += weight;
589 if (x->vars[j]->type == NUMERIC)
590 te->values[j].f = case_num (c, x->vars[j]->fv);
593 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
596 /* Necessary in order to simplify comparisons. */
597 memset (&te->values[j].s[x->vars[j]->width], 0,
598 sizeof (union value) - x->vars[j]->width);
603 /* Add record to hash table. */
605 struct table_entry **tepp
606 = (struct table_entry **) hsh_probe (gen_tab, te);
609 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
612 memcpy (tep, te, entry_size);
617 (*tepp)->u.freq += weight;
628 calc_integer (struct ccase *c, void *aux UNUSED)
633 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
635 /* Flattened current table index. */
638 for (t = 0; t < nxtab; t++)
640 struct crosstab *x = xtab[t];
645 for (i = 0; i < x->nvar; i++)
647 struct variable *const v = x->vars[i];
648 struct var_range *vr = get_var_range (v);
649 double value = case_num (c, v->fv);
651 /* Note that the first test also rules out SYSMIS. */
652 if ((value < vr->min || value >= vr->max)
653 || (cmd.miss == CRS_TABLE
654 && mv_is_num_user_missing (&v->miss, value)))
656 x->missing += weight;
662 ofs += fact * ((int) value - vr->min);
668 struct variable *row_var = x->vars[ROW_VAR];
669 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
671 struct variable *col_var = x->vars[COL_VAR];
672 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
674 const int col_dim = get_var_range (col_var)->count;
676 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
685 /* Compare the table_entry's at A and B and return a strcmp()-type
688 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
690 const struct table_entry *a = a_;
691 const struct table_entry *b = b_;
693 if (a->table > b->table)
695 else if (a->table < b->table)
699 const struct crosstab *x = xtab[a->table];
702 for (i = x->nvar - 1; i >= 0; i--)
703 if (x->vars[i]->type == NUMERIC)
705 const double diffnum = a->values[i].f - b->values[i].f;
708 else if (diffnum > 0)
713 assert (x->vars[i]->type == ALPHA);
715 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
726 /* Calculate a hash value from table_entry A. */
728 hash_table_entry (const void *a_, void *foo UNUSED)
730 const struct table_entry *a = a_;
735 for (i = 0; i < xtab[a->table]->nvar; i++)
736 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
741 /* Post-data reading calculations. */
743 static struct table_entry **find_pivot_extent (struct table_entry **,
744 int *cnt, int pivot);
745 static void enum_var_values (struct table_entry **entries, int entry_cnt,
747 union value **values, int *value_cnt);
748 static void output_pivot_table (struct table_entry **, struct table_entry **,
749 double **, double **, double **,
750 int *, int *, int *);
751 static void make_summary_table (void);
754 postcalc (void *aux UNUSED)
758 n_sorted_tab = hsh_count (gen_tab);
759 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
762 make_summary_table ();
764 /* Identify all the individual crosstabulation tables, and deal with
767 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
768 int pc = n_sorted_tab; /* Pivot count. */
770 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
771 int maxrows = 0, maxcols = 0, maxcells = 0;
775 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
779 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
780 &maxrows, &maxcols, &maxcells);
789 hsh_destroy (gen_tab);
792 static void insert_summary (struct tab_table *, int tab_index, double valid);
794 /* Output a table summarizing the cases processed. */
796 make_summary_table (void)
798 struct tab_table *summary;
800 struct table_entry **pb = sorted_tab, **pe;
801 int pc = n_sorted_tab;
804 summary = tab_create (7, 3 + nxtab, 1);
805 tab_title (summary, 0, _("Summary."));
806 tab_headers (summary, 1, 0, 3, 0);
807 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
808 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
809 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
810 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
811 tab_hline (summary, TAL_1, 1, 6, 1);
812 tab_hline (summary, TAL_1, 1, 6, 2);
813 tab_vline (summary, TAL_1, 3, 1, 1);
814 tab_vline (summary, TAL_1, 5, 1, 1);
818 for (i = 0; i < 3; i++)
820 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
821 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
824 tab_offset (summary, 0, 3);
830 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
834 while (cur_tab < (*pb)->table)
835 insert_summary (summary, cur_tab++, 0.);
838 for (valid = 0.; pb < pe; pb++)
839 valid += (*pb)->u.freq;
842 const struct crosstab *const x = xtab[(*pb)->table];
843 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
844 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
845 const int count = n_cols * n_rows;
847 for (valid = 0.; pb < pe; pb++)
849 const double *data = (*pb)->u.data;
852 for (i = 0; i < count; i++)
856 insert_summary (summary, cur_tab++, valid);
861 while (cur_tab < nxtab)
862 insert_summary (summary, cur_tab++, 0.);
867 /* Inserts a line into T describing the crosstabulation at index
868 TAB_INDEX, which has VALID valid observations. */
870 insert_summary (struct tab_table *t, int tab_index, double valid)
872 struct crosstab *x = xtab[tab_index];
874 tab_hline (t, TAL_1, 0, 6, 0);
876 /* Crosstabulation name. */
878 char *buf = local_alloc (128 * x->nvar);
882 for (i = 0; i < x->nvar; i++)
885 cp = stpcpy (cp, " * ");
888 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
890 tab_text (t, 0, 0, TAB_LEFT, buf);
895 /* Counts and percentages. */
905 for (i = 0; i < 3; i++)
907 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
908 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
919 static struct tab_table *table; /* Crosstabulation table. */
920 static struct tab_table *chisq; /* Chi-square table. */
921 static struct tab_table *sym; /* Symmetric measures table. */
922 static struct tab_table *risk; /* Risk estimate table. */
923 static struct tab_table *direct; /* Directional measures table. */
926 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
928 /* Column values, number of columns. */
929 static union value *cols;
932 /* Row values, number of rows. */
933 static union value *rows;
936 /* Number of statistically interesting columns/rows (columns/rows with
938 static int ns_cols, ns_rows;
940 /* Crosstabulation. */
941 static struct crosstab *x;
943 /* Number of variables from the crosstabulation to consider. This is
944 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
947 /* Matrix contents. */
948 static double *mat; /* Matrix proper. */
949 static double *row_tot; /* Row totals. */
950 static double *col_tot; /* Column totals. */
951 static double W; /* Grand total. */
953 static void display_dimensions (struct tab_table *, int first_difference,
954 struct table_entry *);
955 static void display_crosstabulation (void);
956 static void display_chisq (void);
957 static void display_symmetric (void);
958 static void display_risk (void);
959 static void display_directional (void);
960 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
961 static void table_value_missing (struct tab_table *table, int c, int r,
962 unsigned char opt, const union value *v,
963 const struct variable *var);
964 static void delete_missing (void);
966 /* Output pivot table beginning at PB and continuing until PE,
967 exclusive. For efficiency, *MATP is a pointer to a matrix that can
968 hold *MAXROWS entries. */
970 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
971 double **matp, double **row_totp, double **col_totp,
972 int *maxrows, int *maxcols, int *maxcells)
975 struct table_entry **tb = pb, **te; /* Table begin, table end. */
976 int tc = pe - pb; /* Table count. */
978 /* Table entry for header comparison. */
979 struct table_entry *cmp = NULL;
981 x = xtab[(*pb)->table];
982 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
984 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
986 /* Crosstabulation table initialization. */
989 table = tab_create (nvar + n_cols,
990 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
991 tab_headers (table, nvar - 1, 0, 2, 0);
993 /* First header line. */
994 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
995 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
997 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
999 /* Second header line. */
1003 for (i = 2; i < nvar; i++)
1004 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1005 TAB_RIGHT | TAT_TITLE,
1007 ? x->vars[i]->label : x->vars[i]->name));
1008 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1009 x->vars[ROW_VAR]->name);
1010 for (i = 0; i < n_cols; i++)
1011 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1013 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1016 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1017 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1021 char *title = local_alloc (x->nvar * 64 + 128);
1025 if (cmd.pivot == CRS_PIVOT)
1026 for (i = 0; i < nvar; i++)
1029 cp = stpcpy (cp, " by ");
1030 cp = stpcpy (cp, x->vars[i]->name);
1034 cp = spprintf (cp, "%s by %s for",
1035 x->vars[0]->name, x->vars[1]->name);
1036 for (i = 2; i < nvar; i++)
1038 char buf[64], *bufp;
1043 cp = stpcpy (cp, x->vars[i]->name);
1045 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1046 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1048 cp = stpcpy (cp, bufp);
1052 cp = stpcpy (cp, " [");
1053 for (i = 0; i < num_cells; i++)
1061 static const struct tuple cell_names[] =
1063 {CRS_CL_COUNT, N_("count")},
1064 {CRS_CL_ROW, N_("row %")},
1065 {CRS_CL_COLUMN, N_("column %")},
1066 {CRS_CL_TOTAL, N_("total %")},
1067 {CRS_CL_EXPECTED, N_("expected")},
1068 {CRS_CL_RESIDUAL, N_("residual")},
1069 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1070 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1074 const struct tuple *t;
1076 for (t = cell_names; t->value != cells[i]; t++)
1077 assert (t->value != -1);
1079 cp = stpcpy (cp, ", ");
1080 cp = stpcpy (cp, gettext (t->name));
1084 tab_title (table, 0, title);
1088 tab_offset (table, 0, 2);
1093 /* Chi-square table initialization. */
1094 if (cmd.a_statistics[CRS_ST_CHISQ])
1096 chisq = tab_create (6 + (nvar - 2),
1097 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1098 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1100 tab_title (chisq, 0, "Chi-square tests.");
1102 tab_offset (chisq, nvar - 2, 0);
1103 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1104 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1105 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1106 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1107 _("Asymp. Sig. (2-sided)"));
1108 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1109 _("Exact. Sig. (2-sided)"));
1110 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1111 _("Exact. Sig. (1-sided)"));
1113 tab_offset (chisq, 0, 1);
1118 /* Symmetric measures. */
1119 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1120 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1121 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1122 || cmd.a_statistics[CRS_ST_KAPPA])
1124 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1125 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1126 tab_title (sym, 0, "Symmetric measures.");
1128 tab_offset (sym, nvar - 2, 0);
1129 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1130 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1131 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1132 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1133 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1134 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1135 tab_offset (sym, 0, 1);
1140 /* Risk estimate. */
1141 if (cmd.a_statistics[CRS_ST_RISK])
1143 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1144 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1145 tab_title (risk, 0, "Risk estimate.");
1147 tab_offset (risk, nvar - 2, 0);
1148 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1149 _(" 95%% Confidence Interval"));
1150 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1151 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1152 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1153 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1154 tab_hline (risk, TAL_1, 2, 3, 1);
1155 tab_vline (risk, TAL_1, 2, 0, 1);
1156 tab_offset (risk, 0, 2);
1161 /* Directional measures. */
1162 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1163 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1165 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1166 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1167 tab_title (direct, 0, "Directional measures.");
1169 tab_offset (direct, nvar - 2, 0);
1170 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1171 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1172 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1173 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1174 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1175 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1176 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1177 tab_offset (direct, 0, 1);
1184 /* Find pivot subtable if applicable. */
1185 te = find_pivot_extent (tb, &tc, 0);
1189 /* Find all the row variable values. */
1190 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1192 /* Allocate memory space for the column and row totals. */
1193 if (n_rows > *maxrows)
1195 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1196 row_tot = *row_totp;
1199 if (n_cols > *maxcols)
1201 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1202 col_tot = *col_totp;
1206 /* Allocate table space for the matrix. */
1207 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1208 tab_realloc (table, -1,
1209 max (tab_nr (table) + (n_rows + 1) * num_cells,
1210 tab_nr (table) * (pe - pb) / (te - tb)));
1212 if (mode == GENERAL)
1214 /* Allocate memory space for the matrix. */
1215 if (n_cols * n_rows > *maxcells)
1217 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1218 *maxcells = n_cols * n_rows;
1223 /* Build the matrix and calculate column totals. */
1225 union value *cur_col = cols;
1226 union value *cur_row = rows;
1228 double *cp = col_tot;
1229 struct table_entry **p;
1232 for (p = &tb[0]; p < te; p++)
1234 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1238 for (; cur_row < &rows[n_rows]; cur_row++)
1244 mp = &mat[cur_col - cols];
1247 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1254 *cp += *mp = (*p)->u.freq;
1259 /* Zero out the rest of the matrix. */
1260 for (; cur_row < &rows[n_rows]; cur_row++)
1266 if (cur_col < &cols[n_cols])
1268 const int rem_cols = n_cols - (cur_col - cols);
1271 for (c = 0; c < rem_cols; c++)
1273 mp = &mat[cur_col - cols];
1274 for (r = 0; r < n_rows; r++)
1276 for (c = 0; c < rem_cols; c++)
1278 mp += n_cols - rem_cols;
1286 double *tp = col_tot;
1288 assert (mode == INTEGER);
1289 mat = (*tb)->u.data;
1292 /* Calculate column totals. */
1293 for (c = 0; c < n_cols; c++)
1296 double *cp = &mat[c];
1298 for (r = 0; r < n_rows; r++)
1299 cum += cp[r * n_cols];
1307 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1308 ns_cols += *cp != 0.;
1311 /* Calculate row totals. */
1314 double *rp = row_tot;
1317 for (ns_rows = 0, r = n_rows; r--; )
1320 for (c = n_cols; c--; )
1328 /* Calculate grand total. */
1334 if (n_rows < n_cols)
1335 tp = row_tot, n = n_rows;
1337 tp = col_tot, n = n_cols;
1343 /* Find the first variable that differs from the last subtable,
1344 then display the values of the dimensioning variables for
1345 each table that needs it. */
1347 int first_difference = nvar - 1;
1350 for (; ; first_difference--)
1352 assert (first_difference >= 2);
1353 if (memcmp (&cmp->values[first_difference],
1354 &(*tb)->values[first_difference],
1355 sizeof *cmp->values))
1361 display_dimensions (table, first_difference, *tb);
1363 display_dimensions (chisq, first_difference, *tb);
1365 display_dimensions (sym, first_difference, *tb);
1367 display_dimensions (risk, first_difference, *tb);
1369 display_dimensions (direct, first_difference, *tb);
1373 display_crosstabulation ();
1374 if (cmd.miss == CRS_REPORT)
1379 display_symmetric ();
1383 display_directional ();
1394 tab_resize (chisq, 4 + (nvar - 2), -1);
1405 /* Delete missing rows and columns for statistical analysis when
1408 delete_missing (void)
1413 for (r = 0; r < n_rows; r++)
1414 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1418 for (c = 0; c < n_cols; c++)
1419 mat[c + r * n_cols] = 0.;
1427 for (c = 0; c < n_cols; c++)
1428 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1432 for (r = 0; r < n_rows; r++)
1433 mat[c + r * n_cols] = 0.;
1439 /* Prepare table T for submission, and submit it. */
1441 submit (struct tab_table *t)
1448 tab_resize (t, -1, 0);
1449 if (tab_nr (t) == tab_t (t))
1454 tab_offset (t, 0, 0);
1456 for (i = 2; i < nvar; i++)
1457 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1458 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1459 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1460 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1462 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1464 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1465 tab_dim (t, crosstabs_dim);
1469 /* Sets the widths of all the columns and heights of all the rows in
1470 table T for driver D. */
1472 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1476 /* Width of a numerical column. */
1477 int c = outp_string_width (d, "0.000000");
1478 if (cmd.miss == CRS_REPORT)
1479 c += outp_string_width (d, "M");
1481 /* Set width for header columns. */
1484 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1486 if (w < d->prop_em_width * 8)
1487 w = d->prop_em_width * 8;
1489 if (w > d->prop_em_width * 15)
1490 w = d->prop_em_width * 15;
1492 for (i = 0; i < t->l; i++)
1496 for (i = t->l; i < t->nc; i++)
1499 for (i = 0; i < t->nr; i++)
1500 t->h[i] = tab_natural_height (t, d, i);
1503 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1504 int *cnt, int pivot);
1505 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1506 int *cnt, int pivot);
1508 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1510 static struct table_entry **
1511 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1513 return (mode == GENERAL
1514 ? find_pivot_extent_general (tp, cnt, pivot)
1515 : find_pivot_extent_integer (tp, cnt, pivot));
1518 /* Find the extent of a region in TP that contains one table. If
1519 PIVOT != 0 that means a set of table entries with identical table
1520 number; otherwise they also have to have the same values for every
1521 dimension after the row and column dimensions. The table that is
1522 searched starts at TP and has length CNT. Returns the first entry
1523 after the last one in the table; sets *CNT to the number of
1524 remaining values. If there are no entries in TP at all, returns
1525 NULL. A yucky interface, admittedly, but it works. */
1526 static struct table_entry **
1527 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1529 struct table_entry *fp = *tp;
1534 x = xtab[(*tp)->table];
1542 if ((*tp)->table != fp->table)
1547 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1554 /* Integer mode correspondent to find_pivot_extent_general(). This
1555 could be optimized somewhat, but I just don't give a crap about
1556 CROSSTABS performance in integer mode, which is just a
1557 CROSSTABS wart as far as I'm concerned.
1559 That said, feel free to send optimization patches to me. */
1560 static struct table_entry **
1561 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1563 struct table_entry *fp = *tp;
1568 x = xtab[(*tp)->table];
1576 if ((*tp)->table != fp->table)
1581 if (memcmp (&(*tp)->values[2], &fp->values[2],
1582 sizeof (union value) * (x->nvar - 2)))
1589 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1590 result. WIDTH_ points to an int which is either 0 for a
1591 numeric value or a string width for a string value. */
1593 compare_value (const void *a_, const void *b_, void *width_)
1595 const union value *a = a_;
1596 const union value *b = b_;
1597 const int *pwidth = width_;
1598 const int width = *pwidth;
1601 return (a->f < b->f) ? -1 : (a->f > b->f);
1603 return strncmp (a->s, b->s, width);
1606 /* Given an array of ENTRY_CNT table_entry structures starting at
1607 ENTRIES, creates a sorted list of the values that the variable
1608 with index VAR_IDX takes on. The values are returned as a
1609 malloc()'darray stored in *VALUES, with the number of values
1610 stored in *VALUE_CNT.
1613 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1614 union value **values, int *value_cnt)
1616 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1618 if (mode == GENERAL)
1620 int width = v->width;
1623 *values = xmalloc (sizeof **values * entry_cnt);
1624 for (i = 0; i < entry_cnt; i++)
1625 (*values)[i] = entries[i]->values[var_idx];
1626 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1627 compare_value, &width);
1631 struct var_range *vr = get_var_range (v);
1634 assert (mode == INTEGER);
1635 *values = xmalloc (sizeof **values * vr->count);
1636 for (i = 0; i < vr->count; i++)
1637 (*values)[i].f = i + vr->min;
1638 *value_cnt = vr->count;
1642 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1643 from V, displayed with print format spec from variable VAR. When
1644 in REPORT missing-value mode, missing values have an M appended. */
1646 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1647 const union value *v, const struct variable *var)
1649 struct fixed_string s;
1651 const char *label = val_labs_find (var->val_labs, *v);
1654 tab_text (table, c, r, TAB_LEFT, label);
1658 s.string = tab_alloc (table, var->print.w);
1659 format_short (s.string, &var->print, v);
1660 s.length = strlen (s.string);
1661 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1662 s.string[s.length++] = 'M';
1663 while (s.length && *s.string == ' ')
1668 tab_raw (table, c, r, opt, &s);
1671 /* Draws a line across TABLE at the current row to indicate the most
1672 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1673 that changed, and puts the values that changed into the table. TB
1674 and X must be the corresponding table_entry and crosstab,
1677 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1679 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1681 for (; first_difference >= 2; first_difference--)
1682 table_value_missing (table, nvar - first_difference - 1, 0,
1683 TAB_RIGHT, &tb->values[first_difference],
1684 x->vars[first_difference]);
1687 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1688 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1689 additionally suffixed with a letter `M'. */
1691 format_cell_entry (struct tab_table *table, int c, int r, double value,
1692 char suffix, int mark_missing)
1694 const struct fmt_spec f = {FMT_F, 10, 1};
1696 struct fixed_string s;
1699 s.string = tab_alloc (table, 16);
1701 data_out (s.string, &f, &v);
1702 while (*s.string == ' ')
1708 s.string[s.length++] = suffix;
1710 s.string[s.length++] = 'M';
1712 tab_raw (table, c, r, TAB_RIGHT, &s);
1715 /* Displays the crosstabulation table. */
1717 display_crosstabulation (void)
1722 for (r = 0; r < n_rows; r++)
1723 table_value_missing (table, nvar - 2, r * num_cells,
1724 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1726 tab_text (table, nvar - 2, n_rows * num_cells,
1727 TAB_LEFT, _("Total"));
1729 /* Put in the actual cells. */
1734 tab_offset (table, nvar - 1, -1);
1735 for (r = 0; r < n_rows; r++)
1738 tab_hline (table, TAL_1, -1, n_cols, 0);
1739 for (c = 0; c < n_cols; c++)
1741 int mark_missing = 0;
1742 double expected_value = row_tot[r] * col_tot[c] / W;
1743 if (cmd.miss == CRS_REPORT
1744 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1745 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1748 for (i = 0; i < num_cells; i++)
1759 v = *mp / row_tot[r] * 100.;
1763 v = *mp / col_tot[c] * 100.;
1770 case CRS_CL_EXPECTED:
1773 case CRS_CL_RESIDUAL:
1774 v = *mp - expected_value;
1776 case CRS_CL_SRESIDUAL:
1777 v = (*mp - expected_value) / sqrt (expected_value);
1779 case CRS_CL_ASRESIDUAL:
1780 v = ((*mp - expected_value)
1781 / sqrt (expected_value
1782 * (1. - row_tot[r] / W)
1783 * (1. - col_tot[c] / W)));
1790 format_cell_entry (table, c, i, v, suffix, mark_missing);
1796 tab_offset (table, -1, tab_row (table) + num_cells);
1804 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1805 for (r = 0; r < n_rows; r++)
1808 int mark_missing = 0;
1810 if (cmd.miss == CRS_REPORT
1811 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1814 for (i = 0; i < num_cells; i++)
1828 v = row_tot[r] / W * 100.;
1832 v = row_tot[r] / W * 100.;
1835 case CRS_CL_EXPECTED:
1836 case CRS_CL_RESIDUAL:
1837 case CRS_CL_SRESIDUAL:
1838 case CRS_CL_ASRESIDUAL:
1846 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1847 tab_next_row (table);
1852 /* Column totals, grand total. */
1858 tab_hline (table, TAL_1, -1, n_cols, 0);
1859 for (c = 0; c <= n_cols; c++)
1861 double ct = c < n_cols ? col_tot[c] : W;
1862 int mark_missing = 0;
1866 if (cmd.miss == CRS_REPORT && c < n_cols
1867 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1870 for (i = 0; i < num_cells; i++)
1892 case CRS_CL_EXPECTED:
1893 case CRS_CL_RESIDUAL:
1894 case CRS_CL_SRESIDUAL:
1895 case CRS_CL_ASRESIDUAL:
1902 format_cell_entry (table, c, i, v, suffix, mark_missing);
1907 tab_offset (table, -1, tab_row (table) + last_row);
1910 tab_offset (table, 0, -1);
1913 static void calc_r (double *X, double *Y, double *, double *, double *);
1914 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1916 /* Display chi-square statistics. */
1918 display_chisq (void)
1920 static const char *chisq_stats[N_CHISQ] =
1922 N_("Pearson Chi-Square"),
1923 N_("Likelihood Ratio"),
1924 N_("Fisher's Exact Test"),
1925 N_("Continuity Correction"),
1926 N_("Linear-by-Linear Association"),
1928 double chisq_v[N_CHISQ];
1929 double fisher1, fisher2;
1935 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1937 tab_offset (chisq, nvar - 2, -1);
1939 for (i = 0; i < N_CHISQ; i++)
1941 if ((i != 2 && chisq_v[i] == SYSMIS)
1942 || (i == 2 && fisher1 == SYSMIS))
1946 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1949 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1950 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1951 tab_float (chisq, 3, 0, TAB_RIGHT,
1952 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1957 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1958 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1960 tab_next_row (chisq);
1963 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1964 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1965 tab_next_row (chisq);
1967 tab_offset (chisq, 0, -1);
1970 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1971 double[N_SYMMETRIC]);
1973 /* Display symmetric measures. */
1975 display_symmetric (void)
1977 static const char *categories[] =
1979 N_("Nominal by Nominal"),
1980 N_("Ordinal by Ordinal"),
1981 N_("Interval by Interval"),
1982 N_("Measure of Agreement"),
1985 static const char *stats[N_SYMMETRIC] =
1989 N_("Contingency Coefficient"),
1990 N_("Kendall's tau-b"),
1991 N_("Kendall's tau-c"),
1993 N_("Spearman Correlation"),
1998 static const int stats_categories[N_SYMMETRIC] =
2000 0, 0, 0, 1, 1, 1, 1, 2, 3,
2004 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2007 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2010 tab_offset (sym, nvar - 2, -1);
2012 for (i = 0; i < N_SYMMETRIC; i++)
2014 if (sym_v[i] == SYSMIS)
2017 if (stats_categories[i] != last_cat)
2019 last_cat = stats_categories[i];
2020 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2023 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2024 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2025 if (sym_ase[i] != SYSMIS)
2026 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2027 if (sym_t[i] != SYSMIS)
2028 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2029 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2033 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2034 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2037 tab_offset (sym, 0, -1);
2040 static int calc_risk (double[], double[], double[], union value *);
2042 /* Display risk estimate. */
2047 double risk_v[3], lower[3], upper[3];
2051 if (!calc_risk (risk_v, upper, lower, c))
2054 tab_offset (risk, nvar - 2, -1);
2056 for (i = 0; i < 3; i++)
2058 if (risk_v[i] == SYSMIS)
2064 if (x->vars[COL_VAR]->type == NUMERIC)
2065 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2066 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2068 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2069 x->vars[COL_VAR]->name,
2070 x->vars[COL_VAR]->width, c[0].s,
2071 x->vars[COL_VAR]->width, c[1].s);
2075 if (x->vars[ROW_VAR]->type == NUMERIC)
2076 sprintf (buf, _("For cohort %s = %g"),
2077 x->vars[ROW_VAR]->name, rows[i - 1].f);
2079 sprintf (buf, _("For cohort %s = %.*s"),
2080 x->vars[ROW_VAR]->name,
2081 x->vars[ROW_VAR]->width, rows[i - 1].s);
2085 tab_text (risk, 0, 0, TAB_LEFT, buf);
2086 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2087 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2088 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2089 tab_next_row (risk);
2092 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2093 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2094 tab_next_row (risk);
2096 tab_offset (risk, 0, -1);
2099 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2100 double[N_DIRECTIONAL]);
2102 /* Display directional measures. */
2104 display_directional (void)
2106 static const char *categories[] =
2108 N_("Nominal by Nominal"),
2109 N_("Ordinal by Ordinal"),
2110 N_("Nominal by Interval"),
2113 static const char *stats[] =
2116 N_("Goodman and Kruskal tau"),
2117 N_("Uncertainty Coefficient"),
2122 static const char *types[] =
2129 static const int stats_categories[N_DIRECTIONAL] =
2131 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2134 static const int stats_stats[N_DIRECTIONAL] =
2136 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2139 static const int stats_types[N_DIRECTIONAL] =
2141 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2144 static const int *stats_lookup[] =
2151 static const char **stats_names[] =
2163 double direct_v[N_DIRECTIONAL];
2164 double direct_ase[N_DIRECTIONAL];
2165 double direct_t[N_DIRECTIONAL];
2169 if (!calc_directional (direct_v, direct_ase, direct_t))
2172 tab_offset (direct, nvar - 2, -1);
2174 for (i = 0; i < N_DIRECTIONAL; i++)
2176 if (direct_v[i] == SYSMIS)
2182 for (j = 0; j < 3; j++)
2183 if (last[j] != stats_lookup[j][i])
2186 tab_hline (direct, TAL_1, j, 6, 0);
2191 int k = last[j] = stats_lookup[j][i];
2196 string = x->vars[0]->name;
2198 string = x->vars[1]->name;
2200 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2201 gettext (stats_names[j][k]), string);
2206 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2207 if (direct_ase[i] != SYSMIS)
2208 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2209 if (direct_t[i] != SYSMIS)
2210 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2211 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2212 tab_next_row (direct);
2215 tab_offset (direct, 0, -1);
2218 /* Statistical calculations. */
2220 /* Returns the value of the gamma (factorial) function for an integer
2223 gamma_int (double x)
2228 for (i = 2; i < x; i++)
2233 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2235 static inline double
2236 Pr (int a, int b, int c, int d)
2238 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2239 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2240 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2241 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2242 / gamma_int (a + b + c + d + 1.));
2245 /* Swap the contents of A and B. */
2247 swap (int *a, int *b)
2254 /* Calculate significance for Fisher's exact test as specified in
2255 _SPSS Statistical Algorithms_, Appendix 5. */
2257 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2261 if (min (c, d) < min (a, b))
2262 swap (&a, &c), swap (&b, &d);
2263 if (min (b, d) < min (a, c))
2264 swap (&a, &b), swap (&c, &d);
2268 swap (&a, &b), swap (&c, &d);
2270 swap (&a, &c), swap (&b, &d);
2274 for (x = 0; x <= a; x++)
2275 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2277 *fisher2 = *fisher1;
2278 for (x = 1; x <= b; x++)
2279 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2282 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2283 columns with values COLS and N_ROWS rows with values ROWS. Values
2284 in the matrix sum to W. */
2286 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2287 double *fisher1, double *fisher2)
2291 chisq[0] = chisq[1] = 0.;
2292 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2293 *fisher1 = *fisher2 = SYSMIS;
2295 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2297 if (ns_rows <= 1 || ns_cols <= 1)
2299 chisq[0] = chisq[1] = SYSMIS;
2303 for (r = 0; r < n_rows; r++)
2304 for (c = 0; c < n_cols; c++)
2306 const double expected = row_tot[r] * col_tot[c] / W;
2307 const double freq = mat[n_cols * r + c];
2308 const double residual = freq - expected;
2310 chisq[0] += residual * residual / expected;
2312 chisq[1] += freq * log (expected / freq);
2323 /* Calculate Yates and Fisher exact test. */
2324 if (ns_cols == 2 && ns_rows == 2)
2326 double f11, f12, f21, f22;
2332 for (i = j = 0; i < n_cols; i++)
2333 if (col_tot[i] != 0.)
2342 f11 = mat[nz_cols[0]];
2343 f12 = mat[nz_cols[1]];
2344 f21 = mat[nz_cols[0] + n_cols];
2345 f22 = mat[nz_cols[1] + n_cols];
2350 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2353 chisq[3] = (W * x * x
2354 / (f11 + f12) / (f21 + f22)
2355 / (f11 + f21) / (f12 + f22));
2363 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2364 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2367 /* Calculate Mantel-Haenszel. */
2368 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2370 double r, ase_0, ase_1;
2371 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2373 chisq[4] = (W - 1.) * r * r;
2378 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2379 ASE_1, and ase_0 into ASE_0. The row and column values must be
2380 passed in X and Y. */
2382 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2384 double SX, SY, S, T;
2386 double sum_XYf, sum_X2Y2f;
2387 double sum_Xr, sum_X2r;
2388 double sum_Yc, sum_Y2c;
2391 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2392 for (j = 0; j < n_cols; j++)
2394 double fij = mat[j + i * n_cols];
2395 double product = X[i] * Y[j];
2396 double temp = fij * product;
2398 sum_X2Y2f += temp * product;
2401 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2403 sum_Xr += X[i] * row_tot[i];
2404 sum_X2r += X[i] * X[i] * row_tot[i];
2408 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2410 sum_Yc += Y[i] * col_tot[i];
2411 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2415 S = sum_XYf - sum_Xr * sum_Yc / W;
2416 SX = sum_X2r - sum_Xr * sum_Xr / W;
2417 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2420 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2425 for (s = c = 0., i = 0; i < n_rows; i++)
2426 for (j = 0; j < n_cols; j++)
2428 double Xresid, Yresid;
2431 Xresid = X[i] - Xbar;
2432 Yresid = Y[j] - Ybar;
2433 temp = (T * Xresid * Yresid
2435 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2436 y = mat[j + i * n_cols] * temp * temp - c;
2441 *ase_1 = sqrt (s) / (T * T);
2445 static double somers_d_v[3];
2446 static double somers_d_ase[3];
2447 static double somers_d_t[3];
2449 /* Calculate symmetric statistics and their asymptotic standard
2450 errors. Returns 0 if none could be calculated. */
2452 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2453 double t[N_SYMMETRIC])
2455 int q = min (ns_rows, ns_cols);
2464 for (i = 0; i < N_SYMMETRIC; i++)
2465 v[i] = ase[i] = t[i] = SYSMIS;
2468 /* Phi, Cramer's V, contingency coefficient. */
2469 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2471 double Xp = 0.; /* Pearson chi-square. */
2476 for (r = 0; r < n_rows; r++)
2477 for (c = 0; c < n_cols; c++)
2479 const double expected = row_tot[r] * col_tot[c] / W;
2480 const double freq = mat[n_cols * r + c];
2481 const double residual = freq - expected;
2483 Xp += residual * residual / expected;
2487 if (cmd.a_statistics[CRS_ST_PHI])
2489 v[0] = sqrt (Xp / W);
2490 v[1] = sqrt (Xp / (W * (q - 1)));
2492 if (cmd.a_statistics[CRS_ST_CC])
2493 v[2] = sqrt (Xp / (Xp + W));
2496 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2497 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2502 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2509 for (r = 0; r < n_rows; r++)
2510 Dr -= row_tot[r] * row_tot[r];
2511 for (c = 0; c < n_cols; c++)
2512 Dc -= col_tot[c] * col_tot[c];
2518 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2519 for (c = 0; c < n_cols; c++)
2523 for (r = 0; r < n_rows; r++)
2524 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2534 for (i = 0; i < n_rows; i++)
2538 for (j = 1; j < n_cols; j++)
2539 Cij += col_tot[j] - cum[j + i * n_cols];
2542 for (j = 1; j < n_cols; j++)
2543 Dij += cum[j + (i - 1) * n_cols];
2547 double fij = mat[j + i * n_cols];
2553 assert (j < n_cols);
2555 Cij -= col_tot[j] - cum[j + i * n_cols];
2556 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2560 Cij += cum[j - 1 + (i - 1) * n_cols];
2561 Dij -= cum[j + (i - 1) * n_cols];
2567 if (cmd.a_statistics[CRS_ST_BTAU])
2568 v[3] = (P - Q) / sqrt (Dr * Dc);
2569 if (cmd.a_statistics[CRS_ST_CTAU])
2570 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2571 if (cmd.a_statistics[CRS_ST_GAMMA])
2572 v[5] = (P - Q) / (P + Q);
2574 /* ASE for tau-b, tau-c, gamma. Calculations could be
2575 eliminated here, at expense of memory. */
2580 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2581 for (i = 0; i < n_rows; i++)
2585 for (j = 1; j < n_cols; j++)
2586 Cij += col_tot[j] - cum[j + i * n_cols];
2589 for (j = 1; j < n_cols; j++)
2590 Dij += cum[j + (i - 1) * n_cols];
2594 double fij = mat[j + i * n_cols];
2596 if (cmd.a_statistics[CRS_ST_BTAU])
2598 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2599 + v[3] * (row_tot[i] * Dc
2600 + col_tot[j] * Dr));
2601 btau_cum += fij * temp * temp;
2605 const double temp = Cij - Dij;
2606 ctau_cum += fij * temp * temp;
2609 if (cmd.a_statistics[CRS_ST_GAMMA])
2611 const double temp = Q * Cij - P * Dij;
2612 gamma_cum += fij * temp * temp;
2615 if (cmd.a_statistics[CRS_ST_D])
2617 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2618 - (P - Q) * (W - row_tot[i]));
2619 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2620 - (Q - P) * (W - col_tot[j]));
2625 assert (j < n_cols);
2627 Cij -= col_tot[j] - cum[j + i * n_cols];
2628 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2632 Cij += cum[j - 1 + (i - 1) * n_cols];
2633 Dij -= cum[j + (i - 1) * n_cols];
2639 btau_var = ((btau_cum
2640 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2642 if (cmd.a_statistics[CRS_ST_BTAU])
2644 ase[3] = sqrt (btau_var);
2645 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2648 if (cmd.a_statistics[CRS_ST_CTAU])
2650 ase[4] = ((2 * q / ((q - 1) * W * W))
2651 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2652 t[4] = v[4] / ase[4];
2654 if (cmd.a_statistics[CRS_ST_GAMMA])
2656 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2657 t[5] = v[5] / (2. / (P + Q)
2658 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2660 if (cmd.a_statistics[CRS_ST_D])
2662 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2663 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2664 somers_d_t[0] = (somers_d_v[0]
2666 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2667 somers_d_v[1] = (P - Q) / Dc;
2668 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2669 somers_d_t[1] = (somers_d_v[1]
2671 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2672 somers_d_v[2] = (P - Q) / Dr;
2673 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2674 somers_d_t[2] = (somers_d_v[2]
2676 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2682 /* Spearman correlation, Pearson's r. */
2683 if (cmd.a_statistics[CRS_ST_CORR])
2685 double *R = local_alloc (sizeof *R * n_rows);
2686 double *C = local_alloc (sizeof *C * n_cols);
2689 double y, t, c = 0., s = 0.;
2694 R[i] = s + (row_tot[i] + 1.) / 2.;
2701 assert (i < n_rows);
2706 double y, t, c = 0., s = 0.;
2711 C[j] = s + (col_tot[j] + 1.) / 2;
2718 assert (j < n_cols);
2722 calc_r (R, C, &v[6], &t[6], &ase[6]);
2728 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2732 /* Cohen's kappa. */
2733 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2735 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2738 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2739 i < ns_rows; i++, j++)
2743 while (col_tot[j] == 0.)
2746 prod = row_tot[i] * col_tot[j];
2747 sum = row_tot[i] + col_tot[j];
2749 sum_fii += mat[j + i * n_cols];
2751 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2752 sum_riciri_ci += prod * sum;
2754 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2755 for (j = 0; j < ns_cols; j++)
2757 double sum = row_tot[i] + col_tot[j];
2758 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2761 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2763 ase[8] = sqrt ((W * W * sum_rici
2764 + sum_rici * sum_rici
2765 - W * sum_riciri_ci)
2766 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2768 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2769 / pow2 (W * W - sum_rici))
2770 + ((2. * (W - sum_fii)
2771 * (2. * sum_fii * sum_rici
2772 - W * sum_fiiri_ci))
2773 / cube (W * W - sum_rici))
2774 + (pow2 (W - sum_fii)
2775 * (W * sum_fijri_ci2 - 4.
2776 * sum_rici * sum_rici)
2777 / pow4 (W * W - sum_rici))));
2779 t[8] = v[8] / ase[8];
2786 /* Calculate risk estimate. */
2788 calc_risk (double *value, double *upper, double *lower, union value *c)
2790 double f11, f12, f21, f22;
2796 for (i = 0; i < 3; i++)
2797 value[i] = upper[i] = lower[i] = SYSMIS;
2800 if (ns_rows != 2 || ns_cols != 2)
2807 for (i = j = 0; i < n_cols; i++)
2808 if (col_tot[i] != 0.)
2817 f11 = mat[nz_cols[0]];
2818 f12 = mat[nz_cols[1]];
2819 f21 = mat[nz_cols[0] + n_cols];
2820 f22 = mat[nz_cols[1] + n_cols];
2822 c[0] = cols[nz_cols[0]];
2823 c[1] = cols[nz_cols[1]];
2826 value[0] = (f11 * f22) / (f12 * f21);
2827 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2828 lower[0] = value[0] * exp (-1.960 * v);
2829 upper[0] = value[0] * exp (1.960 * v);
2831 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2832 v = sqrt ((f12 / (f11 * (f11 + f12)))
2833 + (f22 / (f21 * (f21 + f22))));
2834 lower[1] = value[1] * exp (-1.960 * v);
2835 upper[1] = value[1] * exp (1.960 * v);
2837 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2838 v = sqrt ((f11 / (f12 * (f11 + f12)))
2839 + (f21 / (f22 * (f21 + f22))));
2840 lower[2] = value[2] * exp (-1.960 * v);
2841 upper[2] = value[2] * exp (1.960 * v);
2846 /* Calculate directional measures. */
2848 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2849 double t[N_DIRECTIONAL])
2854 for (i = 0; i < N_DIRECTIONAL; i++)
2855 v[i] = ase[i] = t[i] = SYSMIS;
2859 if (cmd.a_statistics[CRS_ST_LAMBDA])
2861 double *fim = xmalloc (sizeof *fim * n_rows);
2862 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2863 double *fmj = xmalloc (sizeof *fmj * n_cols);
2864 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2865 double sum_fim, sum_fmj;
2867 int rm_index, cm_index;
2870 /* Find maximum for each row and their sum. */
2871 for (sum_fim = 0., i = 0; i < n_rows; i++)
2873 double max = mat[i * n_cols];
2876 for (j = 1; j < n_cols; j++)
2877 if (mat[j + i * n_cols] > max)
2879 max = mat[j + i * n_cols];
2883 sum_fim += fim[i] = max;
2884 fim_index[i] = index;
2887 /* Find maximum for each column. */
2888 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2890 double max = mat[j];
2893 for (i = 1; i < n_rows; i++)
2894 if (mat[j + i * n_cols] > max)
2896 max = mat[j + i * n_cols];
2900 sum_fmj += fmj[j] = max;
2901 fmj_index[j] = index;
2904 /* Find maximum row total. */
2907 for (i = 1; i < n_rows; i++)
2908 if (row_tot[i] > rm)
2914 /* Find maximum column total. */
2917 for (j = 1; j < n_cols; j++)
2918 if (col_tot[j] > cm)
2924 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2925 v[1] = (sum_fmj - rm) / (W - rm);
2926 v[2] = (sum_fim - cm) / (W - cm);
2928 /* ASE1 for Y given X. */
2932 for (accum = 0., i = 0; i < n_rows; i++)
2933 for (j = 0; j < n_cols; j++)
2935 const int deltaj = j == cm_index;
2936 accum += (mat[j + i * n_cols]
2937 * pow2 ((j == fim_index[i])
2942 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2945 /* ASE0 for Y given X. */
2949 for (accum = 0., i = 0; i < n_rows; i++)
2950 if (cm_index != fim_index[i])
2951 accum += (mat[i * n_cols + fim_index[i]]
2952 + mat[i * n_cols + cm_index]);
2953 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2956 /* ASE1 for X given Y. */
2960 for (accum = 0., i = 0; i < n_rows; i++)
2961 for (j = 0; j < n_cols; j++)
2963 const int deltaj = i == rm_index;
2964 accum += (mat[j + i * n_cols]
2965 * pow2 ((i == fmj_index[j])
2970 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2973 /* ASE0 for X given Y. */
2977 for (accum = 0., j = 0; j < n_cols; j++)
2978 if (rm_index != fmj_index[j])
2979 accum += (mat[j + n_cols * fmj_index[j]]
2980 + mat[j + n_cols * rm_index]);
2981 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2984 /* Symmetric ASE0 and ASE1. */
2989 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
2990 for (j = 0; j < n_cols; j++)
2992 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
2993 int temp1 = (i == rm_index) + (j == cm_index);
2994 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
2995 accum1 += (mat[j + i * n_cols]
2996 * pow2 (temp0 + (v[0] - 1.) * temp1));
2998 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
2999 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3000 / (2. * W - rm - cm));
3009 double sum_fij2_ri, sum_fij2_ci;
3010 double sum_ri2, sum_cj2;
3012 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3013 for (j = 0; j < n_cols; j++)
3015 double temp = pow2 (mat[j + i * n_cols]);
3016 sum_fij2_ri += temp / row_tot[i];
3017 sum_fij2_ci += temp / col_tot[j];
3020 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3021 sum_ri2 += row_tot[i] * row_tot[i];
3023 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3024 sum_cj2 += col_tot[j] * col_tot[j];
3026 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3027 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3031 if (cmd.a_statistics[CRS_ST_UC])
3033 double UX, UY, UXY, P;
3034 double ase1_yx, ase1_xy, ase1_sym;
3037 for (UX = 0., i = 0; i < n_rows; i++)
3038 if (row_tot[i] > 0.)
3039 UX -= row_tot[i] / W * log (row_tot[i] / W);
3041 for (UY = 0., j = 0; j < n_cols; j++)
3042 if (col_tot[j] > 0.)
3043 UY -= col_tot[j] / W * log (col_tot[j] / W);
3045 for (UXY = P = 0., i = 0; i < n_rows; i++)
3046 for (j = 0; j < n_cols; j++)
3048 double entry = mat[j + i * n_cols];
3053 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3054 UXY -= entry / W * log (entry / W);
3057 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3058 for (j = 0; j < n_cols; j++)
3060 double entry = mat[j + i * n_cols];
3065 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3066 + (UX - UXY) * log (col_tot[j] / W));
3067 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3068 + (UY - UXY) * log (row_tot[i] / W));
3069 ase1_sym += entry * pow2 ((UXY
3070 * log (row_tot[i] * col_tot[j] / (W * W)))
3071 - (UX + UY) * log (entry / W));
3074 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3075 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3076 t[5] = v[5] / ((2. / (W * (UX + UY)))
3077 * sqrt (P - pow2 (UX + UY - UXY) / W));
3079 v[6] = (UX + UY - UXY) / UX;
3080 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3081 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3083 v[7] = (UX + UY - UXY) / UY;
3084 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3085 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3089 if (cmd.a_statistics[CRS_ST_D])
3094 calc_symmetric (NULL, NULL, NULL);
3095 for (i = 0; i < 3; i++)
3097 v[8 + i] = somers_d_v[i];
3098 ase[8 + i] = somers_d_ase[i];
3099 t[8 + i] = somers_d_t[i];
3104 if (cmd.a_statistics[CRS_ST_ETA])
3107 double sum_Xr, sum_X2r;
3111 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3113 sum_Xr += rows[i].f * row_tot[i];
3114 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3116 SX = sum_X2r - sum_Xr * sum_Xr / W;
3118 for (SXW = 0., j = 0; j < n_cols; j++)
3122 for (cum = 0., i = 0; i < n_rows; i++)
3124 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3125 cum += rows[i].f * mat[j + i * n_cols];
3128 SXW -= cum * cum / col_tot[j];
3130 v[11] = sqrt (1. - SXW / SX);
3134 double sum_Yc, sum_Y2c;
3138 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3140 sum_Yc += cols[i].f * col_tot[i];
3141 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3143 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3145 for (SYW = 0., i = 0; i < n_rows; i++)
3149 for (cum = 0., j = 0; j < n_cols; j++)
3151 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3152 cum += cols[j].f * mat[j + i * n_cols];
3155 SYW -= cum * cum / row_tot[i];
3157 v[12] = sqrt (1. - SYW / SY);
3164 /* A wrapper around data_out() that limits string output to short
3165 string width and null terminates the result. */
3167 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3169 struct fmt_spec fmt_subst;
3171 /* Limit to short string width. */
3172 if (formats[fp->type].cat & FCAT_STRING)
3176 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3177 if (fmt_subst.type == FMT_A)
3178 fmt_subst.w = min (8, fmt_subst.w);
3180 fmt_subst.w = min (16, fmt_subst.w);
3186 data_out (s, fp, v);
3188 /* Null terminate. */