1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include <gsl/gsl_cdf.h>
41 #include "dictionary.h"
52 #include "value-labels.h"
54 #include "procedure.h"
57 #define _(msgid) gettext (msgid)
58 #define N_(msgid) msgid
62 #include "debug-print.h"
68 +missing=miss:!table/include/report;
69 +write[wr_]=none,cells,all;
70 +format=fmt:!labels/nolabels/novallabs,
73 tabl:!tables/notables,
76 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
78 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
84 /* Number of chi-square statistics. */
87 /* Number of symmetric statistics. */
90 /* Number of directional statistics. */
91 #define N_DIRECTIONAL 13
93 /* A single table entry for general mode. */
96 int table; /* Flattened table number. */
99 double freq; /* Frequency count. */
100 double *data; /* Crosstabulation table for integer mode. */
103 union value values[1]; /* Values. */
106 /* A crosstabulation. */
109 int nvar; /* Number of variables. */
110 double missing; /* Missing cases count. */
111 int ofs; /* Integer mode: Offset into sorted_tab[]. */
112 struct variable *vars[2]; /* At least two variables; sorted by
113 larger indices first. */
116 /* Integer mode variable info. */
119 int min; /* Minimum value. */
120 int max; /* Maximum value + 1. */
121 int count; /* max - min. */
124 static inline struct var_range *
125 get_var_range (struct variable *v)
128 assert (v->aux != NULL);
132 /* Indexes into crosstab.v. */
139 /* General mode crosstabulation table. */
140 static struct hsh_table *gen_tab; /* Hash table. */
141 static int n_sorted_tab; /* Number of entries in sorted_tab. */
142 static struct table_entry **sorted_tab; /* Sorted table. */
144 /* Variables specifies on VARIABLES. */
145 static struct variable **variables;
146 static size_t variables_cnt;
149 static struct crosstab **xtab;
152 /* Integer or general mode? */
161 static int num_cells; /* Number of cells requested. */
162 static int cells[8]; /* Cells requested. */
165 static int write; /* One of WR_* that specifies the WRITE style. */
167 /* Command parsing info. */
168 static struct cmd_crosstabs cmd;
171 static struct pool *pl_tc; /* For table cells. */
172 static struct pool *pl_col; /* For column data. */
174 static int internal_cmd_crosstabs (void);
175 static void precalc (void *);
176 static bool calc_general (struct ccase *, void *);
177 static bool calc_integer (struct ccase *, void *);
178 static void postcalc (void *);
179 static void submit (struct tab_table *);
181 static void format_short (char *s, const struct fmt_spec *fp,
182 const union value *v);
184 /* Parse and execute CROSSTABS, then clean up. */
188 int result = internal_cmd_crosstabs ();
191 pool_destroy (pl_tc);
192 pool_destroy (pl_col);
197 /* Parses and executes the CROSSTABS procedure. */
199 internal_cmd_crosstabs (void)
208 pl_tc = pool_create ();
209 pl_col = pool_create ();
211 if (!parse_crosstabs (&cmd))
214 mode = variables ? INTEGER : GENERAL;
219 cmd.a_cells[CRS_CL_COUNT] = 1;
225 for (i = 0; i < CRS_CL_count; i++)
230 cmd.a_cells[CRS_CL_COUNT] = 1;
231 cmd.a_cells[CRS_CL_ROW] = 1;
232 cmd.a_cells[CRS_CL_COLUMN] = 1;
233 cmd.a_cells[CRS_CL_TOTAL] = 1;
235 if (cmd.a_cells[CRS_CL_ALL])
237 for (i = 0; i < CRS_CL_count; i++)
239 cmd.a_cells[CRS_CL_ALL] = 0;
241 cmd.a_cells[CRS_CL_NONE] = 0;
243 for (num_cells = i = 0; i < CRS_CL_count; i++)
245 cells[num_cells++] = i;
248 if (cmd.sbc_statistics)
253 for (i = 0; i < CRS_ST_count; i++)
254 if (cmd.a_statistics[i])
257 cmd.a_statistics[CRS_ST_CHISQ] = 1;
258 if (cmd.a_statistics[CRS_ST_ALL])
259 for (i = 0; i < CRS_ST_count; i++)
260 cmd.a_statistics[i] = 1;
264 if (cmd.miss == CRS_REPORT && mode == GENERAL)
266 msg (SE, _("Missing mode REPORT not allowed in general mode. "
267 "Assuming MISSING=TABLE."));
268 cmd.miss = CRS_TABLE;
272 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
273 cmd.a_write[CRS_WR_ALL] = 0;
274 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
276 msg (SE, _("Write mode ALL not allowed in general mode. "
277 "Assuming WRITE=CELLS."));
278 cmd.a_write[CRS_WR_CELLS] = 1;
281 && (cmd.a_write[CRS_WR_NONE]
282 + cmd.a_write[CRS_WR_ALL]
283 + cmd.a_write[CRS_WR_CELLS] == 0))
284 cmd.a_write[CRS_WR_CELLS] = 1;
285 if (cmd.a_write[CRS_WR_CELLS])
286 write = CRS_WR_CELLS;
287 else if (cmd.a_write[CRS_WR_ALL])
292 ok = procedure_with_splits (precalc,
293 mode == GENERAL ? calc_general : calc_integer,
296 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
299 /* Parses the TABLES subcommand. */
301 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
303 struct var_set *var_set;
305 struct variable ***by = NULL;
306 size_t *by_nvar = NULL;
310 /* Ensure that this is a TABLES subcommand. */
311 if (!lex_match_id ("TABLES")
312 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
317 if (variables != NULL)
318 var_set = var_set_create_from_array (variables, variables_cnt);
320 var_set = var_set_create_from_dict (default_dict);
321 assert (var_set != NULL);
325 by = xnrealloc (by, n_by + 1, sizeof *by);
326 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
327 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
328 PV_NO_DUPLICATE | PV_NO_SCRATCH))
330 if (xalloc_oversized (nx, by_nvar[n_by]))
332 msg (SE, _("Too many crosstabulation variables or dimensions."));
338 if (!lex_match (T_BY))
342 lex_error (_("expecting BY"));
351 int *by_iter = xcalloc (n_by, sizeof *by_iter);
354 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
355 for (i = 0; i < nx; i++)
359 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
366 for (i = 0; i < n_by; i++)
367 x->vars[i] = by[i][by_iter[i]];
373 for (i = n_by - 1; i >= 0; i--)
375 if (++by_iter[i] < by_nvar[i])
388 /* All return paths lead here. */
392 for (i = 0; i < n_by; i++)
398 var_set_destroy (var_set);
403 /* Parses the VARIABLES subcommand. */
405 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
409 msg (SE, _("VARIABLES must be specified before TABLES."));
417 size_t orig_nv = variables_cnt;
422 if (!parse_variables (default_dict, &variables, &variables_cnt,
423 (PV_APPEND | PV_NUMERIC
424 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
429 lex_error ("expecting `('");
434 if (!lex_force_int ())
436 min = lex_integer ();
441 if (!lex_force_int ())
443 max = lex_integer ();
446 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
454 lex_error ("expecting `)'");
459 for (i = orig_nv; i < variables_cnt; i++)
461 struct var_range *vr = xmalloc (sizeof *vr);
464 vr->count = max - min + 1;
465 var_attach_aux (variables[i], vr, var_dtor_free);
480 /* Data file processing. */
482 static int compare_table_entry (const void *, const void *, void *);
483 static unsigned hash_table_entry (const void *, void *);
485 /* Set up the crosstabulation tables for processing. */
487 precalc (void *aux UNUSED)
491 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
501 for (i = 0; i < nxtab; i++)
503 struct crosstab *x = xtab[i];
508 x->ofs = n_sorted_tab;
510 for (j = 2; j < x->nvar; j++)
511 count *= get_var_range (x->vars[j - 2])->count;
513 sorted_tab = xnrealloc (sorted_tab,
514 n_sorted_tab + count, sizeof *sorted_tab);
515 v = local_alloc (sizeof *v * x->nvar);
516 for (j = 2; j < x->nvar; j++)
517 v[j] = get_var_range (x->vars[j])->min;
518 for (j = 0; j < count; j++)
520 struct table_entry *te;
523 te = sorted_tab[n_sorted_tab++]
524 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
528 int row_cnt = get_var_range (x->vars[0])->count;
529 int col_cnt = get_var_range (x->vars[1])->count;
530 const int mat_size = row_cnt * col_cnt;
533 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
534 for (m = 0; m < mat_size; m++)
538 for (k = 2; k < x->nvar; k++)
539 te->values[k].f = v[k];
540 for (k = 2; k < x->nvar; k++)
542 struct var_range *vr = get_var_range (x->vars[k]);
543 if (++v[k] >= vr->max)
552 sorted_tab = xnrealloc (sorted_tab,
553 n_sorted_tab + 1, sizeof *sorted_tab);
554 sorted_tab[n_sorted_tab] = NULL;
558 /* Form crosstabulations for general mode. */
560 calc_general (struct ccase *c, void *aux UNUSED)
565 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
567 /* Flattened current table index. */
570 for (t = 0; t < nxtab; t++)
572 struct crosstab *x = xtab[t];
573 const size_t entry_size = (sizeof (struct table_entry)
574 + sizeof (union value) * (x->nvar - 1));
575 struct table_entry *te = local_alloc (entry_size);
577 /* Construct table entry for the current record and table. */
583 for (j = 0; j < x->nvar; j++)
585 const union value *v = case_data (c, x->vars[j]->fv);
586 const struct missing_values *mv = &x->vars[j]->miss;
587 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
588 || (cmd.miss == CRS_INCLUDE
589 && mv_is_value_system_missing (mv, v)))
591 x->missing += weight;
595 if (x->vars[j]->type == NUMERIC)
596 te->values[j].f = case_num (c, x->vars[j]->fv);
599 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
602 /* Necessary in order to simplify comparisons. */
603 memset (&te->values[j].s[x->vars[j]->width], 0,
604 sizeof (union value) - x->vars[j]->width);
609 /* Add record to hash table. */
611 struct table_entry **tepp
612 = (struct table_entry **) hsh_probe (gen_tab, te);
615 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
618 memcpy (tep, te, entry_size);
623 (*tepp)->u.freq += weight;
634 calc_integer (struct ccase *c, void *aux UNUSED)
639 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
641 /* Flattened current table index. */
644 for (t = 0; t < nxtab; t++)
646 struct crosstab *x = xtab[t];
651 for (i = 0; i < x->nvar; i++)
653 struct variable *const v = x->vars[i];
654 struct var_range *vr = get_var_range (v);
655 double value = case_num (c, v->fv);
657 /* Note that the first test also rules out SYSMIS. */
658 if ((value < vr->min || value >= vr->max)
659 || (cmd.miss == CRS_TABLE
660 && mv_is_num_user_missing (&v->miss, value)))
662 x->missing += weight;
668 ofs += fact * ((int) value - vr->min);
674 struct variable *row_var = x->vars[ROW_VAR];
675 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
677 struct variable *col_var = x->vars[COL_VAR];
678 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
680 const int col_dim = get_var_range (col_var)->count;
682 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
691 /* Compare the table_entry's at A and B and return a strcmp()-type
694 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
696 const struct table_entry *a = a_;
697 const struct table_entry *b = b_;
699 if (a->table > b->table)
701 else if (a->table < b->table)
705 const struct crosstab *x = xtab[a->table];
708 for (i = x->nvar - 1; i >= 0; i--)
709 if (x->vars[i]->type == NUMERIC)
711 const double diffnum = a->values[i].f - b->values[i].f;
714 else if (diffnum > 0)
719 assert (x->vars[i]->type == ALPHA);
721 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
732 /* Calculate a hash value from table_entry A. */
734 hash_table_entry (const void *a_, void *foo UNUSED)
736 const struct table_entry *a = a_;
741 for (i = 0; i < xtab[a->table]->nvar; i++)
742 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
747 /* Post-data reading calculations. */
749 static struct table_entry **find_pivot_extent (struct table_entry **,
750 int *cnt, int pivot);
751 static void enum_var_values (struct table_entry **entries, int entry_cnt,
753 union value **values, int *value_cnt);
754 static void output_pivot_table (struct table_entry **, struct table_entry **,
755 double **, double **, double **,
756 int *, int *, int *);
757 static void make_summary_table (void);
760 postcalc (void *aux UNUSED)
764 n_sorted_tab = hsh_count (gen_tab);
765 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
768 make_summary_table ();
770 /* Identify all the individual crosstabulation tables, and deal with
773 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
774 int pc = n_sorted_tab; /* Pivot count. */
776 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
777 int maxrows = 0, maxcols = 0, maxcells = 0;
781 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
785 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
786 &maxrows, &maxcols, &maxcells);
795 hsh_destroy (gen_tab);
798 static void insert_summary (struct tab_table *, int tab_index, double valid);
800 /* Output a table summarizing the cases processed. */
802 make_summary_table (void)
804 struct tab_table *summary;
806 struct table_entry **pb = sorted_tab, **pe;
807 int pc = n_sorted_tab;
810 summary = tab_create (7, 3 + nxtab, 1);
811 tab_title (summary, 0, _("Summary."));
812 tab_headers (summary, 1, 0, 3, 0);
813 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
814 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
815 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
816 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
817 tab_hline (summary, TAL_1, 1, 6, 1);
818 tab_hline (summary, TAL_1, 1, 6, 2);
819 tab_vline (summary, TAL_1, 3, 1, 1);
820 tab_vline (summary, TAL_1, 5, 1, 1);
824 for (i = 0; i < 3; i++)
826 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
827 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
830 tab_offset (summary, 0, 3);
836 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
840 while (cur_tab < (*pb)->table)
841 insert_summary (summary, cur_tab++, 0.);
844 for (valid = 0.; pb < pe; pb++)
845 valid += (*pb)->u.freq;
848 const struct crosstab *const x = xtab[(*pb)->table];
849 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
850 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
851 const int count = n_cols * n_rows;
853 for (valid = 0.; pb < pe; pb++)
855 const double *data = (*pb)->u.data;
858 for (i = 0; i < count; i++)
862 insert_summary (summary, cur_tab++, valid);
867 while (cur_tab < nxtab)
868 insert_summary (summary, cur_tab++, 0.);
873 /* Inserts a line into T describing the crosstabulation at index
874 TAB_INDEX, which has VALID valid observations. */
876 insert_summary (struct tab_table *t, int tab_index, double valid)
878 struct crosstab *x = xtab[tab_index];
880 tab_hline (t, TAL_1, 0, 6, 0);
882 /* Crosstabulation name. */
884 char *buf = local_alloc (128 * x->nvar);
888 for (i = 0; i < x->nvar; i++)
891 cp = stpcpy (cp, " * ");
894 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
896 tab_text (t, 0, 0, TAB_LEFT, buf);
901 /* Counts and percentages. */
911 for (i = 0; i < 3; i++)
913 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
914 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
925 static struct tab_table *table; /* Crosstabulation table. */
926 static struct tab_table *chisq; /* Chi-square table. */
927 static struct tab_table *sym; /* Symmetric measures table. */
928 static struct tab_table *risk; /* Risk estimate table. */
929 static struct tab_table *direct; /* Directional measures table. */
932 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
934 /* Column values, number of columns. */
935 static union value *cols;
938 /* Row values, number of rows. */
939 static union value *rows;
942 /* Number of statistically interesting columns/rows (columns/rows with
944 static int ns_cols, ns_rows;
946 /* Crosstabulation. */
947 static struct crosstab *x;
949 /* Number of variables from the crosstabulation to consider. This is
950 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
953 /* Matrix contents. */
954 static double *mat; /* Matrix proper. */
955 static double *row_tot; /* Row totals. */
956 static double *col_tot; /* Column totals. */
957 static double W; /* Grand total. */
959 static void display_dimensions (struct tab_table *, int first_difference,
960 struct table_entry *);
961 static void display_crosstabulation (void);
962 static void display_chisq (void);
963 static void display_symmetric (void);
964 static void display_risk (void);
965 static void display_directional (void);
966 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
967 static void table_value_missing (struct tab_table *table, int c, int r,
968 unsigned char opt, const union value *v,
969 const struct variable *var);
970 static void delete_missing (void);
972 /* Output pivot table beginning at PB and continuing until PE,
973 exclusive. For efficiency, *MATP is a pointer to a matrix that can
974 hold *MAXROWS entries. */
976 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
977 double **matp, double **row_totp, double **col_totp,
978 int *maxrows, int *maxcols, int *maxcells)
981 struct table_entry **tb = pb, **te; /* Table begin, table end. */
982 int tc = pe - pb; /* Table count. */
984 /* Table entry for header comparison. */
985 struct table_entry *cmp = NULL;
987 x = xtab[(*pb)->table];
988 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
990 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
992 /* Crosstabulation table initialization. */
995 table = tab_create (nvar + n_cols,
996 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
997 tab_headers (table, nvar - 1, 0, 2, 0);
999 /* First header line. */
1000 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1001 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1003 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1005 /* Second header line. */
1009 for (i = 2; i < nvar; i++)
1010 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1011 TAB_RIGHT | TAT_TITLE,
1013 ? x->vars[i]->label : x->vars[i]->name));
1014 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1015 x->vars[ROW_VAR]->name);
1016 for (i = 0; i < n_cols; i++)
1017 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1019 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1022 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1023 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1027 char *title = local_alloc (x->nvar * 64 + 128);
1031 if (cmd.pivot == CRS_PIVOT)
1032 for (i = 0; i < nvar; i++)
1035 cp = stpcpy (cp, " by ");
1036 cp = stpcpy (cp, x->vars[i]->name);
1040 cp = spprintf (cp, "%s by %s for",
1041 x->vars[0]->name, x->vars[1]->name);
1042 for (i = 2; i < nvar; i++)
1044 char buf[64], *bufp;
1049 cp = stpcpy (cp, x->vars[i]->name);
1051 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1052 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1054 cp = stpcpy (cp, bufp);
1058 cp = stpcpy (cp, " [");
1059 for (i = 0; i < num_cells; i++)
1067 static const struct tuple cell_names[] =
1069 {CRS_CL_COUNT, N_("count")},
1070 {CRS_CL_ROW, N_("row %")},
1071 {CRS_CL_COLUMN, N_("column %")},
1072 {CRS_CL_TOTAL, N_("total %")},
1073 {CRS_CL_EXPECTED, N_("expected")},
1074 {CRS_CL_RESIDUAL, N_("residual")},
1075 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1076 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1080 const struct tuple *t;
1082 for (t = cell_names; t->value != cells[i]; t++)
1083 assert (t->value != -1);
1085 cp = stpcpy (cp, ", ");
1086 cp = stpcpy (cp, gettext (t->name));
1090 tab_title (table, 0, title);
1094 tab_offset (table, 0, 2);
1099 /* Chi-square table initialization. */
1100 if (cmd.a_statistics[CRS_ST_CHISQ])
1102 chisq = tab_create (6 + (nvar - 2),
1103 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1104 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1106 tab_title (chisq, 0, "Chi-square tests.");
1108 tab_offset (chisq, nvar - 2, 0);
1109 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1110 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1111 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1112 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1113 _("Asymp. Sig. (2-sided)"));
1114 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1115 _("Exact. Sig. (2-sided)"));
1116 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1117 _("Exact. Sig. (1-sided)"));
1119 tab_offset (chisq, 0, 1);
1124 /* Symmetric measures. */
1125 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1126 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1127 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1128 || cmd.a_statistics[CRS_ST_KAPPA])
1130 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1131 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1132 tab_title (sym, 0, "Symmetric measures.");
1134 tab_offset (sym, nvar - 2, 0);
1135 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1136 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1137 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1138 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1139 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1140 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1141 tab_offset (sym, 0, 1);
1146 /* Risk estimate. */
1147 if (cmd.a_statistics[CRS_ST_RISK])
1149 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1150 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1151 tab_title (risk, 0, "Risk estimate.");
1153 tab_offset (risk, nvar - 2, 0);
1154 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1155 _(" 95%% Confidence Interval"));
1156 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1157 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1158 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1159 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1160 tab_hline (risk, TAL_1, 2, 3, 1);
1161 tab_vline (risk, TAL_1, 2, 0, 1);
1162 tab_offset (risk, 0, 2);
1167 /* Directional measures. */
1168 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1169 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1171 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1172 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1173 tab_title (direct, 0, "Directional measures.");
1175 tab_offset (direct, nvar - 2, 0);
1176 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1177 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1178 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1179 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1180 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1181 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1182 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1183 tab_offset (direct, 0, 1);
1190 /* Find pivot subtable if applicable. */
1191 te = find_pivot_extent (tb, &tc, 0);
1195 /* Find all the row variable values. */
1196 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1198 /* Allocate memory space for the column and row totals. */
1199 if (n_rows > *maxrows)
1201 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1202 row_tot = *row_totp;
1205 if (n_cols > *maxcols)
1207 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1208 col_tot = *col_totp;
1212 /* Allocate table space for the matrix. */
1213 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1214 tab_realloc (table, -1,
1215 max (tab_nr (table) + (n_rows + 1) * num_cells,
1216 tab_nr (table) * (pe - pb) / (te - tb)));
1218 if (mode == GENERAL)
1220 /* Allocate memory space for the matrix. */
1221 if (n_cols * n_rows > *maxcells)
1223 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1224 *maxcells = n_cols * n_rows;
1229 /* Build the matrix and calculate column totals. */
1231 union value *cur_col = cols;
1232 union value *cur_row = rows;
1234 double *cp = col_tot;
1235 struct table_entry **p;
1238 for (p = &tb[0]; p < te; p++)
1240 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1244 for (; cur_row < &rows[n_rows]; cur_row++)
1250 mp = &mat[cur_col - cols];
1253 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1260 *cp += *mp = (*p)->u.freq;
1265 /* Zero out the rest of the matrix. */
1266 for (; cur_row < &rows[n_rows]; cur_row++)
1272 if (cur_col < &cols[n_cols])
1274 const int rem_cols = n_cols - (cur_col - cols);
1277 for (c = 0; c < rem_cols; c++)
1279 mp = &mat[cur_col - cols];
1280 for (r = 0; r < n_rows; r++)
1282 for (c = 0; c < rem_cols; c++)
1284 mp += n_cols - rem_cols;
1292 double *tp = col_tot;
1294 assert (mode == INTEGER);
1295 mat = (*tb)->u.data;
1298 /* Calculate column totals. */
1299 for (c = 0; c < n_cols; c++)
1302 double *cp = &mat[c];
1304 for (r = 0; r < n_rows; r++)
1305 cum += cp[r * n_cols];
1313 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1314 ns_cols += *cp != 0.;
1317 /* Calculate row totals. */
1320 double *rp = row_tot;
1323 for (ns_rows = 0, r = n_rows; r--; )
1326 for (c = n_cols; c--; )
1334 /* Calculate grand total. */
1340 if (n_rows < n_cols)
1341 tp = row_tot, n = n_rows;
1343 tp = col_tot, n = n_cols;
1349 /* Find the first variable that differs from the last subtable,
1350 then display the values of the dimensioning variables for
1351 each table that needs it. */
1353 int first_difference = nvar - 1;
1356 for (; ; first_difference--)
1358 assert (first_difference >= 2);
1359 if (memcmp (&cmp->values[first_difference],
1360 &(*tb)->values[first_difference],
1361 sizeof *cmp->values))
1367 display_dimensions (table, first_difference, *tb);
1369 display_dimensions (chisq, first_difference, *tb);
1371 display_dimensions (sym, first_difference, *tb);
1373 display_dimensions (risk, first_difference, *tb);
1375 display_dimensions (direct, first_difference, *tb);
1379 display_crosstabulation ();
1380 if (cmd.miss == CRS_REPORT)
1385 display_symmetric ();
1389 display_directional ();
1400 tab_resize (chisq, 4 + (nvar - 2), -1);
1411 /* Delete missing rows and columns for statistical analysis when
1414 delete_missing (void)
1419 for (r = 0; r < n_rows; r++)
1420 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1424 for (c = 0; c < n_cols; c++)
1425 mat[c + r * n_cols] = 0.;
1433 for (c = 0; c < n_cols; c++)
1434 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1438 for (r = 0; r < n_rows; r++)
1439 mat[c + r * n_cols] = 0.;
1445 /* Prepare table T for submission, and submit it. */
1447 submit (struct tab_table *t)
1454 tab_resize (t, -1, 0);
1455 if (tab_nr (t) == tab_t (t))
1460 tab_offset (t, 0, 0);
1462 for (i = 2; i < nvar; i++)
1463 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1464 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1465 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1466 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1468 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1470 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1471 tab_dim (t, crosstabs_dim);
1475 /* Sets the widths of all the columns and heights of all the rows in
1476 table T for driver D. */
1478 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1482 /* Width of a numerical column. */
1483 int c = outp_string_width (d, "0.000000");
1484 if (cmd.miss == CRS_REPORT)
1485 c += outp_string_width (d, "M");
1487 /* Set width for header columns. */
1490 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1492 if (w < d->prop_em_width * 8)
1493 w = d->prop_em_width * 8;
1495 if (w > d->prop_em_width * 15)
1496 w = d->prop_em_width * 15;
1498 for (i = 0; i < t->l; i++)
1502 for (i = t->l; i < t->nc; i++)
1505 for (i = 0; i < t->nr; i++)
1506 t->h[i] = tab_natural_height (t, d, i);
1509 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1510 int *cnt, int pivot);
1511 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1512 int *cnt, int pivot);
1514 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1516 static struct table_entry **
1517 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1519 return (mode == GENERAL
1520 ? find_pivot_extent_general (tp, cnt, pivot)
1521 : find_pivot_extent_integer (tp, cnt, pivot));
1524 /* Find the extent of a region in TP that contains one table. If
1525 PIVOT != 0 that means a set of table entries with identical table
1526 number; otherwise they also have to have the same values for every
1527 dimension after the row and column dimensions. The table that is
1528 searched starts at TP and has length CNT. Returns the first entry
1529 after the last one in the table; sets *CNT to the number of
1530 remaining values. If there are no entries in TP at all, returns
1531 NULL. A yucky interface, admittedly, but it works. */
1532 static struct table_entry **
1533 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1535 struct table_entry *fp = *tp;
1540 x = xtab[(*tp)->table];
1548 if ((*tp)->table != fp->table)
1553 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1560 /* Integer mode correspondent to find_pivot_extent_general(). This
1561 could be optimized somewhat, but I just don't give a crap about
1562 CROSSTABS performance in integer mode, which is just a
1563 CROSSTABS wart as far as I'm concerned.
1565 That said, feel free to send optimization patches to me. */
1566 static struct table_entry **
1567 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1569 struct table_entry *fp = *tp;
1574 x = xtab[(*tp)->table];
1582 if ((*tp)->table != fp->table)
1587 if (memcmp (&(*tp)->values[2], &fp->values[2],
1588 sizeof (union value) * (x->nvar - 2)))
1595 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1596 result. WIDTH_ points to an int which is either 0 for a
1597 numeric value or a string width for a string value. */
1599 compare_value (const void *a_, const void *b_, void *width_)
1601 const union value *a = a_;
1602 const union value *b = b_;
1603 const int *pwidth = width_;
1604 const int width = *pwidth;
1607 return (a->f < b->f) ? -1 : (a->f > b->f);
1609 return strncmp (a->s, b->s, width);
1612 /* Given an array of ENTRY_CNT table_entry structures starting at
1613 ENTRIES, creates a sorted list of the values that the variable
1614 with index VAR_IDX takes on. The values are returned as a
1615 malloc()'darray stored in *VALUES, with the number of values
1616 stored in *VALUE_CNT.
1619 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1620 union value **values, int *value_cnt)
1622 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1624 if (mode == GENERAL)
1626 int width = v->width;
1629 *values = xnmalloc (entry_cnt, sizeof **values);
1630 for (i = 0; i < entry_cnt; i++)
1631 (*values)[i] = entries[i]->values[var_idx];
1632 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1633 compare_value, &width);
1637 struct var_range *vr = get_var_range (v);
1640 assert (mode == INTEGER);
1641 *values = xnmalloc (vr->count, sizeof **values);
1642 for (i = 0; i < vr->count; i++)
1643 (*values)[i].f = i + vr->min;
1644 *value_cnt = vr->count;
1648 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1649 from V, displayed with print format spec from variable VAR. When
1650 in REPORT missing-value mode, missing values have an M appended. */
1652 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1653 const union value *v, const struct variable *var)
1655 struct fixed_string s;
1657 const char *label = val_labs_find (var->val_labs, *v);
1660 tab_text (table, c, r, TAB_LEFT, label);
1664 s.string = tab_alloc (table, var->print.w);
1665 format_short (s.string, &var->print, v);
1666 s.length = strlen (s.string);
1667 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1668 s.string[s.length++] = 'M';
1669 while (s.length && *s.string == ' ')
1674 tab_raw (table, c, r, opt, &s);
1677 /* Draws a line across TABLE at the current row to indicate the most
1678 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1679 that changed, and puts the values that changed into the table. TB
1680 and X must be the corresponding table_entry and crosstab,
1683 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1685 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1687 for (; first_difference >= 2; first_difference--)
1688 table_value_missing (table, nvar - first_difference - 1, 0,
1689 TAB_RIGHT, &tb->values[first_difference],
1690 x->vars[first_difference]);
1693 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1694 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1695 additionally suffixed with a letter `M'. */
1697 format_cell_entry (struct tab_table *table, int c, int r, double value,
1698 char suffix, int mark_missing)
1700 const struct fmt_spec f = {FMT_F, 10, 1};
1702 struct fixed_string s;
1705 s.string = tab_alloc (table, 16);
1707 data_out (s.string, &f, &v);
1708 while (*s.string == ' ')
1714 s.string[s.length++] = suffix;
1716 s.string[s.length++] = 'M';
1718 tab_raw (table, c, r, TAB_RIGHT, &s);
1721 /* Displays the crosstabulation table. */
1723 display_crosstabulation (void)
1728 for (r = 0; r < n_rows; r++)
1729 table_value_missing (table, nvar - 2, r * num_cells,
1730 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1732 tab_text (table, nvar - 2, n_rows * num_cells,
1733 TAB_LEFT, _("Total"));
1735 /* Put in the actual cells. */
1740 tab_offset (table, nvar - 1, -1);
1741 for (r = 0; r < n_rows; r++)
1744 tab_hline (table, TAL_1, -1, n_cols, 0);
1745 for (c = 0; c < n_cols; c++)
1747 int mark_missing = 0;
1748 double expected_value = row_tot[r] * col_tot[c] / W;
1749 if (cmd.miss == CRS_REPORT
1750 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1751 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1754 for (i = 0; i < num_cells; i++)
1765 v = *mp / row_tot[r] * 100.;
1769 v = *mp / col_tot[c] * 100.;
1776 case CRS_CL_EXPECTED:
1779 case CRS_CL_RESIDUAL:
1780 v = *mp - expected_value;
1782 case CRS_CL_SRESIDUAL:
1783 v = (*mp - expected_value) / sqrt (expected_value);
1785 case CRS_CL_ASRESIDUAL:
1786 v = ((*mp - expected_value)
1787 / sqrt (expected_value
1788 * (1. - row_tot[r] / W)
1789 * (1. - col_tot[c] / W)));
1796 format_cell_entry (table, c, i, v, suffix, mark_missing);
1802 tab_offset (table, -1, tab_row (table) + num_cells);
1810 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1811 for (r = 0; r < n_rows; r++)
1814 int mark_missing = 0;
1816 if (cmd.miss == CRS_REPORT
1817 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1820 for (i = 0; i < num_cells; i++)
1834 v = row_tot[r] / W * 100.;
1838 v = row_tot[r] / W * 100.;
1841 case CRS_CL_EXPECTED:
1842 case CRS_CL_RESIDUAL:
1843 case CRS_CL_SRESIDUAL:
1844 case CRS_CL_ASRESIDUAL:
1852 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1853 tab_next_row (table);
1858 /* Column totals, grand total. */
1864 tab_hline (table, TAL_1, -1, n_cols, 0);
1865 for (c = 0; c <= n_cols; c++)
1867 double ct = c < n_cols ? col_tot[c] : W;
1868 int mark_missing = 0;
1872 if (cmd.miss == CRS_REPORT && c < n_cols
1873 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1876 for (i = 0; i < num_cells; i++)
1898 case CRS_CL_EXPECTED:
1899 case CRS_CL_RESIDUAL:
1900 case CRS_CL_SRESIDUAL:
1901 case CRS_CL_ASRESIDUAL:
1908 format_cell_entry (table, c, i, v, suffix, mark_missing);
1913 tab_offset (table, -1, tab_row (table) + last_row);
1916 tab_offset (table, 0, -1);
1919 static void calc_r (double *X, double *Y, double *, double *, double *);
1920 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1922 /* Display chi-square statistics. */
1924 display_chisq (void)
1926 static const char *chisq_stats[N_CHISQ] =
1928 N_("Pearson Chi-Square"),
1929 N_("Likelihood Ratio"),
1930 N_("Fisher's Exact Test"),
1931 N_("Continuity Correction"),
1932 N_("Linear-by-Linear Association"),
1934 double chisq_v[N_CHISQ];
1935 double fisher1, fisher2;
1941 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1943 tab_offset (chisq, nvar - 2, -1);
1945 for (i = 0; i < N_CHISQ; i++)
1947 if ((i != 2 && chisq_v[i] == SYSMIS)
1948 || (i == 2 && fisher1 == SYSMIS))
1952 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1955 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1956 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1957 tab_float (chisq, 3, 0, TAB_RIGHT,
1958 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1963 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1964 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1966 tab_next_row (chisq);
1969 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1970 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1971 tab_next_row (chisq);
1973 tab_offset (chisq, 0, -1);
1976 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1977 double[N_SYMMETRIC]);
1979 /* Display symmetric measures. */
1981 display_symmetric (void)
1983 static const char *categories[] =
1985 N_("Nominal by Nominal"),
1986 N_("Ordinal by Ordinal"),
1987 N_("Interval by Interval"),
1988 N_("Measure of Agreement"),
1991 static const char *stats[N_SYMMETRIC] =
1995 N_("Contingency Coefficient"),
1996 N_("Kendall's tau-b"),
1997 N_("Kendall's tau-c"),
1999 N_("Spearman Correlation"),
2004 static const int stats_categories[N_SYMMETRIC] =
2006 0, 0, 0, 1, 1, 1, 1, 2, 3,
2010 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2013 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2016 tab_offset (sym, nvar - 2, -1);
2018 for (i = 0; i < N_SYMMETRIC; i++)
2020 if (sym_v[i] == SYSMIS)
2023 if (stats_categories[i] != last_cat)
2025 last_cat = stats_categories[i];
2026 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2029 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2030 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2031 if (sym_ase[i] != SYSMIS)
2032 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2033 if (sym_t[i] != SYSMIS)
2034 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2035 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2039 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2040 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2043 tab_offset (sym, 0, -1);
2046 static int calc_risk (double[], double[], double[], union value *);
2048 /* Display risk estimate. */
2053 double risk_v[3], lower[3], upper[3];
2057 if (!calc_risk (risk_v, upper, lower, c))
2060 tab_offset (risk, nvar - 2, -1);
2062 for (i = 0; i < 3; i++)
2064 if (risk_v[i] == SYSMIS)
2070 if (x->vars[COL_VAR]->type == NUMERIC)
2071 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2072 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2074 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2075 x->vars[COL_VAR]->name,
2076 x->vars[COL_VAR]->width, c[0].s,
2077 x->vars[COL_VAR]->width, c[1].s);
2081 if (x->vars[ROW_VAR]->type == NUMERIC)
2082 sprintf (buf, _("For cohort %s = %g"),
2083 x->vars[ROW_VAR]->name, rows[i - 1].f);
2085 sprintf (buf, _("For cohort %s = %.*s"),
2086 x->vars[ROW_VAR]->name,
2087 x->vars[ROW_VAR]->width, rows[i - 1].s);
2091 tab_text (risk, 0, 0, TAB_LEFT, buf);
2092 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2093 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2094 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2095 tab_next_row (risk);
2098 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2099 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2100 tab_next_row (risk);
2102 tab_offset (risk, 0, -1);
2105 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2106 double[N_DIRECTIONAL]);
2108 /* Display directional measures. */
2110 display_directional (void)
2112 static const char *categories[] =
2114 N_("Nominal by Nominal"),
2115 N_("Ordinal by Ordinal"),
2116 N_("Nominal by Interval"),
2119 static const char *stats[] =
2122 N_("Goodman and Kruskal tau"),
2123 N_("Uncertainty Coefficient"),
2128 static const char *types[] =
2135 static const int stats_categories[N_DIRECTIONAL] =
2137 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2140 static const int stats_stats[N_DIRECTIONAL] =
2142 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2145 static const int stats_types[N_DIRECTIONAL] =
2147 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2150 static const int *stats_lookup[] =
2157 static const char **stats_names[] =
2169 double direct_v[N_DIRECTIONAL];
2170 double direct_ase[N_DIRECTIONAL];
2171 double direct_t[N_DIRECTIONAL];
2175 if (!calc_directional (direct_v, direct_ase, direct_t))
2178 tab_offset (direct, nvar - 2, -1);
2180 for (i = 0; i < N_DIRECTIONAL; i++)
2182 if (direct_v[i] == SYSMIS)
2188 for (j = 0; j < 3; j++)
2189 if (last[j] != stats_lookup[j][i])
2192 tab_hline (direct, TAL_1, j, 6, 0);
2197 int k = last[j] = stats_lookup[j][i];
2202 string = x->vars[0]->name;
2204 string = x->vars[1]->name;
2206 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2207 gettext (stats_names[j][k]), string);
2212 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2213 if (direct_ase[i] != SYSMIS)
2214 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2215 if (direct_t[i] != SYSMIS)
2216 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2217 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2218 tab_next_row (direct);
2221 tab_offset (direct, 0, -1);
2224 /* Statistical calculations. */
2226 /* Returns the value of the gamma (factorial) function for an integer
2229 gamma_int (double x)
2234 for (i = 2; i < x; i++)
2239 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2241 static inline double
2242 Pr (int a, int b, int c, int d)
2244 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2245 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2246 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2247 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2248 / gamma_int (a + b + c + d + 1.));
2251 /* Swap the contents of A and B. */
2253 swap (int *a, int *b)
2260 /* Calculate significance for Fisher's exact test as specified in
2261 _SPSS Statistical Algorithms_, Appendix 5. */
2263 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2267 if (min (c, d) < min (a, b))
2268 swap (&a, &c), swap (&b, &d);
2269 if (min (b, d) < min (a, c))
2270 swap (&a, &b), swap (&c, &d);
2274 swap (&a, &b), swap (&c, &d);
2276 swap (&a, &c), swap (&b, &d);
2280 for (x = 0; x <= a; x++)
2281 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2283 *fisher2 = *fisher1;
2284 for (x = 1; x <= b; x++)
2285 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2288 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2289 columns with values COLS and N_ROWS rows with values ROWS. Values
2290 in the matrix sum to W. */
2292 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2293 double *fisher1, double *fisher2)
2297 chisq[0] = chisq[1] = 0.;
2298 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2299 *fisher1 = *fisher2 = SYSMIS;
2301 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2303 if (ns_rows <= 1 || ns_cols <= 1)
2305 chisq[0] = chisq[1] = SYSMIS;
2309 for (r = 0; r < n_rows; r++)
2310 for (c = 0; c < n_cols; c++)
2312 const double expected = row_tot[r] * col_tot[c] / W;
2313 const double freq = mat[n_cols * r + c];
2314 const double residual = freq - expected;
2316 chisq[0] += residual * residual / expected;
2318 chisq[1] += freq * log (expected / freq);
2329 /* Calculate Yates and Fisher exact test. */
2330 if (ns_cols == 2 && ns_rows == 2)
2332 double f11, f12, f21, f22;
2338 for (i = j = 0; i < n_cols; i++)
2339 if (col_tot[i] != 0.)
2348 f11 = mat[nz_cols[0]];
2349 f12 = mat[nz_cols[1]];
2350 f21 = mat[nz_cols[0] + n_cols];
2351 f22 = mat[nz_cols[1] + n_cols];
2356 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2359 chisq[3] = (W * x * x
2360 / (f11 + f12) / (f21 + f22)
2361 / (f11 + f21) / (f12 + f22));
2369 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2370 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2373 /* Calculate Mantel-Haenszel. */
2374 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2376 double r, ase_0, ase_1;
2377 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2379 chisq[4] = (W - 1.) * r * r;
2384 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2385 ASE_1, and ase_0 into ASE_0. The row and column values must be
2386 passed in X and Y. */
2388 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2390 double SX, SY, S, T;
2392 double sum_XYf, sum_X2Y2f;
2393 double sum_Xr, sum_X2r;
2394 double sum_Yc, sum_Y2c;
2397 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2398 for (j = 0; j < n_cols; j++)
2400 double fij = mat[j + i * n_cols];
2401 double product = X[i] * Y[j];
2402 double temp = fij * product;
2404 sum_X2Y2f += temp * product;
2407 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2409 sum_Xr += X[i] * row_tot[i];
2410 sum_X2r += X[i] * X[i] * row_tot[i];
2414 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2416 sum_Yc += Y[i] * col_tot[i];
2417 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2421 S = sum_XYf - sum_Xr * sum_Yc / W;
2422 SX = sum_X2r - sum_Xr * sum_Xr / W;
2423 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2426 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2431 for (s = c = 0., i = 0; i < n_rows; i++)
2432 for (j = 0; j < n_cols; j++)
2434 double Xresid, Yresid;
2437 Xresid = X[i] - Xbar;
2438 Yresid = Y[j] - Ybar;
2439 temp = (T * Xresid * Yresid
2441 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2442 y = mat[j + i * n_cols] * temp * temp - c;
2447 *ase_1 = sqrt (s) / (T * T);
2451 static double somers_d_v[3];
2452 static double somers_d_ase[3];
2453 static double somers_d_t[3];
2455 /* Calculate symmetric statistics and their asymptotic standard
2456 errors. Returns 0 if none could be calculated. */
2458 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2459 double t[N_SYMMETRIC])
2461 int q = min (ns_rows, ns_cols);
2470 for (i = 0; i < N_SYMMETRIC; i++)
2471 v[i] = ase[i] = t[i] = SYSMIS;
2474 /* Phi, Cramer's V, contingency coefficient. */
2475 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2477 double Xp = 0.; /* Pearson chi-square. */
2482 for (r = 0; r < n_rows; r++)
2483 for (c = 0; c < n_cols; c++)
2485 const double expected = row_tot[r] * col_tot[c] / W;
2486 const double freq = mat[n_cols * r + c];
2487 const double residual = freq - expected;
2489 Xp += residual * residual / expected;
2493 if (cmd.a_statistics[CRS_ST_PHI])
2495 v[0] = sqrt (Xp / W);
2496 v[1] = sqrt (Xp / (W * (q - 1)));
2498 if (cmd.a_statistics[CRS_ST_CC])
2499 v[2] = sqrt (Xp / (Xp + W));
2502 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2503 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2508 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2515 for (r = 0; r < n_rows; r++)
2516 Dr -= row_tot[r] * row_tot[r];
2517 for (c = 0; c < n_cols; c++)
2518 Dc -= col_tot[c] * col_tot[c];
2524 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2525 for (c = 0; c < n_cols; c++)
2529 for (r = 0; r < n_rows; r++)
2530 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2540 for (i = 0; i < n_rows; i++)
2544 for (j = 1; j < n_cols; j++)
2545 Cij += col_tot[j] - cum[j + i * n_cols];
2548 for (j = 1; j < n_cols; j++)
2549 Dij += cum[j + (i - 1) * n_cols];
2553 double fij = mat[j + i * n_cols];
2559 assert (j < n_cols);
2561 Cij -= col_tot[j] - cum[j + i * n_cols];
2562 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2566 Cij += cum[j - 1 + (i - 1) * n_cols];
2567 Dij -= cum[j + (i - 1) * n_cols];
2573 if (cmd.a_statistics[CRS_ST_BTAU])
2574 v[3] = (P - Q) / sqrt (Dr * Dc);
2575 if (cmd.a_statistics[CRS_ST_CTAU])
2576 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2577 if (cmd.a_statistics[CRS_ST_GAMMA])
2578 v[5] = (P - Q) / (P + Q);
2580 /* ASE for tau-b, tau-c, gamma. Calculations could be
2581 eliminated here, at expense of memory. */
2586 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2587 for (i = 0; i < n_rows; i++)
2591 for (j = 1; j < n_cols; j++)
2592 Cij += col_tot[j] - cum[j + i * n_cols];
2595 for (j = 1; j < n_cols; j++)
2596 Dij += cum[j + (i - 1) * n_cols];
2600 double fij = mat[j + i * n_cols];
2602 if (cmd.a_statistics[CRS_ST_BTAU])
2604 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2605 + v[3] * (row_tot[i] * Dc
2606 + col_tot[j] * Dr));
2607 btau_cum += fij * temp * temp;
2611 const double temp = Cij - Dij;
2612 ctau_cum += fij * temp * temp;
2615 if (cmd.a_statistics[CRS_ST_GAMMA])
2617 const double temp = Q * Cij - P * Dij;
2618 gamma_cum += fij * temp * temp;
2621 if (cmd.a_statistics[CRS_ST_D])
2623 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2624 - (P - Q) * (W - row_tot[i]));
2625 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2626 - (Q - P) * (W - col_tot[j]));
2631 assert (j < n_cols);
2633 Cij -= col_tot[j] - cum[j + i * n_cols];
2634 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2638 Cij += cum[j - 1 + (i - 1) * n_cols];
2639 Dij -= cum[j + (i - 1) * n_cols];
2645 btau_var = ((btau_cum
2646 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2648 if (cmd.a_statistics[CRS_ST_BTAU])
2650 ase[3] = sqrt (btau_var);
2651 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2654 if (cmd.a_statistics[CRS_ST_CTAU])
2656 ase[4] = ((2 * q / ((q - 1) * W * W))
2657 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2658 t[4] = v[4] / ase[4];
2660 if (cmd.a_statistics[CRS_ST_GAMMA])
2662 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2663 t[5] = v[5] / (2. / (P + Q)
2664 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2666 if (cmd.a_statistics[CRS_ST_D])
2668 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2669 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2670 somers_d_t[0] = (somers_d_v[0]
2672 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2673 somers_d_v[1] = (P - Q) / Dc;
2674 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2675 somers_d_t[1] = (somers_d_v[1]
2677 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2678 somers_d_v[2] = (P - Q) / Dr;
2679 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2680 somers_d_t[2] = (somers_d_v[2]
2682 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2688 /* Spearman correlation, Pearson's r. */
2689 if (cmd.a_statistics[CRS_ST_CORR])
2691 double *R = local_alloc (sizeof *R * n_rows);
2692 double *C = local_alloc (sizeof *C * n_cols);
2695 double y, t, c = 0., s = 0.;
2700 R[i] = s + (row_tot[i] + 1.) / 2.;
2707 assert (i < n_rows);
2712 double y, t, c = 0., s = 0.;
2717 C[j] = s + (col_tot[j] + 1.) / 2;
2724 assert (j < n_cols);
2728 calc_r (R, C, &v[6], &t[6], &ase[6]);
2734 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2738 /* Cohen's kappa. */
2739 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2741 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2744 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2745 i < ns_rows; i++, j++)
2749 while (col_tot[j] == 0.)
2752 prod = row_tot[i] * col_tot[j];
2753 sum = row_tot[i] + col_tot[j];
2755 sum_fii += mat[j + i * n_cols];
2757 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2758 sum_riciri_ci += prod * sum;
2760 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2761 for (j = 0; j < ns_cols; j++)
2763 double sum = row_tot[i] + col_tot[j];
2764 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2767 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2769 ase[8] = sqrt ((W * W * sum_rici
2770 + sum_rici * sum_rici
2771 - W * sum_riciri_ci)
2772 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2774 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2775 / pow2 (W * W - sum_rici))
2776 + ((2. * (W - sum_fii)
2777 * (2. * sum_fii * sum_rici
2778 - W * sum_fiiri_ci))
2779 / cube (W * W - sum_rici))
2780 + (pow2 (W - sum_fii)
2781 * (W * sum_fijri_ci2 - 4.
2782 * sum_rici * sum_rici)
2783 / pow4 (W * W - sum_rici))));
2785 t[8] = v[8] / ase[8];
2792 /* Calculate risk estimate. */
2794 calc_risk (double *value, double *upper, double *lower, union value *c)
2796 double f11, f12, f21, f22;
2802 for (i = 0; i < 3; i++)
2803 value[i] = upper[i] = lower[i] = SYSMIS;
2806 if (ns_rows != 2 || ns_cols != 2)
2813 for (i = j = 0; i < n_cols; i++)
2814 if (col_tot[i] != 0.)
2823 f11 = mat[nz_cols[0]];
2824 f12 = mat[nz_cols[1]];
2825 f21 = mat[nz_cols[0] + n_cols];
2826 f22 = mat[nz_cols[1] + n_cols];
2828 c[0] = cols[nz_cols[0]];
2829 c[1] = cols[nz_cols[1]];
2832 value[0] = (f11 * f22) / (f12 * f21);
2833 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2834 lower[0] = value[0] * exp (-1.960 * v);
2835 upper[0] = value[0] * exp (1.960 * v);
2837 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2838 v = sqrt ((f12 / (f11 * (f11 + f12)))
2839 + (f22 / (f21 * (f21 + f22))));
2840 lower[1] = value[1] * exp (-1.960 * v);
2841 upper[1] = value[1] * exp (1.960 * v);
2843 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2844 v = sqrt ((f11 / (f12 * (f11 + f12)))
2845 + (f21 / (f22 * (f21 + f22))));
2846 lower[2] = value[2] * exp (-1.960 * v);
2847 upper[2] = value[2] * exp (1.960 * v);
2852 /* Calculate directional measures. */
2854 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2855 double t[N_DIRECTIONAL])
2860 for (i = 0; i < N_DIRECTIONAL; i++)
2861 v[i] = ase[i] = t[i] = SYSMIS;
2865 if (cmd.a_statistics[CRS_ST_LAMBDA])
2867 double *fim = xnmalloc (n_rows, sizeof *fim);
2868 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2869 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2870 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2871 double sum_fim, sum_fmj;
2873 int rm_index, cm_index;
2876 /* Find maximum for each row and their sum. */
2877 for (sum_fim = 0., i = 0; i < n_rows; i++)
2879 double max = mat[i * n_cols];
2882 for (j = 1; j < n_cols; j++)
2883 if (mat[j + i * n_cols] > max)
2885 max = mat[j + i * n_cols];
2889 sum_fim += fim[i] = max;
2890 fim_index[i] = index;
2893 /* Find maximum for each column. */
2894 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2896 double max = mat[j];
2899 for (i = 1; i < n_rows; i++)
2900 if (mat[j + i * n_cols] > max)
2902 max = mat[j + i * n_cols];
2906 sum_fmj += fmj[j] = max;
2907 fmj_index[j] = index;
2910 /* Find maximum row total. */
2913 for (i = 1; i < n_rows; i++)
2914 if (row_tot[i] > rm)
2920 /* Find maximum column total. */
2923 for (j = 1; j < n_cols; j++)
2924 if (col_tot[j] > cm)
2930 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2931 v[1] = (sum_fmj - rm) / (W - rm);
2932 v[2] = (sum_fim - cm) / (W - cm);
2934 /* ASE1 for Y given X. */
2938 for (accum = 0., i = 0; i < n_rows; i++)
2939 for (j = 0; j < n_cols; j++)
2941 const int deltaj = j == cm_index;
2942 accum += (mat[j + i * n_cols]
2943 * pow2 ((j == fim_index[i])
2948 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2951 /* ASE0 for Y given X. */
2955 for (accum = 0., i = 0; i < n_rows; i++)
2956 if (cm_index != fim_index[i])
2957 accum += (mat[i * n_cols + fim_index[i]]
2958 + mat[i * n_cols + cm_index]);
2959 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2962 /* ASE1 for X given Y. */
2966 for (accum = 0., i = 0; i < n_rows; i++)
2967 for (j = 0; j < n_cols; j++)
2969 const int deltaj = i == rm_index;
2970 accum += (mat[j + i * n_cols]
2971 * pow2 ((i == fmj_index[j])
2976 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2979 /* ASE0 for X given Y. */
2983 for (accum = 0., j = 0; j < n_cols; j++)
2984 if (rm_index != fmj_index[j])
2985 accum += (mat[j + n_cols * fmj_index[j]]
2986 + mat[j + n_cols * rm_index]);
2987 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2990 /* Symmetric ASE0 and ASE1. */
2995 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
2996 for (j = 0; j < n_cols; j++)
2998 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
2999 int temp1 = (i == rm_index) + (j == cm_index);
3000 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3001 accum1 += (mat[j + i * n_cols]
3002 * pow2 (temp0 + (v[0] - 1.) * temp1));
3004 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3005 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3006 / (2. * W - rm - cm));
3015 double sum_fij2_ri, sum_fij2_ci;
3016 double sum_ri2, sum_cj2;
3018 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3019 for (j = 0; j < n_cols; j++)
3021 double temp = pow2 (mat[j + i * n_cols]);
3022 sum_fij2_ri += temp / row_tot[i];
3023 sum_fij2_ci += temp / col_tot[j];
3026 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3027 sum_ri2 += row_tot[i] * row_tot[i];
3029 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3030 sum_cj2 += col_tot[j] * col_tot[j];
3032 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3033 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3037 if (cmd.a_statistics[CRS_ST_UC])
3039 double UX, UY, UXY, P;
3040 double ase1_yx, ase1_xy, ase1_sym;
3043 for (UX = 0., i = 0; i < n_rows; i++)
3044 if (row_tot[i] > 0.)
3045 UX -= row_tot[i] / W * log (row_tot[i] / W);
3047 for (UY = 0., j = 0; j < n_cols; j++)
3048 if (col_tot[j] > 0.)
3049 UY -= col_tot[j] / W * log (col_tot[j] / W);
3051 for (UXY = P = 0., i = 0; i < n_rows; i++)
3052 for (j = 0; j < n_cols; j++)
3054 double entry = mat[j + i * n_cols];
3059 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3060 UXY -= entry / W * log (entry / W);
3063 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3064 for (j = 0; j < n_cols; j++)
3066 double entry = mat[j + i * n_cols];
3071 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3072 + (UX - UXY) * log (col_tot[j] / W));
3073 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3074 + (UY - UXY) * log (row_tot[i] / W));
3075 ase1_sym += entry * pow2 ((UXY
3076 * log (row_tot[i] * col_tot[j] / (W * W)))
3077 - (UX + UY) * log (entry / W));
3080 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3081 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3082 t[5] = v[5] / ((2. / (W * (UX + UY)))
3083 * sqrt (P - pow2 (UX + UY - UXY) / W));
3085 v[6] = (UX + UY - UXY) / UX;
3086 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3087 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3089 v[7] = (UX + UY - UXY) / UY;
3090 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3091 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3095 if (cmd.a_statistics[CRS_ST_D])
3100 calc_symmetric (NULL, NULL, NULL);
3101 for (i = 0; i < 3; i++)
3103 v[8 + i] = somers_d_v[i];
3104 ase[8 + i] = somers_d_ase[i];
3105 t[8 + i] = somers_d_t[i];
3110 if (cmd.a_statistics[CRS_ST_ETA])
3113 double sum_Xr, sum_X2r;
3117 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3119 sum_Xr += rows[i].f * row_tot[i];
3120 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3122 SX = sum_X2r - sum_Xr * sum_Xr / W;
3124 for (SXW = 0., j = 0; j < n_cols; j++)
3128 for (cum = 0., i = 0; i < n_rows; i++)
3130 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3131 cum += rows[i].f * mat[j + i * n_cols];
3134 SXW -= cum * cum / col_tot[j];
3136 v[11] = sqrt (1. - SXW / SX);
3140 double sum_Yc, sum_Y2c;
3144 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3146 sum_Yc += cols[i].f * col_tot[i];
3147 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3149 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3151 for (SYW = 0., i = 0; i < n_rows; i++)
3155 for (cum = 0., j = 0; j < n_cols; j++)
3157 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3158 cum += cols[j].f * mat[j + i * n_cols];
3161 SYW -= cum * cum / row_tot[i];
3163 v[12] = sqrt (1. - SYW / SY);
3170 /* A wrapper around data_out() that limits string output to short
3171 string width and null terminates the result. */
3173 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3175 struct fmt_spec fmt_subst;
3177 /* Limit to short string width. */
3178 if (formats[fp->type].cat & FCAT_STRING)
3182 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3183 if (fmt_subst.type == FMT_A)
3184 fmt_subst.w = min (8, fmt_subst.w);
3186 fmt_subst.w = min (16, fmt_subst.w);
3192 data_out (s, fp, v);
3194 /* Null terminate. */