1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include "algorithm.h"
49 #include "value-labels.h"
53 #include "debug-print.h"
59 +missing=miss:!table/include/report;
60 +write[wr_]=none,cells,all;
61 +format=fmt:!labels/nolabels/novallabs,
64 tabl:!tables/notables,
67 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
69 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
75 /* Number of chi-square statistics. */
78 /* Number of symmetric statistics. */
81 /* Number of directional statistics. */
82 #define N_DIRECTIONAL 13
84 /* A single table entry for general mode. */
87 int table; /* Flattened table number. */
90 double freq; /* Frequency count. */
91 double *data; /* Crosstabulation table for integer mode. */
94 union value values[1]; /* Values. */
97 /* A crosstabulation. */
100 int nvar; /* Number of variables. */
101 double missing; /* Missing cases count. */
102 int ofs; /* Integer mode: Offset into sorted_tab[]. */
103 struct variable *vars[2]; /* At least two variables; sorted by
104 larger indices first. */
107 /* Indexes into crosstab.v. */
114 /* General mode crosstabulation table. */
115 static struct hsh_table *gen_tab; /* Hash table. */
116 static int n_sorted_tab; /* Number of entries in sorted_tab. */
117 static struct table_entry **sorted_tab; /* Sorted table. */
119 /* Variables specifies on VARIABLES. */
120 static struct variable **variables;
121 static size_t variables_cnt;
124 static struct crosstab **xtab;
127 /* Integer or general mode? */
136 static int num_cells; /* Number of cells requested. */
137 static int cells[8]; /* Cells requested. */
140 static int write; /* One of WR_* that specifies the WRITE style. */
142 /* Command parsing info. */
143 static struct cmd_crosstabs cmd;
146 static struct pool *pl_tc; /* For table cells. */
147 static struct pool *pl_col; /* For column data. */
149 static int internal_cmd_crosstabs (void);
150 static void precalc (void *);
151 static int calc_general (struct ccase *, void *);
152 static int calc_integer (struct ccase *, void *);
153 static void postcalc (void *);
154 static void submit (struct tab_table *);
156 static void format_short (char *s, const struct fmt_spec *fp,
157 const union value *v);
160 static void debug_print (void);
161 static void print_table_entries (struct table_entry **tab);
164 /* Parse and execute CROSSTABS, then clean up. */
168 int result = internal_cmd_crosstabs ();
171 pool_destroy (pl_tc);
172 pool_destroy (pl_col);
177 /* Parses and executes the CROSSTABS procedure. */
179 internal_cmd_crosstabs (void)
185 pl_tc = pool_create ();
186 pl_col = pool_create ();
188 if (!parse_crosstabs (&cmd))
192 /* Needs variables. */
196 mode = variables ? INTEGER : GENERAL;
201 cmd.a_cells[CRS_CL_COUNT] = 1;
209 for (i = 0; i < CRS_CL_count; i++)
214 cmd.a_cells[CRS_CL_COUNT] = 1;
215 cmd.a_cells[CRS_CL_ROW] = 1;
216 cmd.a_cells[CRS_CL_COLUMN] = 1;
217 cmd.a_cells[CRS_CL_TOTAL] = 1;
219 if (cmd.a_cells[CRS_CL_ALL])
221 for (i = 0; i < CRS_CL_count; i++)
223 cmd.a_cells[CRS_CL_ALL] = 0;
225 cmd.a_cells[CRS_CL_NONE] = 0;
226 for (num_cells = i = 0; i < CRS_CL_count; i++)
228 cmd.a_cells[num_cells++] = i;
232 if (cmd.sbc_statistics)
237 for (i = 0; i < CRS_ST_count; i++)
238 if (cmd.a_statistics[i])
241 cmd.a_statistics[CRS_ST_CHISQ] = 1;
242 if (cmd.a_statistics[CRS_ST_ALL])
243 for (i = 0; i < CRS_ST_count; i++)
244 cmd.a_statistics[i] = 1;
248 if (cmd.miss == CRS_REPORT && mode == GENERAL)
250 msg (SE, _("Missing mode REPORT not allowed in general mode. "
251 "Assuming MISSING=TABLE."));
252 cmd.miss = CRS_TABLE;
256 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
257 cmd.a_write[CRS_WR_ALL] = 0;
258 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
260 msg (SE, _("Write mode ALL not allowed in general mode. "
261 "Assuming WRITE=CELLS."));
262 cmd.a_write[CRS_WR_CELLS] = 1;
265 && (cmd.a_write[CRS_WR_NONE]
266 + cmd.a_write[CRS_WR_ALL]
267 + cmd.a_write[CRS_WR_CELLS] == 0))
268 cmd.a_write[CRS_WR_CELLS] = 1;
269 if (cmd.a_write[CRS_WR_CELLS])
270 write = CRS_WR_CELLS;
271 else if (cmd.a_write[CRS_WR_ALL])
276 procedure_with_splits (precalc,
277 mode == GENERAL ? calc_general : calc_integer,
283 /* Parses the TABLES subcommand. */
285 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
287 struct var_set *var_set;
289 struct variable ***by = NULL;
294 /* Ensure that this is a TABLES subcommand. */
295 if (!lex_match_id ("TABLES")
296 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
301 if (variables != NULL)
302 var_set = var_set_create_from_array (variables, variables_cnt);
304 var_set = var_set_create_from_dict (default_dict);
305 assert (var_set != NULL);
309 by = xrealloc (by, sizeof *by * (n_by + 1));
310 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
311 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
312 PV_NO_DUPLICATE | PV_NO_SCRATCH))
317 if (!lex_match (T_BY))
321 lex_error (_("expecting BY"));
330 int *by_iter = xcalloc (sizeof *by_iter * n_by);
333 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
334 for (i = 0; i < nx; i++)
338 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
345 for (i = 0; i < n_by; i++)
346 x->vars[i] = by[i][by_iter[i]];
352 for (i = n_by - 1; i >= 0; i--)
354 if (++by_iter[i] < by_nvar[i])
367 /* All return paths lead here. */
371 for (i = 0; i < n_by; i++)
377 var_set_destroy (var_set);
382 /* Parses the VARIABLES subcommand. */
384 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
388 msg (SE, _("VARIABLES must be specified before TABLES."));
396 int orig_nv = variables_cnt;
401 if (!parse_variables (default_dict, &variables, &variables_cnt,
402 (PV_APPEND | PV_NUMERIC
403 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
408 lex_error ("expecting `('");
413 if (!lex_force_int ())
415 min = lex_integer ();
420 if (!lex_force_int ())
422 max = lex_integer ();
425 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
433 lex_error ("expecting `)'");
438 for (i = orig_nv; i < variables_cnt; i++)
440 variables[i]->p.crs.min = min;
441 variables[i]->p.crs.max = max + 1.;
442 variables[i]->p.crs.count = max - min + 1;
461 printf ("CROSSTABS\n");
463 if (variables != NULL)
467 printf ("\t/VARIABLES=");
468 for (i = 0; i < variables_cnt; i++)
470 struct variable *v = variables[i];
472 printf ("%s ", v->name);
473 if (i < variables_cnt - 1)
475 struct variable *nv = variables[i + 1];
477 if (v->p.crs.min == nv->p.crs.min
478 && v->p.crs.max == nv->p.crs.max)
481 printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1);
489 printf ("\t/TABLES=");
490 for (i = 0; i < nxtab; i++)
492 struct crosstab *x = xtab[i];
497 for (j = 0; j < x->nvar; j++)
501 printf ("%s", x->v[j]->name);
507 #endif /* DEBUGGING */
509 /* Data file processing. */
511 static int compare_table_entry (const void *, const void *, void *);
512 static unsigned hash_table_entry (const void *, void *);
514 /* Set up the crosstabulation tables for processing. */
516 precalc (void *aux UNUSED)
520 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
530 for (i = 0; i < nxtab; i++)
532 struct crosstab *x = xtab[i];
537 x->ofs = n_sorted_tab;
539 for (j = 2; j < x->nvar; j++)
540 count *= x->vars[j - 2]->p.crs.count;
542 sorted_tab = xrealloc (sorted_tab,
543 sizeof *sorted_tab * (n_sorted_tab + count));
544 v = local_alloc (sizeof *v * x->nvar);
545 for (j = 2; j < x->nvar; j++)
546 v[j] = x->vars[j]->p.crs.min;
547 for (j = 0; j < count; j++)
549 struct table_entry *te;
552 te = sorted_tab[n_sorted_tab++]
553 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
557 const int mat_size = (x->vars[0]->p.crs.count
558 * x->vars[1]->p.crs.count);
561 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
562 for (m = 0; m < mat_size; m++)
566 for (k = 2; k < x->nvar; k++)
567 te->values[k].f = v[k];
568 for (k = 2; k < x->nvar; k++)
569 if (++v[k] >= x->vars[k]->p.crs.max)
570 v[k] = x->vars[k]->p.crs.min;
577 sorted_tab = xrealloc (sorted_tab,
578 sizeof *sorted_tab * (n_sorted_tab + 1));
579 sorted_tab[n_sorted_tab] = NULL;
583 /* Form crosstabulations for general mode. */
585 calc_general (struct ccase *c, void *aux UNUSED)
588 double weight = dict_get_case_weight (default_dict, c);
590 /* Flattened current table index. */
593 for (t = 0; t < nxtab; t++)
595 struct crosstab *x = xtab[t];
596 const size_t entry_size = (sizeof (struct table_entry)
597 + sizeof (union value) * (x->nvar - 1));
598 struct table_entry *te = local_alloc (entry_size);
600 /* Construct table entry for the current record and table. */
606 for (j = 0; j < x->nvar; j++)
608 if ((cmd.miss == CRS_TABLE
609 && is_missing (&c->data[x->vars[j]->fv], x->vars[j]))
610 || (cmd.miss == CRS_INCLUDE
611 && is_system_missing (&c->data[x->vars[j]->fv],
614 x->missing += weight;
618 if (x->vars[j]->type == NUMERIC)
619 te->values[j].f = c->data[x->vars[j]->fv].f;
622 memcpy (te->values[j].s, c->data[x->vars[j]->fv].s,
625 /* Necessary in order to simplify comparisons. */
626 memset (&te->values[j].s[x->vars[j]->width], 0,
627 sizeof (union value) - x->vars[j]->width);
632 /* Add record to hash table. */
634 struct table_entry **tepp
635 = (struct table_entry **) hsh_probe (gen_tab, te);
638 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
641 memcpy (tep, te, entry_size);
646 (*tepp)->u.freq += weight;
657 calc_integer (struct ccase *c, void *aux UNUSED)
660 double weight = dict_get_case_weight (default_dict, c);
662 /* Flattened current table index. */
665 for (t = 0; t < nxtab; t++)
667 struct crosstab *x = xtab[t];
672 for (i = 0; i < x->nvar; i++)
674 struct variable *const v = x->vars[i];
675 double value = c->data[v->fv].f;
677 /* Note that the first test also rules out SYSMIS. */
678 if ((value < v->p.crs.min || value >= v->p.crs.max)
679 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
681 x->missing += weight;
687 ofs += fact * ((int) value - v->p.crs.min);
688 fact *= v->p.crs.count;
693 const int row = c->data[x->vars[ROW_VAR]->fv].f - x->vars[ROW_VAR]->p.crs.min;
694 const int col = c->data[x->vars[COL_VAR]->fv].f - x->vars[COL_VAR]->p.crs.min;
695 const int col_dim = x->vars[COL_VAR]->p.crs.count;
697 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
707 /* Print out all table entries in NULL-terminated TAB for use by a
708 debugger (a person, not a program). */
710 print_table_entries (struct table_entry **tab)
712 printf ("raw crosstabulation data:\n");
715 const struct crosstab *x = xtab[(*tab)->table];
718 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
719 for (i = 0; i < x->nvar; i++)
723 printf ("%s:", x->v[i]->name);
725 if (x->v[i]->type == NUMERIC)
726 printf ("%g", (*tab)->v[i].f);
728 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
736 /* Compare the table_entry's at A and B and return a strcmp()-type
739 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
741 const struct table_entry *a = a_;
742 const struct table_entry *b = b_;
744 if (a->table > b->table)
746 else if (a->table < b->table)
750 const struct crosstab *x = xtab[a->table];
753 for (i = x->nvar - 1; i >= 0; i--)
754 if (x->vars[i]->type == NUMERIC)
756 const double diffnum = a->values[i].f - b->values[i].f;
759 else if (diffnum > 0)
764 assert (x->vars[i]->type == ALPHA);
766 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
777 /* Calculate a hash value from table_entry A. */
779 hash_table_entry (const void *a_, void *foo UNUSED)
781 const struct table_entry *a = a_;
786 for (i = 0; i < xtab[a->table]->nvar; i++)
787 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
792 /* Post-data reading calculations. */
794 static struct table_entry **find_pivot_extent (struct table_entry **,
795 int *cnt, int pivot);
796 static void enum_var_values (struct table_entry **entries, int entry_cnt,
798 union value **values, int *value_cnt);
799 static void output_pivot_table (struct table_entry **, struct table_entry **,
800 double **, double **, double **,
801 int *, int *, int *);
802 static void make_summary_table (void);
805 postcalc (void *aux UNUSED)
809 n_sorted_tab = hsh_count (gen_tab);
810 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
812 print_table_entries (sorted_tab);
816 make_summary_table ();
818 /* Identify all the individual crosstabulation tables, and deal with
821 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
822 int pc = n_sorted_tab; /* Pivot count. */
824 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
825 int maxrows = 0, maxcols = 0, maxcells = 0;
829 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
833 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
834 &maxrows, &maxcols, &maxcells);
843 hsh_destroy (gen_tab);
846 static void insert_summary (struct tab_table *, int tab_index, double valid);
848 /* Output a table summarizing the cases processed. */
850 make_summary_table (void)
852 struct tab_table *summary;
854 struct table_entry **pb = sorted_tab, **pe;
855 int pc = n_sorted_tab;
858 summary = tab_create (7, 3 + nxtab, 1);
859 tab_title (summary, 0, _("Summary."));
860 tab_headers (summary, 1, 0, 3, 0);
861 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
862 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
863 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
864 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
865 tab_hline (summary, TAL_1, 1, 6, 1);
866 tab_hline (summary, TAL_1, 1, 6, 2);
867 tab_vline (summary, TAL_1, 3, 1, 1);
868 tab_vline (summary, TAL_1, 5, 1, 1);
872 for (i = 0; i < 3; i++)
874 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
875 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
878 tab_offset (summary, 0, 3);
884 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
888 while (cur_tab < (*pb)->table)
889 insert_summary (summary, cur_tab++, 0.);
892 for (valid = 0.; pb < pe; pb++)
893 valid += (*pb)->u.freq;
896 const struct crosstab *const x = xtab[(*pb)->table];
897 const int n_cols = x->vars[COL_VAR]->p.crs.count;
898 const int n_rows = x->vars[ROW_VAR]->p.crs.count;
899 const int count = n_cols * n_rows;
901 for (valid = 0.; pb < pe; pb++)
903 const double *data = (*pb)->u.data;
906 for (i = 0; i < count; i++)
910 insert_summary (summary, cur_tab++, valid);
915 while (cur_tab < nxtab)
916 insert_summary (summary, cur_tab++, 0.);
921 /* Inserts a line into T describing the crosstabulation at index
922 TAB_INDEX, which has VALID valid observations. */
924 insert_summary (struct tab_table *t, int tab_index, double valid)
926 struct crosstab *x = xtab[tab_index];
928 tab_hline (t, TAL_1, 0, 6, 0);
930 /* Crosstabulation name. */
932 char *buf = local_alloc (128 * x->nvar);
936 for (i = 0; i < x->nvar; i++)
939 cp = stpcpy (cp, " * ");
942 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
944 tab_text (t, 0, 0, TAB_LEFT, buf);
949 /* Counts and percentages. */
959 for (i = 0; i < 3; i++)
961 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
962 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
973 static struct tab_table *table; /* Crosstabulation table. */
974 static struct tab_table *chisq; /* Chi-square table. */
975 static struct tab_table *sym; /* Symmetric measures table. */
976 static struct tab_table *risk; /* Risk estimate table. */
977 static struct tab_table *direct; /* Directional measures table. */
980 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
982 /* Column values, number of columns. */
983 static union value *cols;
986 /* Row values, number of rows. */
987 static union value *rows;
990 /* Number of statistically interesting columns/rows (columns/rows with
992 static int ns_cols, ns_rows;
994 /* Crosstabulation. */
995 static struct crosstab *x;
997 /* Number of variables from the crosstabulation to consider. This is
998 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1001 /* Matrix contents. */
1002 static double *mat; /* Matrix proper. */
1003 static double *row_tot; /* Row totals. */
1004 static double *col_tot; /* Column totals. */
1005 static double W; /* Grand total. */
1007 static void display_dimensions (struct tab_table *, int first_difference,
1008 struct table_entry *);
1009 static void display_crosstabulation (void);
1010 static void display_chisq (void);
1011 static void display_symmetric (void);
1012 static void display_risk (void);
1013 static void display_directional (void);
1014 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1015 static void table_value_missing (struct tab_table *table, int c, int r,
1016 unsigned char opt, const union value *v,
1017 const struct variable *var);
1018 static void delete_missing (void);
1020 /* Output pivot table beginning at PB and continuing until PE,
1021 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1022 hold *MAXROWS entries. */
1024 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1025 double **matp, double **row_totp, double **col_totp,
1026 int *maxrows, int *maxcols, int *maxcells)
1029 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1030 int tc = pe - pb; /* Table count. */
1032 /* Table entry for header comparison. */
1033 struct table_entry *cmp = NULL;
1035 x = xtab[(*pb)->table];
1036 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1038 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1040 /* Crosstabulation table initialization. */
1043 table = tab_create (nvar + n_cols,
1044 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1045 tab_headers (table, nvar - 1, 0, 2, 0);
1047 /* First header line. */
1048 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1049 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1051 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1053 /* Second header line. */
1057 for (i = 2; i < nvar; i++)
1058 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1059 TAB_RIGHT | TAT_TITLE,
1061 ? x->vars[i]->label : x->vars[i]->name));
1062 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1063 x->vars[ROW_VAR]->name);
1064 for (i = 0; i < n_cols; i++)
1065 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1067 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1070 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1071 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1075 char *title = local_alloc (x->nvar * 64 + 128);
1079 if (cmd.pivot == CRS_PIVOT)
1080 for (i = 0; i < nvar; i++)
1083 cp = stpcpy (cp, " by ");
1084 cp = stpcpy (cp, x->vars[i]->name);
1088 cp = spprintf (cp, "%s by %s for",
1089 x->vars[0]->name, x->vars[1]->name);
1090 for (i = 2; i < nvar; i++)
1092 char buf[64], *bufp;
1097 cp = stpcpy (cp, x->vars[i]->name);
1099 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1100 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1102 cp = stpcpy (cp, bufp);
1106 cp = stpcpy (cp, " [");
1107 for (i = 0; i < num_cells; i++)
1115 static const struct tuple cell_names[] =
1117 {CRS_CL_COUNT, N_("count")},
1118 {CRS_CL_ROW, N_("row %")},
1119 {CRS_CL_COLUMN, N_("column %")},
1120 {CRS_CL_TOTAL, N_("total %")},
1121 {CRS_CL_EXPECTED, N_("expected")},
1122 {CRS_CL_RESIDUAL, N_("residual")},
1123 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1124 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1128 const struct tuple *t;
1130 for (t = cell_names; t->value != cells[i]; t++)
1131 assert (t->value != -1);
1133 cp = stpcpy (cp, ", ");
1134 cp = stpcpy (cp, gettext (t->name));
1138 tab_title (table, 0, title);
1142 tab_offset (table, 0, 2);
1147 /* Chi-square table initialization. */
1148 if (cmd.a_statistics[CRS_ST_CHISQ])
1150 chisq = tab_create (6 + (nvar - 2),
1151 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1152 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1154 tab_title (chisq, 0, "Chi-square tests.");
1156 tab_offset (chisq, nvar - 2, 0);
1157 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1158 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1159 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1160 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1161 _("Asymp. Sig. (2-sided)"));
1162 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1163 _("Exact. Sig. (2-sided)"));
1164 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1165 _("Exact. Sig. (1-sided)"));
1167 tab_offset (chisq, 0, 1);
1172 /* Symmetric measures. */
1173 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1174 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1175 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1176 || cmd.a_statistics[CRS_ST_KAPPA])
1178 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1179 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1180 tab_title (sym, 0, "Symmetric measures.");
1182 tab_offset (sym, nvar - 2, 0);
1183 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1184 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1185 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1186 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1187 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1188 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1189 tab_offset (sym, 0, 1);
1194 /* Risk estimate. */
1195 if (cmd.a_statistics[CRS_ST_RISK])
1197 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1198 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1199 tab_title (risk, 0, "Risk estimate.");
1201 tab_offset (risk, nvar - 2, 0);
1202 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1203 _(" 95%% Confidence Interval"));
1204 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1205 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1206 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1207 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1208 tab_hline (risk, TAL_1, 2, 3, 1);
1209 tab_vline (risk, TAL_1, 2, 0, 1);
1210 tab_offset (risk, 0, 2);
1215 /* Directional measures. */
1216 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1217 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1219 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1220 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1221 tab_title (direct, 0, "Directional measures.");
1223 tab_offset (direct, nvar - 2, 0);
1224 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1225 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1226 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1227 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1228 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1229 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1230 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1231 tab_offset (direct, 0, 1);
1238 /* Find pivot subtable if applicable. */
1239 te = find_pivot_extent (tb, &tc, 0);
1243 /* Find all the row variable values. */
1244 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1246 /* Allocate memory space for the column and row totals. */
1247 if (n_rows > *maxrows)
1249 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1250 row_tot = *row_totp;
1253 if (n_cols > *maxcols)
1255 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1256 col_tot = *col_totp;
1260 /* Allocate table space for the matrix. */
1261 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1262 tab_realloc (table, -1,
1263 max (tab_nr (table) + (n_rows + 1) * num_cells,
1264 tab_nr (table) * (pe - pb) / (te - tb)));
1266 if (mode == GENERAL)
1268 /* Allocate memory space for the matrix. */
1269 if (n_cols * n_rows > *maxcells)
1271 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1272 *maxcells = n_cols * n_rows;
1277 /* Build the matrix and calculate column totals. */
1279 union value *cur_col = cols;
1280 union value *cur_row = rows;
1282 double *cp = col_tot;
1283 struct table_entry **p;
1286 for (p = &tb[0]; p < te; p++)
1288 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1292 for (; cur_row < &rows[n_rows]; cur_row++)
1298 mp = &mat[cur_col - cols];
1301 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1308 *cp += *mp = (*p)->u.freq;
1313 /* Zero out the rest of the matrix. */
1314 for (; cur_row < &rows[n_rows]; cur_row++)
1320 if (cur_col < &cols[n_cols])
1322 const int rem_cols = n_cols - (cur_col - cols);
1325 for (c = 0; c < rem_cols; c++)
1327 mp = &mat[cur_col - cols];
1328 for (r = 0; r < n_rows; r++)
1330 for (c = 0; c < rem_cols; c++)
1332 mp += n_cols - rem_cols;
1340 double *tp = col_tot;
1342 assert (mode == INTEGER);
1343 mat = (*tb)->u.data;
1346 /* Calculate column totals. */
1347 for (c = 0; c < n_cols; c++)
1350 double *cp = &mat[c];
1352 for (r = 0; r < n_rows; r++)
1353 cum += cp[r * n_cols];
1361 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1362 ns_cols += *cp != 0.;
1365 /* Calculate row totals. */
1368 double *rp = row_tot;
1371 for (ns_rows = 0, r = n_rows; r--; )
1374 for (c = n_cols; c--; )
1382 /* Calculate grand total. */
1388 if (n_rows < n_cols)
1389 tp = row_tot, n = n_rows;
1391 tp = col_tot, n = n_cols;
1398 /* Print the matrix. */
1402 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1403 for (i = 2; i < nvar; i++)
1404 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1407 for (c = 0; c < n_cols; c++)
1408 printf ("%4g", cols[c].f);
1410 for (r = 0; r < n_rows; r++)
1412 printf ("%4g:", rows[r].f);
1413 for (c = 0; c < n_cols; c++)
1414 printf ("%4g", mat[c + r * n_cols]);
1415 printf ("%4g", row_tot[r]);
1419 for (c = 0; c < n_cols; c++)
1420 printf ("%4g", col_tot[c]);
1426 /* Find the first variable that differs from the last subtable,
1427 then display the values of the dimensioning variables for
1428 each table that needs it. */
1430 int first_difference = nvar - 1;
1433 for (; ; first_difference--)
1435 assert (first_difference >= 2);
1436 if (memcmp (&cmp->values[first_difference],
1437 &(*tb)->values[first_difference],
1438 sizeof *cmp->values))
1444 display_dimensions (table, first_difference, *tb);
1446 display_dimensions (chisq, first_difference, *tb);
1448 display_dimensions (sym, first_difference, *tb);
1450 display_dimensions (risk, first_difference, *tb);
1452 display_dimensions (direct, first_difference, *tb);
1456 display_crosstabulation ();
1457 if (cmd.miss == CRS_REPORT)
1462 display_symmetric ();
1466 display_directional ();
1477 tab_resize (chisq, 4 + (nvar - 2), -1);
1488 /* Delete missing rows and columns for statistical analysis when
1491 delete_missing (void)
1496 for (r = 0; r < n_rows; r++)
1497 if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1501 for (c = 0; c < n_cols; c++)
1502 mat[c + r * n_cols] = 0.;
1510 for (c = 0; c < n_cols; c++)
1511 if (is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1515 for (r = 0; r < n_rows; r++)
1516 mat[c + r * n_cols] = 0.;
1522 /* Prepare table T for submission, and submit it. */
1524 submit (struct tab_table *t)
1531 tab_resize (t, -1, 0);
1532 if (tab_nr (t) == tab_t (t))
1537 tab_offset (t, 0, 0);
1539 for (i = 2; i < nvar; i++)
1540 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1541 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1542 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1543 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1545 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1547 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1548 tab_dim (t, crosstabs_dim);
1552 /* Sets the widths of all the columns and heights of all the rows in
1553 table T for driver D. */
1555 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1559 /* Width of a numerical column. */
1560 int c = outp_string_width (d, "0.000000");
1561 if (cmd.miss == CRS_REPORT)
1562 c += outp_string_width (d, "M");
1564 /* Set width for header columns. */
1567 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1569 if (w < d->prop_em_width * 8)
1570 w = d->prop_em_width * 8;
1572 if (w > d->prop_em_width * 15)
1573 w = d->prop_em_width * 15;
1575 for (i = 0; i < t->l; i++)
1579 for (i = t->l; i < t->nc; i++)
1582 for (i = 0; i < t->nr; i++)
1583 t->h[i] = tab_natural_height (t, d, i);
1586 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1587 int *cnt, int pivot);
1588 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1589 int *cnt, int pivot);
1591 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1593 static struct table_entry **
1594 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1596 return (mode == GENERAL
1597 ? find_pivot_extent_general (tp, cnt, pivot)
1598 : find_pivot_extent_integer (tp, cnt, pivot));
1601 /* Find the extent of a region in TP that contains one table. If
1602 PIVOT != 0 that means a set of table entries with identical table
1603 number; otherwise they also have to have the same values for every
1604 dimension after the row and column dimensions. The table that is
1605 searched starts at TP and has length CNT. Returns the first entry
1606 after the last one in the table; sets *CNT to the number of
1607 remaining values. If there are no entries in TP at all, returns
1608 NULL. A yucky interface, admittedly, but it works. */
1609 static struct table_entry **
1610 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1612 struct table_entry *fp = *tp;
1617 x = xtab[(*tp)->table];
1625 if ((*tp)->table != fp->table)
1630 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1637 /* Integer mode correspondent to find_pivot_extent_general(). This
1638 could be optimized somewhat, but I just don't give a crap about
1639 CROSSTABS performance in integer mode, which is just a
1640 CROSSTABS wart as far as I'm concerned.
1642 That said, feel free to send optimization patches to me. */
1643 static struct table_entry **
1644 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1646 struct table_entry *fp = *tp;
1651 x = xtab[(*tp)->table];
1659 if ((*tp)->table != fp->table)
1664 if (memcmp (&(*tp)->values[2], &fp->values[2],
1665 sizeof (union value) * (x->nvar - 2)))
1672 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1673 result. WIDTH_ points to an int which is either 0 for a
1674 numeric value or a string width for a string value. */
1676 compare_value (const void *a_, const void *b_, void *width_)
1678 const union value *a = a_;
1679 const union value *b = b_;
1680 const int *pwidth = width_;
1681 const int width = *pwidth;
1684 return (a->f < b->f) ? -1 : (a->f > b->f);
1686 return strncmp (a->s, b->s, width);
1689 /* Given an array of ENTRY_CNT table_entry structures starting at
1690 ENTRIES, creates a sorted list of the values that the variable
1691 with index VAR_INDEX takes on. The values are returned as a
1692 malloc()'darray stored in *VALUES, with the number of values
1693 stored in *VALUE_CNT.
1696 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1697 union value **values, int *value_cnt)
1699 if (mode == GENERAL)
1701 int width = xtab[(*entries)->table]->vars[var_idx]->width;
1704 *values = xmalloc (sizeof **values * entry_cnt);
1705 for (i = 0; i < entry_cnt; i++)
1706 (*values)[i] = entries[i]->values[var_idx];
1707 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1708 compare_value, &width);
1712 struct crosstab_proc *crs
1713 = &xtab[(*entries)->table]->vars[var_idx]->p.crs;
1716 assert (mode == INTEGER);
1717 *values = xmalloc (sizeof **values * crs->count);
1718 for (i = 0; i < crs->count; i++)
1719 (*values)[i].f = i + crs->min;
1720 *value_cnt = crs->count;
1724 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1725 from V, displayed with print format spec from variable VAR. When
1726 in REPORT missing-value mode, missing values have an M appended. */
1728 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1729 const union value *v, const struct variable *var)
1731 struct len_string s;
1733 const char *label = val_labs_find (var->val_labs, *v);
1736 tab_text (table, c, r, TAB_LEFT, label);
1740 s.string = tab_alloc (table, var->print.w);
1741 format_short (s.string, &var->print, v);
1742 s.length = strlen (s.string);
1743 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1744 s.string[s.length++] = 'M';
1745 while (s.length && *s.string == ' ')
1750 tab_raw (table, c, r, opt, &s);
1753 /* Draws a line across TABLE at the current row to indicate the most
1754 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1755 that changed, and puts the values that changed into the table. TB
1756 and X must be the corresponding table_entry and crosstab,
1759 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1761 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1763 for (; first_difference >= 2; first_difference--)
1764 table_value_missing (table, nvar - first_difference - 1, 0,
1765 TAB_RIGHT, &tb->values[first_difference],
1766 x->vars[first_difference]);
1769 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1771 float_M_suffix (struct tab_table *table, int c, int r, double v)
1773 static const struct fmt_spec f = {FMT_F, 8, 0};
1774 struct len_string s;
1777 s.string = tab_alloc (table, 9);
1779 format_short (s.string, &f, (union value *) &v);
1780 while (*s.string == ' ')
1785 tab_raw (table, c, r, TAB_RIGHT, &s);
1788 /* Displays the crosstabulation table. */
1790 display_crosstabulation (void)
1795 for (r = 0; r < n_rows; r++)
1796 table_value_missing (table, nvar - 2, r * num_cells,
1797 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1799 tab_text (table, nvar - 2, n_rows * num_cells,
1800 TAB_LEFT, _("Total"));
1802 /* Put in the actual cells. */
1807 tab_offset (table, nvar - 1, -1);
1808 for (r = 0; r < n_rows; r++)
1811 tab_hline (table, TAL_1, -1, n_cols, 0);
1812 for (c = 0; c < n_cols; c++)
1814 double expected_value = row_tot[r] * col_tot[c] / W;
1815 for (i = 0; i < num_cells; i++)
1825 v = *mp / row_tot[r] * 100.;
1828 v = *mp / col_tot[c] * 100.;
1833 case CRS_CL_EXPECTED:
1836 case CRS_CL_RESIDUAL:
1837 v = *mp - expected_value;
1839 case CRS_CL_SRESIDUAL:
1840 v = (*mp - expected_value) / sqrt (expected_value);
1842 case CRS_CL_ASRESIDUAL:
1843 v = ((*mp - expected_value)
1844 / sqrt (expected_value
1845 * (1. - row_tot[r] / W)
1846 * (1. - col_tot[c] / W)));
1852 if (cmd.miss == CRS_REPORT
1853 && (is_num_user_missing (cols[c].f, x->vars[COL_VAR])
1854 || is_num_user_missing (rows[r].f, x->vars[ROW_VAR])))
1855 float_M_suffix (table, c, i, v);
1857 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1863 tab_offset (table, -1, tab_row (table) + num_cells);
1871 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1872 for (r = 0; r < n_rows; r++)
1873 for (i = 0; i < num_cells; i++)
1886 v = row_tot[r] / W * 100.;
1889 v = row_tot[r] / W * 100.;
1891 case CRS_CL_EXPECTED:
1892 case CRS_CL_RESIDUAL:
1893 case CRS_CL_SRESIDUAL:
1894 case CRS_CL_ASRESIDUAL:
1901 if (cmd.miss == CRS_REPORT
1902 && is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1903 float_M_suffix (table, n_cols, 0, v);
1905 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1907 tab_next_row (table);
1911 /* Column totals, grand total. */
1917 tab_hline (table, TAL_1, -1, n_cols, 0);
1918 for (c = 0; c <= n_cols; c++)
1920 double ct = c < n_cols ? col_tot[c] : W;
1923 for (i = j = 0; i < num_cells; i++)
1941 case CRS_CL_EXPECTED:
1942 case CRS_CL_RESIDUAL:
1943 case CRS_CL_SRESIDUAL:
1944 case CRS_CL_ASRESIDUAL:
1950 if (cmd.miss == CRS_REPORT && c < n_cols
1951 && is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1952 float_M_suffix (table, c, j, v);
1954 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
1961 tab_offset (table, -1, tab_row (table) + last_row);
1964 tab_offset (table, 0, -1);
1967 static void calc_r (double *X, double *Y, double *, double *, double *);
1968 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1970 /* Display chi-square statistics. */
1972 display_chisq (void)
1974 static const char *chisq_stats[N_CHISQ] =
1976 N_("Pearson Chi-Square"),
1977 N_("Likelihood Ratio"),
1978 N_("Fisher's Exact Test"),
1979 N_("Continuity Correction"),
1980 N_("Linear-by-Linear Association"),
1982 double chisq_v[N_CHISQ];
1983 double fisher1, fisher2;
1989 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1991 tab_offset (chisq, nvar - 2, -1);
1993 for (i = 0; i < N_CHISQ; i++)
1995 if ((i != 2 && chisq_v[i] == SYSMIS)
1996 || (i == 2 && fisher1 == SYSMIS))
2000 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2003 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2004 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2005 tab_float (chisq, 3, 0, TAB_RIGHT,
2006 chisq_sig (chisq_v[i], df[i]), 8, 3);
2011 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2012 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2014 tab_next_row (chisq);
2017 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2018 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2019 tab_next_row (chisq);
2021 tab_offset (chisq, 0, -1);
2024 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2025 double[N_SYMMETRIC]);
2027 /* Display symmetric measures. */
2029 display_symmetric (void)
2031 static const char *categories[] =
2033 N_("Nominal by Nominal"),
2034 N_("Ordinal by Ordinal"),
2035 N_("Interval by Interval"),
2036 N_("Measure of Agreement"),
2039 static const char *stats[N_SYMMETRIC] =
2043 N_("Contingency Coefficient"),
2044 N_("Kendall's tau-b"),
2045 N_("Kendall's tau-c"),
2047 N_("Spearman Correlation"),
2052 static const int stats_categories[N_SYMMETRIC] =
2054 0, 0, 0, 1, 1, 1, 1, 2, 3,
2058 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2061 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2064 tab_offset (sym, nvar - 2, -1);
2066 for (i = 0; i < N_SYMMETRIC; i++)
2068 if (sym_v[i] == SYSMIS)
2071 if (stats_categories[i] != last_cat)
2073 last_cat = stats_categories[i];
2074 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2077 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2078 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2079 if (sym_ase[i] != SYSMIS)
2080 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2081 if (sym_t[i] != SYSMIS)
2082 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2083 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2087 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2088 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2091 tab_offset (sym, 0, -1);
2094 static int calc_risk (double[], double[], double[], union value *);
2096 /* Display risk estimate. */
2101 double risk_v[3], lower[3], upper[3];
2105 if (!calc_risk (risk_v, upper, lower, c))
2108 tab_offset (risk, nvar - 2, -1);
2110 for (i = 0; i < 3; i++)
2112 if (risk_v[i] == SYSMIS)
2118 if (x->vars[COL_VAR]->type == NUMERIC)
2119 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2120 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2122 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2123 x->vars[COL_VAR]->name,
2124 x->vars[COL_VAR]->width, c[0].s,
2125 x->vars[COL_VAR]->width, c[1].s);
2129 if (x->vars[ROW_VAR]->type == NUMERIC)
2130 sprintf (buf, _("For cohort %s = %g"),
2131 x->vars[ROW_VAR]->name, rows[i - 1].f);
2133 sprintf (buf, _("For cohort %s = %.*s"),
2134 x->vars[ROW_VAR]->name,
2135 x->vars[ROW_VAR]->width, rows[i - 1].s);
2139 tab_text (risk, 0, 0, TAB_LEFT, buf);
2140 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2141 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2142 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2143 tab_next_row (risk);
2146 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2147 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2148 tab_next_row (risk);
2150 tab_offset (risk, 0, -1);
2153 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2154 double[N_DIRECTIONAL]);
2156 /* Display directional measures. */
2158 display_directional (void)
2160 static const char *categories[] =
2162 N_("Nominal by Nominal"),
2163 N_("Ordinal by Ordinal"),
2164 N_("Nominal by Interval"),
2167 static const char *stats[] =
2170 N_("Goodman and Kruskal tau"),
2171 N_("Uncertainty Coefficient"),
2176 static const char *types[] =
2183 static const int stats_categories[N_DIRECTIONAL] =
2185 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2188 static const int stats_stats[N_DIRECTIONAL] =
2190 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2193 static const int stats_types[N_DIRECTIONAL] =
2195 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2198 static const int *stats_lookup[] =
2205 static const char **stats_names[] =
2217 double direct_v[N_DIRECTIONAL];
2218 double direct_ase[N_DIRECTIONAL];
2219 double direct_t[N_DIRECTIONAL];
2223 if (!calc_directional (direct_v, direct_ase, direct_t))
2226 tab_offset (direct, nvar - 2, -1);
2228 for (i = 0; i < N_DIRECTIONAL; i++)
2230 if (direct_v[i] == SYSMIS)
2236 for (j = 0; j < 3; j++)
2237 if (last[j] != stats_lookup[j][i])
2240 tab_hline (direct, TAL_1, j, 6, 0);
2245 int k = last[j] = stats_lookup[j][i];
2250 string = x->vars[0]->name;
2252 string = x->vars[1]->name;
2254 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2255 gettext (stats_names[j][k]), string);
2260 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2261 if (direct_ase[i] != SYSMIS)
2262 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2263 if (direct_t[i] != SYSMIS)
2264 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2265 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2266 tab_next_row (direct);
2269 tab_offset (direct, 0, -1);
2272 /* Statistical calculations. */
2274 /* Returns the value of the gamma (factorial) function for an integer
2277 gamma_int (double x)
2282 for (i = 2; i < x; i++)
2287 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2289 static inline double
2290 Pr (int a, int b, int c, int d)
2292 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2293 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2294 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2295 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2296 / gamma_int (a + b + c + d + 1.));
2299 /* Swap the contents of A and B. */
2301 swap (int *a, int *b)
2308 /* Calculate significance for Fisher's exact test as specified in
2309 _SPSS Statistical Algorithms_, Appendix 5. */
2311 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2315 if (min (c, d) < min (a, b))
2316 swap (&a, &c), swap (&b, &d);
2317 if (min (b, d) < min (a, c))
2318 swap (&a, &b), swap (&c, &d);
2322 swap (&a, &b), swap (&c, &d);
2324 swap (&a, &c), swap (&b, &d);
2328 for (x = 0; x <= a; x++)
2329 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2331 *fisher2 = *fisher1;
2332 for (x = 1; x <= b; x++)
2333 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2336 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2337 columns with values COLS and N_ROWS rows with values ROWS. Values
2338 in the matrix sum to W. */
2340 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2341 double *fisher1, double *fisher2)
2345 chisq[0] = chisq[1] = 0.;
2346 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2347 *fisher1 = *fisher2 = SYSMIS;
2349 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2351 if (ns_rows <= 1 || ns_cols <= 1)
2353 chisq[0] = chisq[1] = SYSMIS;
2357 for (r = 0; r < n_rows; r++)
2358 for (c = 0; c < n_cols; c++)
2360 const double expected = row_tot[r] * col_tot[c] / W;
2361 const double freq = mat[n_cols * r + c];
2362 const double residual = freq - expected;
2364 chisq[0] += residual * residual / expected;
2366 chisq[1] += freq * log (expected / freq);
2377 /* Calculate Yates and Fisher exact test. */
2378 if (ns_cols == 2 && ns_rows == 2)
2380 double f11, f12, f21, f22;
2386 for (i = j = 0; i < n_cols; i++)
2387 if (col_tot[i] != 0.)
2396 f11 = mat[nz_cols[0]];
2397 f12 = mat[nz_cols[1]];
2398 f21 = mat[nz_cols[0] + n_cols];
2399 f22 = mat[nz_cols[1] + n_cols];
2404 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2407 chisq[3] = (W * x * x
2408 / (f11 + f12) / (f21 + f22)
2409 / (f11 + f21) / (f12 + f22));
2417 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2418 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2421 /* Calculate Mantel-Haenszel. */
2422 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2424 double r, ase_0, ase_1;
2425 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2427 chisq[4] = (W - 1.) * r * r;
2432 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2433 ASE_1, and ase_0 into ASE_0. The row and column values must be
2434 passed in X and Y. */
2436 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2438 double SX, SY, S, T;
2440 double sum_XYf, sum_X2Y2f;
2441 double sum_Xr, sum_X2r;
2442 double sum_Yc, sum_Y2c;
2445 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2446 for (j = 0; j < n_cols; j++)
2448 double fij = mat[j + i * n_cols];
2449 double product = X[i] * Y[j];
2450 double temp = fij * product;
2452 sum_X2Y2f += temp * product;
2455 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2457 sum_Xr += X[i] * row_tot[i];
2458 sum_X2r += X[i] * X[i] * row_tot[i];
2462 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2464 sum_Yc += Y[i] * col_tot[i];
2465 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2469 S = sum_XYf - sum_Xr * sum_Yc / W;
2470 SX = sum_X2r - sum_Xr * sum_Xr / W;
2471 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2474 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2479 for (s = c = 0., i = 0; i < n_rows; i++)
2480 for (j = 0; j < n_cols; j++)
2482 double Xresid, Yresid;
2485 Xresid = X[i] - Xbar;
2486 Yresid = Y[j] - Ybar;
2487 temp = (T * Xresid * Yresid
2489 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2490 y = mat[j + i * n_cols] * temp * temp - c;
2495 *ase_1 = sqrt (s) / (T * T);
2499 static double somers_d_v[3];
2500 static double somers_d_ase[3];
2501 static double somers_d_t[3];
2503 /* Calculate symmetric statistics and their asymptotic standard
2504 errors. Returns 0 if none could be calculated. */
2506 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2507 double t[N_SYMMETRIC])
2509 int q = min (ns_rows, ns_cols);
2518 for (i = 0; i < N_SYMMETRIC; i++)
2519 v[i] = ase[i] = t[i] = SYSMIS;
2522 /* Phi, Cramer's V, contingency coefficient. */
2523 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2525 double Xp = 0.; /* Pearson chi-square. */
2530 for (r = 0; r < n_rows; r++)
2531 for (c = 0; c < n_cols; c++)
2533 const double expected = row_tot[r] * col_tot[c] / W;
2534 const double freq = mat[n_cols * r + c];
2535 const double residual = freq - expected;
2537 Xp += residual * residual / expected;
2541 if (cmd.a_statistics[CRS_ST_PHI])
2543 v[0] = sqrt (Xp / W);
2544 v[1] = sqrt (Xp / (W * (q - 1)));
2546 if (cmd.a_statistics[CRS_ST_CC])
2547 v[2] = sqrt (Xp / (Xp + W));
2550 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2551 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2556 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2563 for (r = 0; r < n_rows; r++)
2564 Dr -= row_tot[r] * row_tot[r];
2565 for (c = 0; c < n_cols; c++)
2566 Dc -= col_tot[c] * col_tot[c];
2572 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2573 for (c = 0; c < n_cols; c++)
2577 for (r = 0; r < n_rows; r++)
2578 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2588 for (i = 0; i < n_rows; i++)
2592 for (j = 1; j < n_cols; j++)
2593 Cij += col_tot[j] - cum[j + i * n_cols];
2596 for (j = 1; j < n_cols; j++)
2597 Dij += cum[j + (i - 1) * n_cols];
2601 double fij = mat[j + i * n_cols];
2607 assert (j < n_cols);
2609 Cij -= col_tot[j] - cum[j + i * n_cols];
2610 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2614 Cij += cum[j - 1 + (i - 1) * n_cols];
2615 Dij -= cum[j + (i - 1) * n_cols];
2621 if (cmd.a_statistics[CRS_ST_BTAU])
2622 v[3] = (P - Q) / sqrt (Dr * Dc);
2623 if (cmd.a_statistics[CRS_ST_CTAU])
2624 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2625 if (cmd.a_statistics[CRS_ST_GAMMA])
2626 v[5] = (P - Q) / (P + Q);
2628 /* ASE for tau-b, tau-c, gamma. Calculations could be
2629 eliminated here, at expense of memory. */
2634 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2635 for (i = 0; i < n_rows; i++)
2639 for (j = 1; j < n_cols; j++)
2640 Cij += col_tot[j] - cum[j + i * n_cols];
2643 for (j = 1; j < n_cols; j++)
2644 Dij += cum[j + (i - 1) * n_cols];
2648 double fij = mat[j + i * n_cols];
2650 if (cmd.a_statistics[CRS_ST_BTAU])
2652 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2653 + v[3] * (row_tot[i] * Dc
2654 + col_tot[j] * Dr));
2655 btau_cum += fij * temp * temp;
2659 const double temp = Cij - Dij;
2660 ctau_cum += fij * temp * temp;
2663 if (cmd.a_statistics[CRS_ST_GAMMA])
2665 const double temp = Q * Cij - P * Dij;
2666 gamma_cum += fij * temp * temp;
2669 if (cmd.a_statistics[CRS_ST_D])
2671 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2672 - (P - Q) * (W - row_tot[i]));
2673 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2674 - (Q - P) * (W - col_tot[j]));
2679 assert (j < n_cols);
2681 Cij -= col_tot[j] - cum[j + i * n_cols];
2682 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2686 Cij += cum[j - 1 + (i - 1) * n_cols];
2687 Dij -= cum[j + (i - 1) * n_cols];
2693 btau_var = ((btau_cum
2694 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2696 if (cmd.a_statistics[CRS_ST_BTAU])
2698 ase[3] = sqrt (btau_var);
2699 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2702 if (cmd.a_statistics[CRS_ST_CTAU])
2704 ase[4] = ((2 * q / ((q - 1) * W * W))
2705 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2706 t[4] = v[4] / ase[4];
2708 if (cmd.a_statistics[CRS_ST_GAMMA])
2710 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2711 t[5] = v[5] / (2. / (P + Q)
2712 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2714 if (cmd.a_statistics[CRS_ST_D])
2716 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2717 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2718 somers_d_t[0] = (somers_d_v[0]
2720 * sqrt (ctau_cum - sqr (P - Q) / W)));
2721 somers_d_v[1] = (P - Q) / Dc;
2722 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2723 somers_d_t[1] = (somers_d_v[1]
2725 * sqrt (ctau_cum - sqr (P - Q) / W)));
2726 somers_d_v[2] = (P - Q) / Dr;
2727 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2728 somers_d_t[2] = (somers_d_v[2]
2730 * sqrt (ctau_cum - sqr (P - Q) / W)));
2736 /* Spearman correlation, Pearson's r. */
2737 if (cmd.a_statistics[CRS_ST_CORR])
2739 double *R = local_alloc (sizeof *R * n_rows);
2740 double *C = local_alloc (sizeof *C * n_cols);
2743 double y, t, c = 0., s = 0.;
2748 R[i] = s + (row_tot[i] + 1.) / 2.;
2755 assert (i < n_rows);
2760 double y, t, c = 0., s = 0.;
2765 C[j] = s + (col_tot[j] + 1.) / 2;
2772 assert (j < n_cols);
2776 calc_r (R, C, &v[6], &t[6], &ase[6]);
2782 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2786 /* Cohen's kappa. */
2787 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2789 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2792 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2793 i < ns_rows; i++, j++)
2797 while (col_tot[j] == 0.)
2800 prod = row_tot[i] * col_tot[j];
2801 sum = row_tot[i] + col_tot[j];
2803 sum_fii += mat[j + i * n_cols];
2805 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2806 sum_riciri_ci += prod * sum;
2808 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2809 for (j = 0; j < ns_cols; j++)
2811 double sum = row_tot[i] + col_tot[j];
2812 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2815 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2817 ase[8] = sqrt ((W * W * sum_rici
2818 + sum_rici * sum_rici
2819 - W * sum_riciri_ci)
2820 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2822 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2823 / sqr (W * W - sum_rici))
2824 + ((2. * (W - sum_fii)
2825 * (2. * sum_fii * sum_rici
2826 - W * sum_fiiri_ci))
2827 / cube (W * W - sum_rici))
2828 + (sqr (W - sum_fii)
2829 * (W * sum_fijri_ci2 - 4.
2830 * sum_rici * sum_rici)
2831 / pow4 (W * W - sum_rici))));
2833 t[8] = v[8] / ase[8];
2840 /* Calculate risk estimate. */
2842 calc_risk (double *value, double *upper, double *lower, union value *c)
2844 double f11, f12, f21, f22;
2850 for (i = 0; i < 3; i++)
2851 value[i] = upper[i] = lower[i] = SYSMIS;
2854 if (ns_rows != 2 || ns_cols != 2)
2861 for (i = j = 0; i < n_cols; i++)
2862 if (col_tot[i] != 0.)
2871 f11 = mat[nz_cols[0]];
2872 f12 = mat[nz_cols[1]];
2873 f21 = mat[nz_cols[0] + n_cols];
2874 f22 = mat[nz_cols[1] + n_cols];
2876 c[0] = cols[nz_cols[0]];
2877 c[1] = cols[nz_cols[1]];
2880 value[0] = (f11 * f22) / (f12 * f21);
2881 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2882 lower[0] = value[0] * exp (-1.960 * v);
2883 upper[0] = value[0] * exp (1.960 * v);
2885 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2886 v = sqrt ((f12 / (f11 * (f11 + f12)))
2887 + (f22 / (f21 * (f21 + f22))));
2888 lower[1] = value[1] * exp (-1.960 * v);
2889 upper[1] = value[1] * exp (1.960 * v);
2891 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2892 v = sqrt ((f11 / (f12 * (f11 + f12)))
2893 + (f21 / (f22 * (f21 + f22))));
2894 lower[2] = value[2] * exp (-1.960 * v);
2895 upper[2] = value[2] * exp (1.960 * v);
2900 /* Calculate directional measures. */
2902 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2903 double t[N_DIRECTIONAL])
2908 for (i = 0; i < N_DIRECTIONAL; i++)
2909 v[i] = ase[i] = t[i] = SYSMIS;
2913 if (cmd.a_statistics[CRS_ST_LAMBDA])
2915 double *fim = xmalloc (sizeof *fim * n_rows);
2916 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2917 double *fmj = xmalloc (sizeof *fmj * n_cols);
2918 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2919 double sum_fim, sum_fmj;
2921 int rm_index, cm_index;
2924 /* Find maximum for each row and their sum. */
2925 for (sum_fim = 0., i = 0; i < n_rows; i++)
2927 double max = mat[i * n_cols];
2930 for (j = 1; j < n_cols; j++)
2931 if (mat[j + i * n_cols] > max)
2933 max = mat[j + i * n_cols];
2937 sum_fim += fim[i] = max;
2938 fim_index[i] = index;
2941 /* Find maximum for each column. */
2942 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2944 double max = mat[j];
2947 for (i = 1; i < n_rows; i++)
2948 if (mat[j + i * n_cols] > max)
2950 max = mat[j + i * n_cols];
2954 sum_fmj += fmj[j] = max;
2955 fmj_index[j] = index;
2958 /* Find maximum row total. */
2961 for (i = 1; i < n_rows; i++)
2962 if (row_tot[i] > rm)
2968 /* Find maximum column total. */
2971 for (j = 1; j < n_cols; j++)
2972 if (col_tot[j] > cm)
2978 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2979 v[1] = (sum_fmj - rm) / (W - rm);
2980 v[2] = (sum_fim - cm) / (W - cm);
2982 /* ASE1 for Y given X. */
2986 for (accum = 0., i = 0; i < n_rows; i++)
2987 for (j = 0; j < n_cols; j++)
2989 const int deltaj = j == cm_index;
2990 accum += (mat[j + i * n_cols]
2991 * sqr ((j == fim_index[i])
2996 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2999 /* ASE0 for Y given X. */
3003 for (accum = 0., i = 0; i < n_rows; i++)
3004 if (cm_index != fim_index[i])
3005 accum += (mat[i * n_cols + fim_index[i]]
3006 + mat[i * n_cols + cm_index]);
3007 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3010 /* ASE1 for X given Y. */
3014 for (accum = 0., i = 0; i < n_rows; i++)
3015 for (j = 0; j < n_cols; j++)
3017 const int deltaj = i == rm_index;
3018 accum += (mat[j + i * n_cols]
3019 * sqr ((i == fmj_index[j])
3024 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3027 /* ASE0 for X given Y. */
3031 for (accum = 0., j = 0; j < n_cols; j++)
3032 if (rm_index != fmj_index[j])
3033 accum += (mat[j + n_cols * fmj_index[j]]
3034 + mat[j + n_cols * rm_index]);
3035 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3038 /* Symmetric ASE0 and ASE1. */
3043 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3044 for (j = 0; j < n_cols; j++)
3046 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3047 int temp1 = (i == rm_index) + (j == cm_index);
3048 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3049 accum1 += (mat[j + i * n_cols]
3050 * sqr (temp0 + (v[0] - 1.) * temp1));
3052 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3053 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3054 / (2. * W - rm - cm));
3063 double sum_fij2_ri, sum_fij2_ci;
3064 double sum_ri2, sum_cj2;
3066 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3067 for (j = 0; j < n_cols; j++)
3069 double temp = sqr (mat[j + i * n_cols]);
3070 sum_fij2_ri += temp / row_tot[i];
3071 sum_fij2_ci += temp / col_tot[j];
3074 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3075 sum_ri2 += row_tot[i] * row_tot[i];
3077 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3078 sum_cj2 += col_tot[j] * col_tot[j];
3080 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3081 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3085 if (cmd.a_statistics[CRS_ST_UC])
3087 double UX, UY, UXY, P;
3088 double ase1_yx, ase1_xy, ase1_sym;
3091 for (UX = 0., i = 0; i < n_rows; i++)
3092 if (row_tot[i] > 0.)
3093 UX -= row_tot[i] / W * log (row_tot[i] / W);
3095 for (UY = 0., j = 0; j < n_cols; j++)
3096 if (col_tot[j] > 0.)
3097 UY -= col_tot[j] / W * log (col_tot[j] / W);
3099 for (UXY = P = 0., i = 0; i < n_rows; i++)
3100 for (j = 0; j < n_cols; j++)
3102 double entry = mat[j + i * n_cols];
3107 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3108 UXY -= entry / W * log (entry / W);
3111 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3112 for (j = 0; j < n_cols; j++)
3114 double entry = mat[j + i * n_cols];
3119 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3120 + (UX - UXY) * log (col_tot[j] / W));
3121 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3122 + (UY - UXY) * log (row_tot[i] / W));
3123 ase1_sym += entry * sqr ((UXY
3124 * log (row_tot[i] * col_tot[j] / (W * W)))
3125 - (UX + UY) * log (entry / W));
3128 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3129 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3130 t[5] = v[5] / ((2. / (W * (UX + UY)))
3131 * sqrt (P - sqr (UX + UY - UXY) / W));
3133 v[6] = (UX + UY - UXY) / UX;
3134 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3135 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3137 v[7] = (UX + UY - UXY) / UY;
3138 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3139 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3143 if (cmd.a_statistics[CRS_ST_D])
3148 calc_symmetric (NULL, NULL, NULL);
3149 for (i = 0; i < 3; i++)
3151 v[8 + i] = somers_d_v[i];
3152 ase[8 + i] = somers_d_ase[i];
3153 t[8 + i] = somers_d_t[i];
3158 if (cmd.a_statistics[CRS_ST_ETA])
3161 double sum_Xr, sum_X2r;
3165 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3167 sum_Xr += rows[i].f * row_tot[i];
3168 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3170 SX = sum_X2r - sum_Xr * sum_Xr / W;
3172 for (SXW = 0., j = 0; j < n_cols; j++)
3176 for (cum = 0., i = 0; i < n_rows; i++)
3178 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3179 cum += rows[i].f * mat[j + i * n_cols];
3182 SXW -= cum * cum / col_tot[j];
3184 v[11] = sqrt (1. - SXW / SX);
3188 double sum_Yc, sum_Y2c;
3192 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3194 sum_Yc += cols[i].f * col_tot[i];
3195 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3197 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3199 for (SYW = 0., i = 0; i < n_rows; i++)
3203 for (cum = 0., j = 0; j < n_cols; j++)
3205 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3206 cum += cols[j].f * mat[j + i * n_cols];
3209 SYW -= cum * cum / row_tot[i];
3211 v[12] = sqrt (1. - SYW / SY);
3218 /* A wrapper around data_out() that limits string output to short
3219 string width and null terminates the result. */
3221 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3223 struct fmt_spec fmt_subst;
3225 /* Limit to short string width. */
3226 if (formats[fp->type].cat & FCAT_STRING)
3230 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3231 if (fmt_subst.type == FMT_A)
3232 fmt_subst.w = min (8, fmt_subst.w);
3234 fmt_subst.w = min (16, fmt_subst.w);
3240 data_out (s, fp, v);
3242 /* Null terminate. */