1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include "algorithm.h"
49 #include "value-labels.h"
53 #include "debug-print.h"
59 +missing=miss:!table/include/report;
60 +write[wr_]=none,cells,all;
61 +format=fmt:!labels/nolabels/novallabs,
64 tabl:!tables/notables,
67 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
69 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
75 /* Number of chi-square statistics. */
78 /* Number of symmetric statistics. */
81 /* Number of directional statistics. */
82 #define N_DIRECTIONAL 13
84 /* A single table entry for general mode. */
87 int table; /* Flattened table number. */
90 double freq; /* Frequency count. */
91 double *data; /* Crosstabulation table for integer mode. */
94 union value values[1]; /* Values. */
97 /* A crosstabulation. */
100 int nvar; /* Number of variables. */
101 double missing; /* Missing cases count. */
102 int ofs; /* Integer mode: Offset into sorted_tab[]. */
103 struct variable *vars[2]; /* At least two variables; sorted by
104 larger indices first. */
107 /* Indexes into crosstab.v. */
114 /* General mode crosstabulation table. */
115 static struct hsh_table *gen_tab; /* Hash table. */
116 static int n_sorted_tab; /* Number of entries in sorted_tab. */
117 static struct table_entry **sorted_tab; /* Sorted table. */
119 /* Variables specifies on VARIABLES. */
120 static struct variable **variables;
121 static size_t variables_cnt;
124 static struct crosstab **xtab;
127 /* Integer or general mode? */
136 static int num_cells; /* Number of cells requested. */
137 static int cells[8]; /* Cells requested. */
140 static int write; /* One of WR_* that specifies the WRITE style. */
142 /* Command parsing info. */
143 static struct cmd_crosstabs cmd;
146 static struct pool *pl_tc; /* For table cells. */
147 static struct pool *pl_col; /* For column data. */
149 static int internal_cmd_crosstabs (void);
150 static void precalc (void *);
151 static int calc_general (struct ccase *, void *);
152 static int calc_integer (struct ccase *, void *);
153 static void postcalc (void *);
154 static void submit (struct tab_table *);
156 static void format_short (char *s, const struct fmt_spec *fp,
157 const union value *v);
160 static void debug_print (void);
161 static void print_table_entries (struct table_entry **tab);
164 /* Parse and execute CROSSTABS, then clean up. */
168 int result = internal_cmd_crosstabs ();
171 pool_destroy (pl_tc);
172 pool_destroy (pl_col);
177 /* Parses and executes the CROSSTABS procedure. */
179 internal_cmd_crosstabs (void)
185 pl_tc = pool_create ();
186 pl_col = pool_create ();
188 lex_match_id ("CROSSTABS");
189 if (!parse_crosstabs (&cmd))
193 /* Needs variables. */
197 mode = variables ? INTEGER : GENERAL;
202 cmd.a_cells[CRS_CL_COUNT] = 1;
210 for (i = 0; i < CRS_CL_count; i++)
215 cmd.a_cells[CRS_CL_COUNT] = 1;
216 cmd.a_cells[CRS_CL_ROW] = 1;
217 cmd.a_cells[CRS_CL_COLUMN] = 1;
218 cmd.a_cells[CRS_CL_TOTAL] = 1;
220 if (cmd.a_cells[CRS_CL_ALL])
222 for (i = 0; i < CRS_CL_count; i++)
224 cmd.a_cells[CRS_CL_ALL] = 0;
226 cmd.a_cells[CRS_CL_NONE] = 0;
227 for (num_cells = i = 0; i < CRS_CL_count; i++)
229 cmd.a_cells[num_cells++] = i;
233 if (cmd.sbc_statistics)
238 for (i = 0; i < CRS_ST_count; i++)
239 if (cmd.a_statistics[i])
242 cmd.a_statistics[CRS_ST_CHISQ] = 1;
243 if (cmd.a_statistics[CRS_ST_ALL])
244 for (i = 0; i < CRS_ST_count; i++)
245 cmd.a_statistics[i] = 1;
249 if (cmd.miss == CRS_REPORT && mode == GENERAL)
251 msg (SE, _("Missing mode REPORT not allowed in general mode. "
252 "Assuming MISSING=TABLE."));
253 cmd.miss = CRS_TABLE;
257 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
258 cmd.a_write[CRS_WR_ALL] = 0;
259 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
261 msg (SE, _("Write mode ALL not allowed in general mode. "
262 "Assuming WRITE=CELLS."));
263 cmd.a_write[CRS_WR_CELLS] = 1;
266 && (cmd.a_write[CRS_WR_NONE]
267 + cmd.a_write[CRS_WR_ALL]
268 + cmd.a_write[CRS_WR_CELLS] == 0))
269 cmd.a_write[CRS_WR_CELLS] = 1;
270 if (cmd.a_write[CRS_WR_CELLS])
271 write = CRS_WR_CELLS;
272 else if (cmd.a_write[CRS_WR_ALL])
277 procedure_with_splits (precalc,
278 mode == GENERAL ? calc_general : calc_integer,
284 /* Parses the TABLES subcommand. */
286 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
288 struct var_set *var_set;
290 struct variable ***by = NULL;
295 /* Ensure that this is a TABLES subcommand. */
296 if (!lex_match_id ("TABLES")
297 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
302 if (variables != NULL)
303 var_set = var_set_create_from_array (variables, variables_cnt);
305 var_set = var_set_create_from_dict (default_dict);
306 assert (var_set != NULL);
310 by = xrealloc (by, sizeof *by * (n_by + 1));
311 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
312 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
313 PV_NO_DUPLICATE | PV_NO_SCRATCH))
318 if (!lex_match (T_BY))
322 lex_error (_("expecting BY"));
331 int *by_iter = xcalloc (sizeof *by_iter * n_by);
334 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
335 for (i = 0; i < nx; i++)
339 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
346 for (i = 0; i < n_by; i++)
347 x->vars[i] = by[i][by_iter[i]];
353 for (i = n_by - 1; i >= 0; i--)
355 if (++by_iter[i] < by_nvar[i])
368 /* All return paths lead here. */
372 for (i = 0; i < n_by; i++)
378 var_set_destroy (var_set);
383 /* Parses the VARIABLES subcommand. */
385 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
389 msg (SE, _("VARIABLES must be specified before TABLES."));
397 int orig_nv = variables_cnt;
402 if (!parse_variables (default_dict, &variables, &variables_cnt,
403 (PV_APPEND | PV_NUMERIC
404 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
409 lex_error ("expecting `('");
414 if (!lex_force_int ())
416 min = lex_integer ();
421 if (!lex_force_int ())
423 max = lex_integer ();
426 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
434 lex_error ("expecting `)'");
439 for (i = orig_nv; i < variables_cnt; i++)
441 variables[i]->p.crs.min = min;
442 variables[i]->p.crs.max = max + 1.;
443 variables[i]->p.crs.count = max - min + 1;
462 printf ("CROSSTABS\n");
464 if (variables != NULL)
468 printf ("\t/VARIABLES=");
469 for (i = 0; i < variables_cnt; i++)
471 struct variable *v = variables[i];
473 printf ("%s ", v->name);
474 if (i < variables_cnt - 1)
476 struct variable *nv = variables[i + 1];
478 if (v->p.crs.min == nv->p.crs.min
479 && v->p.crs.max == nv->p.crs.max)
482 printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1);
490 printf ("\t/TABLES=");
491 for (i = 0; i < nxtab; i++)
493 struct crosstab *x = xtab[i];
498 for (j = 0; j < x->nvar; j++)
502 printf ("%s", x->v[j]->name);
508 #endif /* DEBUGGING */
510 /* Data file processing. */
512 static int compare_table_entry (const void *, const void *, void *);
513 static unsigned hash_table_entry (const void *, void *);
515 /* Set up the crosstabulation tables for processing. */
517 precalc (void *aux UNUSED)
521 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
531 for (i = 0; i < nxtab; i++)
533 struct crosstab *x = xtab[i];
538 x->ofs = n_sorted_tab;
540 for (j = 2; j < x->nvar; j++)
541 count *= x->vars[j - 2]->p.crs.count;
543 sorted_tab = xrealloc (sorted_tab,
544 sizeof *sorted_tab * (n_sorted_tab + count));
545 v = local_alloc (sizeof *v * x->nvar);
546 for (j = 2; j < x->nvar; j++)
547 v[j] = x->vars[j]->p.crs.min;
548 for (j = 0; j < count; j++)
550 struct table_entry *te;
553 te = sorted_tab[n_sorted_tab++]
554 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
558 const int mat_size = (x->vars[0]->p.crs.count
559 * x->vars[1]->p.crs.count);
562 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
563 for (m = 0; m < mat_size; m++)
567 for (k = 2; k < x->nvar; k++)
568 te->values[k].f = v[k];
569 for (k = 2; k < x->nvar; k++)
570 if (++v[k] >= x->vars[k]->p.crs.max)
571 v[k] = x->vars[k]->p.crs.min;
578 sorted_tab = xrealloc (sorted_tab,
579 sizeof *sorted_tab * (n_sorted_tab + 1));
580 sorted_tab[n_sorted_tab] = NULL;
584 /* Form crosstabulations for general mode. */
586 calc_general (struct ccase *c, void *aux UNUSED)
589 double weight = dict_get_case_weight (default_dict, c);
591 /* Flattened current table index. */
594 for (t = 0; t < nxtab; t++)
596 struct crosstab *x = xtab[t];
597 const size_t entry_size = (sizeof (struct table_entry)
598 + sizeof (union value) * (x->nvar - 1));
599 struct table_entry *te = local_alloc (entry_size);
601 /* Construct table entry for the current record and table. */
607 for (j = 0; j < x->nvar; j++)
609 if ((cmd.miss == CRS_TABLE
610 && is_missing (&c->data[x->vars[j]->fv], x->vars[j]))
611 || (cmd.miss == CRS_INCLUDE
612 && is_system_missing (&c->data[x->vars[j]->fv],
615 x->missing += weight;
619 if (x->vars[j]->type == NUMERIC)
620 te->values[j].f = c->data[x->vars[j]->fv].f;
623 memcpy (te->values[j].s, c->data[x->vars[j]->fv].s,
626 /* Necessary in order to simplify comparisons. */
627 memset (&te->values[j].s[x->vars[j]->width], 0,
628 sizeof (union value) - x->vars[j]->width);
633 /* Add record to hash table. */
635 struct table_entry **tepp
636 = (struct table_entry **) hsh_probe (gen_tab, te);
639 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
642 memcpy (tep, te, entry_size);
647 (*tepp)->u.freq += weight;
658 calc_integer (struct ccase *c, void *aux UNUSED)
661 double weight = dict_get_case_weight (default_dict, c);
663 /* Flattened current table index. */
666 for (t = 0; t < nxtab; t++)
668 struct crosstab *x = xtab[t];
673 for (i = 0; i < x->nvar; i++)
675 struct variable *const v = x->vars[i];
676 double value = c->data[v->fv].f;
678 /* Note that the first test also rules out SYSMIS. */
679 if ((value < v->p.crs.min || value >= v->p.crs.max)
680 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
682 x->missing += weight;
688 ofs += fact * ((int) value - v->p.crs.min);
689 fact *= v->p.crs.count;
694 const int row = c->data[x->vars[ROW_VAR]->fv].f - x->vars[ROW_VAR]->p.crs.min;
695 const int col = c->data[x->vars[COL_VAR]->fv].f - x->vars[COL_VAR]->p.crs.min;
696 const int col_dim = x->vars[COL_VAR]->p.crs.count;
698 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
708 /* Print out all table entries in NULL-terminated TAB for use by a
709 debugger (a person, not a program). */
711 print_table_entries (struct table_entry **tab)
713 printf ("raw crosstabulation data:\n");
716 const struct crosstab *x = xtab[(*tab)->table];
719 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
720 for (i = 0; i < x->nvar; i++)
724 printf ("%s:", x->v[i]->name);
726 if (x->v[i]->type == NUMERIC)
727 printf ("%g", (*tab)->v[i].f);
729 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
737 /* Compare the table_entry's at A and B and return a strcmp()-type
740 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
742 const struct table_entry *a = a_;
743 const struct table_entry *b = b_;
745 if (a->table > b->table)
747 else if (a->table < b->table)
751 const struct crosstab *x = xtab[a->table];
754 for (i = x->nvar - 1; i >= 0; i--)
755 if (x->vars[i]->type == NUMERIC)
757 const double diffnum = a->values[i].f - b->values[i].f;
760 else if (diffnum > 0)
765 assert (x->vars[i]->type == ALPHA);
767 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
778 /* Calculate a hash value from table_entry A. */
780 hash_table_entry (const void *a_, void *foo UNUSED)
782 const struct table_entry *a = a_;
787 for (i = 0; i < xtab[a->table]->nvar; i++)
788 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
793 /* Post-data reading calculations. */
795 static struct table_entry **find_pivot_extent (struct table_entry **,
796 int *cnt, int pivot);
797 static void enum_var_values (struct table_entry **entries, int entry_cnt,
799 union value **values, int *value_cnt);
800 static void output_pivot_table (struct table_entry **, struct table_entry **,
801 double **, double **, double **,
802 int *, int *, int *);
803 static void make_summary_table (void);
806 postcalc (void *aux UNUSED)
810 n_sorted_tab = hsh_count (gen_tab);
811 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
813 print_table_entries (sorted_tab);
817 make_summary_table ();
819 /* Identify all the individual crosstabulation tables, and deal with
822 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
823 int pc = n_sorted_tab; /* Pivot count. */
825 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
826 int maxrows = 0, maxcols = 0, maxcells = 0;
830 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
834 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
835 &maxrows, &maxcols, &maxcells);
844 hsh_destroy (gen_tab);
847 static void insert_summary (struct tab_table *, int tab_index, double valid);
849 /* Output a table summarizing the cases processed. */
851 make_summary_table (void)
853 struct tab_table *summary;
855 struct table_entry **pb = sorted_tab, **pe;
856 int pc = n_sorted_tab;
859 summary = tab_create (7, 3 + nxtab, 1);
860 tab_title (summary, 0, _("Summary."));
861 tab_headers (summary, 1, 0, 3, 0);
862 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
863 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
864 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
865 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
866 tab_hline (summary, TAL_1, 1, 6, 1);
867 tab_hline (summary, TAL_1, 1, 6, 2);
868 tab_vline (summary, TAL_1, 3, 1, 1);
869 tab_vline (summary, TAL_1, 5, 1, 1);
873 for (i = 0; i < 3; i++)
875 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
876 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
879 tab_offset (summary, 0, 3);
885 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
889 while (cur_tab < (*pb)->table)
890 insert_summary (summary, cur_tab++, 0.);
893 for (valid = 0.; pb < pe; pb++)
894 valid += (*pb)->u.freq;
897 const struct crosstab *const x = xtab[(*pb)->table];
898 const int n_cols = x->vars[COL_VAR]->p.crs.count;
899 const int n_rows = x->vars[ROW_VAR]->p.crs.count;
900 const int count = n_cols * n_rows;
902 for (valid = 0.; pb < pe; pb++)
904 const double *data = (*pb)->u.data;
907 for (i = 0; i < count; i++)
911 insert_summary (summary, cur_tab++, valid);
916 while (cur_tab < nxtab)
917 insert_summary (summary, cur_tab++, 0.);
922 /* Inserts a line into T describing the crosstabulation at index
923 TAB_INDEX, which has VALID valid observations. */
925 insert_summary (struct tab_table *t, int tab_index, double valid)
927 struct crosstab *x = xtab[tab_index];
929 tab_hline (t, TAL_1, 0, 6, 0);
931 /* Crosstabulation name. */
933 char *buf = local_alloc (128 * x->nvar);
937 for (i = 0; i < x->nvar; i++)
940 cp = stpcpy (cp, " * ");
943 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
945 tab_text (t, 0, 0, TAB_LEFT, buf);
950 /* Counts and percentages. */
960 for (i = 0; i < 3; i++)
962 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
963 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
974 static struct tab_table *table; /* Crosstabulation table. */
975 static struct tab_table *chisq; /* Chi-square table. */
976 static struct tab_table *sym; /* Symmetric measures table. */
977 static struct tab_table *risk; /* Risk estimate table. */
978 static struct tab_table *direct; /* Directional measures table. */
981 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
983 /* Column values, number of columns. */
984 static union value *cols;
987 /* Row values, number of rows. */
988 static union value *rows;
991 /* Number of statistically interesting columns/rows (columns/rows with
993 static int ns_cols, ns_rows;
995 /* Crosstabulation. */
996 static struct crosstab *x;
998 /* Number of variables from the crosstabulation to consider. This is
999 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1002 /* Matrix contents. */
1003 static double *mat; /* Matrix proper. */
1004 static double *row_tot; /* Row totals. */
1005 static double *col_tot; /* Column totals. */
1006 static double W; /* Grand total. */
1008 static void display_dimensions (struct tab_table *, int first_difference,
1009 struct table_entry *);
1010 static void display_crosstabulation (void);
1011 static void display_chisq (void);
1012 static void display_symmetric (void);
1013 static void display_risk (void);
1014 static void display_directional (void);
1015 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1016 static void table_value_missing (struct tab_table *table, int c, int r,
1017 unsigned char opt, const union value *v,
1018 const struct variable *var);
1019 static void delete_missing (void);
1021 /* Output pivot table beginning at PB and continuing until PE,
1022 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1023 hold *MAXROWS entries. */
1025 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1026 double **matp, double **row_totp, double **col_totp,
1027 int *maxrows, int *maxcols, int *maxcells)
1030 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1031 int tc = pe - pb; /* Table count. */
1033 /* Table entry for header comparison. */
1034 struct table_entry *cmp = NULL;
1036 x = xtab[(*pb)->table];
1037 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1039 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1041 /* Crosstabulation table initialization. */
1044 table = tab_create (nvar + n_cols,
1045 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1046 tab_headers (table, nvar - 1, 0, 2, 0);
1048 /* First header line. */
1049 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1050 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1052 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1054 /* Second header line. */
1058 for (i = 2; i < nvar; i++)
1059 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1060 TAB_RIGHT | TAT_TITLE,
1062 ? x->vars[i]->label : x->vars[i]->name));
1063 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1064 x->vars[ROW_VAR]->name);
1065 for (i = 0; i < n_cols; i++)
1066 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1068 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1071 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1072 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1076 char *title = local_alloc (x->nvar * 64 + 128);
1080 if (cmd.pivot == CRS_PIVOT)
1081 for (i = 0; i < nvar; i++)
1084 cp = stpcpy (cp, " by ");
1085 cp = stpcpy (cp, x->vars[i]->name);
1089 cp = spprintf (cp, "%s by %s for",
1090 x->vars[0]->name, x->vars[1]->name);
1091 for (i = 2; i < nvar; i++)
1093 char buf[64], *bufp;
1098 cp = stpcpy (cp, x->vars[i]->name);
1100 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1101 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1103 cp = stpcpy (cp, bufp);
1107 cp = stpcpy (cp, " [");
1108 for (i = 0; i < num_cells; i++)
1116 static const struct tuple cell_names[] =
1118 {CRS_CL_COUNT, N_("count")},
1119 {CRS_CL_ROW, N_("row %")},
1120 {CRS_CL_COLUMN, N_("column %")},
1121 {CRS_CL_TOTAL, N_("total %")},
1122 {CRS_CL_EXPECTED, N_("expected")},
1123 {CRS_CL_RESIDUAL, N_("residual")},
1124 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1125 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1129 const struct tuple *t;
1131 for (t = cell_names; t->value != cells[i]; t++)
1132 assert (t->value != -1);
1134 cp = stpcpy (cp, ", ");
1135 cp = stpcpy (cp, gettext (t->name));
1139 tab_title (table, 0, title);
1143 tab_offset (table, 0, 2);
1148 /* Chi-square table initialization. */
1149 if (cmd.a_statistics[CRS_ST_CHISQ])
1151 chisq = tab_create (6 + (nvar - 2),
1152 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1153 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1155 tab_title (chisq, 0, "Chi-square tests.");
1157 tab_offset (chisq, nvar - 2, 0);
1158 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1159 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1160 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1161 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1162 _("Asymp. Sig. (2-sided)"));
1163 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1164 _("Exact. Sig. (2-sided)"));
1165 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1166 _("Exact. Sig. (1-sided)"));
1168 tab_offset (chisq, 0, 1);
1173 /* Symmetric measures. */
1174 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1175 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1176 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1177 || cmd.a_statistics[CRS_ST_KAPPA])
1179 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1180 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1181 tab_title (sym, 0, "Symmetric measures.");
1183 tab_offset (sym, nvar - 2, 0);
1184 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1185 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1186 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1187 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1188 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1189 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1190 tab_offset (sym, 0, 1);
1195 /* Risk estimate. */
1196 if (cmd.a_statistics[CRS_ST_RISK])
1198 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1199 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1200 tab_title (risk, 0, "Risk estimate.");
1202 tab_offset (risk, nvar - 2, 0);
1203 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1204 _(" 95%% Confidence Interval"));
1205 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1206 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1207 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1208 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1209 tab_hline (risk, TAL_1, 2, 3, 1);
1210 tab_vline (risk, TAL_1, 2, 0, 1);
1211 tab_offset (risk, 0, 2);
1216 /* Directional measures. */
1217 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1218 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1220 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1221 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1222 tab_title (direct, 0, "Directional measures.");
1224 tab_offset (direct, nvar - 2, 0);
1225 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1226 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1227 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1228 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1229 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1230 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1231 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1232 tab_offset (direct, 0, 1);
1239 /* Find pivot subtable if applicable. */
1240 te = find_pivot_extent (tb, &tc, 0);
1244 /* Find all the row variable values. */
1245 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1247 /* Allocate memory space for the column and row totals. */
1248 if (n_rows > *maxrows)
1250 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1251 row_tot = *row_totp;
1254 if (n_cols > *maxcols)
1256 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1257 col_tot = *col_totp;
1261 /* Allocate table space for the matrix. */
1262 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1263 tab_realloc (table, -1,
1264 max (tab_nr (table) + (n_rows + 1) * num_cells,
1265 tab_nr (table) * (pe - pb) / (te - tb)));
1267 if (mode == GENERAL)
1269 /* Allocate memory space for the matrix. */
1270 if (n_cols * n_rows > *maxcells)
1272 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1273 *maxcells = n_cols * n_rows;
1278 /* Build the matrix and calculate column totals. */
1280 union value *cur_col = cols;
1281 union value *cur_row = rows;
1283 double *cp = col_tot;
1284 struct table_entry **p;
1287 for (p = &tb[0]; p < te; p++)
1289 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1293 for (; cur_row < &rows[n_rows]; cur_row++)
1299 mp = &mat[cur_col - cols];
1302 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1309 *cp += *mp = (*p)->u.freq;
1314 /* Zero out the rest of the matrix. */
1315 for (; cur_row < &rows[n_rows]; cur_row++)
1321 if (cur_col < &cols[n_cols])
1323 const int rem_cols = n_cols - (cur_col - cols);
1326 for (c = 0; c < rem_cols; c++)
1328 mp = &mat[cur_col - cols];
1329 for (r = 0; r < n_rows; r++)
1331 for (c = 0; c < rem_cols; c++)
1333 mp += n_cols - rem_cols;
1341 double *tp = col_tot;
1343 assert (mode == INTEGER);
1344 mat = (*tb)->u.data;
1347 /* Calculate column totals. */
1348 for (c = 0; c < n_cols; c++)
1351 double *cp = &mat[c];
1353 for (r = 0; r < n_rows; r++)
1354 cum += cp[r * n_cols];
1362 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1363 ns_cols += *cp != 0.;
1366 /* Calculate row totals. */
1369 double *rp = row_tot;
1372 for (ns_rows = 0, r = n_rows; r--; )
1375 for (c = n_cols; c--; )
1383 /* Calculate grand total. */
1389 if (n_rows < n_cols)
1390 tp = row_tot, n = n_rows;
1392 tp = col_tot, n = n_cols;
1399 /* Print the matrix. */
1403 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1404 for (i = 2; i < nvar; i++)
1405 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1408 for (c = 0; c < n_cols; c++)
1409 printf ("%4g", cols[c].f);
1411 for (r = 0; r < n_rows; r++)
1413 printf ("%4g:", rows[r].f);
1414 for (c = 0; c < n_cols; c++)
1415 printf ("%4g", mat[c + r * n_cols]);
1416 printf ("%4g", row_tot[r]);
1420 for (c = 0; c < n_cols; c++)
1421 printf ("%4g", col_tot[c]);
1427 /* Find the first variable that differs from the last subtable,
1428 then display the values of the dimensioning variables for
1429 each table that needs it. */
1431 int first_difference = nvar - 1;
1434 for (; ; first_difference--)
1436 assert (first_difference >= 2);
1437 if (memcmp (&cmp->values[first_difference],
1438 &(*tb)->values[first_difference],
1439 sizeof *cmp->values))
1445 display_dimensions (table, first_difference, *tb);
1447 display_dimensions (chisq, first_difference, *tb);
1449 display_dimensions (sym, first_difference, *tb);
1451 display_dimensions (risk, first_difference, *tb);
1453 display_dimensions (direct, first_difference, *tb);
1457 display_crosstabulation ();
1458 if (cmd.miss == CRS_REPORT)
1463 display_symmetric ();
1467 display_directional ();
1478 tab_resize (chisq, 4 + (nvar - 2), -1);
1489 /* Delete missing rows and columns for statistical analysis when
1492 delete_missing (void)
1497 for (r = 0; r < n_rows; r++)
1498 if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1502 for (c = 0; c < n_cols; c++)
1503 mat[c + r * n_cols] = 0.;
1511 for (c = 0; c < n_cols; c++)
1512 if (is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1516 for (r = 0; r < n_rows; r++)
1517 mat[c + r * n_cols] = 0.;
1523 /* Prepare table T for submission, and submit it. */
1525 submit (struct tab_table *t)
1532 tab_resize (t, -1, 0);
1533 if (tab_nr (t) == tab_t (t))
1538 tab_offset (t, 0, 0);
1540 for (i = 2; i < nvar; i++)
1541 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1542 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1543 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1544 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1546 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1548 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1549 tab_dim (t, crosstabs_dim);
1553 /* Sets the widths of all the columns and heights of all the rows in
1554 table T for driver D. */
1556 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1560 /* Width of a numerical column. */
1561 int c = outp_string_width (d, "0.000000");
1562 if (cmd.miss == CRS_REPORT)
1563 c += outp_string_width (d, "M");
1565 /* Set width for header columns. */
1568 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1570 if (w < d->prop_em_width * 8)
1571 w = d->prop_em_width * 8;
1573 if (w > d->prop_em_width * 15)
1574 w = d->prop_em_width * 15;
1576 for (i = 0; i < t->l; i++)
1580 for (i = t->l; i < t->nc; i++)
1583 for (i = 0; i < t->nr; i++)
1584 t->h[i] = tab_natural_height (t, d, i);
1587 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1588 int *cnt, int pivot);
1589 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1590 int *cnt, int pivot);
1592 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1594 static struct table_entry **
1595 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1597 return (mode == GENERAL
1598 ? find_pivot_extent_general (tp, cnt, pivot)
1599 : find_pivot_extent_integer (tp, cnt, pivot));
1602 /* Find the extent of a region in TP that contains one table. If
1603 PIVOT != 0 that means a set of table entries with identical table
1604 number; otherwise they also have to have the same values for every
1605 dimension after the row and column dimensions. The table that is
1606 searched starts at TP and has length CNT. Returns the first entry
1607 after the last one in the table; sets *CNT to the number of
1608 remaining values. If there are no entries in TP at all, returns
1609 NULL. A yucky interface, admittedly, but it works. */
1610 static struct table_entry **
1611 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1613 struct table_entry *fp = *tp;
1618 x = xtab[(*tp)->table];
1626 if ((*tp)->table != fp->table)
1631 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1638 /* Integer mode correspondent to find_pivot_extent_general(). This
1639 could be optimized somewhat, but I just don't give a crap about
1640 CROSSTABS performance in integer mode, which is just a
1641 CROSSTABS wart as far as I'm concerned.
1643 That said, feel free to send optimization patches to me. */
1644 static struct table_entry **
1645 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1647 struct table_entry *fp = *tp;
1652 x = xtab[(*tp)->table];
1660 if ((*tp)->table != fp->table)
1665 if (memcmp (&(*tp)->values[2], &fp->values[2],
1666 sizeof (union value) * (x->nvar - 2)))
1673 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1674 result. WIDTH_ points to an int which is either 0 for a
1675 numeric value or a string width for a string value. */
1677 compare_value (const void *a_, const void *b_, void *width_)
1679 const union value *a = a_;
1680 const union value *b = b_;
1681 const int *pwidth = width_;
1682 const int width = *pwidth;
1685 return (a->f < b->f) ? -1 : (a->f > b->f);
1687 return strncmp (a->s, b->s, width);
1690 /* Given an array of ENTRY_CNT table_entry structures starting at
1691 ENTRIES, creates a sorted list of the values that the variable
1692 with index VAR_INDEX takes on. The values are returned as a
1693 malloc()'darray stored in *VALUES, with the number of values
1694 stored in *VALUE_CNT.
1697 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1698 union value **values, int *value_cnt)
1700 if (mode == GENERAL)
1702 int width = xtab[(*entries)->table]->vars[var_idx]->width;
1705 *values = xmalloc (sizeof **values * entry_cnt);
1706 for (i = 0; i < entry_cnt; i++)
1707 (*values)[i] = entries[i]->values[var_idx];
1708 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1709 compare_value, &width);
1713 struct crosstab_proc *crs
1714 = &xtab[(*entries)->table]->vars[var_idx]->p.crs;
1717 assert (mode == INTEGER);
1718 *values = xmalloc (sizeof **values * crs->count);
1719 for (i = 0; i < crs->count; i++)
1720 (*values)[i].f = i + crs->min;
1721 *value_cnt = crs->count;
1725 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1726 from V, displayed with print format spec from variable VAR. When
1727 in REPORT missing-value mode, missing values have an M appended. */
1729 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1730 const union value *v, const struct variable *var)
1732 struct len_string s;
1734 const char *label = val_labs_find (var->val_labs, *v);
1737 tab_text (table, c, r, TAB_LEFT, label);
1741 s.string = tab_alloc (table, var->print.w);
1742 format_short (s.string, &var->print, v);
1743 s.length = strlen (s.string);
1744 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1745 s.string[s.length++] = 'M';
1746 while (s.length && *s.string == ' ')
1751 tab_raw (table, c, r, opt, &s);
1754 /* Draws a line across TABLE at the current row to indicate the most
1755 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1756 that changed, and puts the values that changed into the table. TB
1757 and X must be the corresponding table_entry and crosstab,
1760 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1762 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1764 for (; first_difference >= 2; first_difference--)
1765 table_value_missing (table, nvar - first_difference - 1, 0,
1766 TAB_RIGHT, &tb->values[first_difference],
1767 x->vars[first_difference]);
1770 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1772 float_M_suffix (struct tab_table *table, int c, int r, double v)
1774 static const struct fmt_spec f = {FMT_F, 8, 0};
1775 struct len_string s;
1778 s.string = tab_alloc (table, 9);
1780 format_short (s.string, &f, (union value *) &v);
1781 while (*s.string == ' ')
1786 tab_raw (table, c, r, TAB_RIGHT, &s);
1789 /* Displays the crosstabulation table. */
1791 display_crosstabulation (void)
1796 for (r = 0; r < n_rows; r++)
1797 table_value_missing (table, nvar - 2, r * num_cells,
1798 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1800 tab_text (table, nvar - 2, n_rows * num_cells,
1801 TAB_LEFT, _("Total"));
1803 /* Put in the actual cells. */
1808 tab_offset (table, nvar - 1, -1);
1809 for (r = 0; r < n_rows; r++)
1812 tab_hline (table, TAL_1, -1, n_cols, 0);
1813 for (c = 0; c < n_cols; c++)
1815 double expected_value = row_tot[r] * col_tot[c] / W;
1816 for (i = 0; i < num_cells; i++)
1826 v = *mp / row_tot[r] * 100.;
1829 v = *mp / col_tot[c] * 100.;
1834 case CRS_CL_EXPECTED:
1837 case CRS_CL_RESIDUAL:
1838 v = *mp - expected_value;
1840 case CRS_CL_SRESIDUAL:
1841 v = (*mp - expected_value) / sqrt (expected_value);
1843 case CRS_CL_ASRESIDUAL:
1844 v = ((*mp - expected_value)
1845 / sqrt (expected_value
1846 * (1. - row_tot[r] / W)
1847 * (1. - col_tot[c] / W)));
1853 if (cmd.miss == CRS_REPORT
1854 && (is_num_user_missing (cols[c].f, x->vars[COL_VAR])
1855 || is_num_user_missing (rows[r].f, x->vars[ROW_VAR])))
1856 float_M_suffix (table, c, i, v);
1858 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1864 tab_offset (table, -1, tab_row (table) + num_cells);
1872 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1873 for (r = 0; r < n_rows; r++)
1874 for (i = 0; i < num_cells; i++)
1887 v = row_tot[r] / W * 100.;
1890 v = row_tot[r] / W * 100.;
1892 case CRS_CL_EXPECTED:
1893 case CRS_CL_RESIDUAL:
1894 case CRS_CL_SRESIDUAL:
1895 case CRS_CL_ASRESIDUAL:
1902 if (cmd.miss == CRS_REPORT
1903 && is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1904 float_M_suffix (table, n_cols, 0, v);
1906 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1908 tab_next_row (table);
1912 /* Column totals, grand total. */
1918 tab_hline (table, TAL_1, -1, n_cols, 0);
1919 for (c = 0; c <= n_cols; c++)
1921 double ct = c < n_cols ? col_tot[c] : W;
1924 for (i = j = 0; i < num_cells; i++)
1942 case CRS_CL_EXPECTED:
1943 case CRS_CL_RESIDUAL:
1944 case CRS_CL_SRESIDUAL:
1945 case CRS_CL_ASRESIDUAL:
1951 if (cmd.miss == CRS_REPORT && c < n_cols
1952 && is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1953 float_M_suffix (table, c, j, v);
1955 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
1962 tab_offset (table, -1, tab_row (table) + last_row);
1965 tab_offset (table, 0, -1);
1968 static void calc_r (double *X, double *Y, double *, double *, double *);
1969 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1971 /* Display chi-square statistics. */
1973 display_chisq (void)
1975 static const char *chisq_stats[N_CHISQ] =
1977 N_("Pearson Chi-Square"),
1978 N_("Likelihood Ratio"),
1979 N_("Fisher's Exact Test"),
1980 N_("Continuity Correction"),
1981 N_("Linear-by-Linear Association"),
1983 double chisq_v[N_CHISQ];
1984 double fisher1, fisher2;
1990 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1992 tab_offset (chisq, nvar - 2, -1);
1994 for (i = 0; i < N_CHISQ; i++)
1996 if ((i != 2 && chisq_v[i] == SYSMIS)
1997 || (i == 2 && fisher1 == SYSMIS))
2001 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2004 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2005 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2006 tab_float (chisq, 3, 0, TAB_RIGHT,
2007 chisq_sig (chisq_v[i], df[i]), 8, 3);
2012 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2013 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2015 tab_next_row (chisq);
2018 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2019 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2020 tab_next_row (chisq);
2022 tab_offset (chisq, 0, -1);
2025 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2026 double[N_SYMMETRIC]);
2028 /* Display symmetric measures. */
2030 display_symmetric (void)
2032 static const char *categories[] =
2034 N_("Nominal by Nominal"),
2035 N_("Ordinal by Ordinal"),
2036 N_("Interval by Interval"),
2037 N_("Measure of Agreement"),
2040 static const char *stats[N_SYMMETRIC] =
2044 N_("Contingency Coefficient"),
2045 N_("Kendall's tau-b"),
2046 N_("Kendall's tau-c"),
2048 N_("Spearman Correlation"),
2053 static const int stats_categories[N_SYMMETRIC] =
2055 0, 0, 0, 1, 1, 1, 1, 2, 3,
2059 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2062 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2065 tab_offset (sym, nvar - 2, -1);
2067 for (i = 0; i < N_SYMMETRIC; i++)
2069 if (sym_v[i] == SYSMIS)
2072 if (stats_categories[i] != last_cat)
2074 last_cat = stats_categories[i];
2075 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2078 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2079 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2080 if (sym_ase[i] != SYSMIS)
2081 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2082 if (sym_t[i] != SYSMIS)
2083 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2084 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2088 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2089 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2092 tab_offset (sym, 0, -1);
2095 static int calc_risk (double[], double[], double[], union value *);
2097 /* Display risk estimate. */
2102 double risk_v[3], lower[3], upper[3];
2106 if (!calc_risk (risk_v, upper, lower, c))
2109 tab_offset (risk, nvar - 2, -1);
2111 for (i = 0; i < 3; i++)
2113 if (risk_v[i] == SYSMIS)
2119 if (x->vars[COL_VAR]->type == NUMERIC)
2120 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2121 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2123 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2124 x->vars[COL_VAR]->name,
2125 x->vars[COL_VAR]->width, c[0].s,
2126 x->vars[COL_VAR]->width, c[1].s);
2130 if (x->vars[ROW_VAR]->type == NUMERIC)
2131 sprintf (buf, _("For cohort %s = %g"),
2132 x->vars[ROW_VAR]->name, rows[i - 1].f);
2134 sprintf (buf, _("For cohort %s = %.*s"),
2135 x->vars[ROW_VAR]->name,
2136 x->vars[ROW_VAR]->width, rows[i - 1].s);
2140 tab_text (risk, 0, 0, TAB_LEFT, buf);
2141 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2142 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2143 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2144 tab_next_row (risk);
2147 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2148 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2149 tab_next_row (risk);
2151 tab_offset (risk, 0, -1);
2154 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2155 double[N_DIRECTIONAL]);
2157 /* Display directional measures. */
2159 display_directional (void)
2161 static const char *categories[] =
2163 N_("Nominal by Nominal"),
2164 N_("Ordinal by Ordinal"),
2165 N_("Nominal by Interval"),
2168 static const char *stats[] =
2171 N_("Goodman and Kruskal tau"),
2172 N_("Uncertainty Coefficient"),
2177 static const char *types[] =
2184 static const int stats_categories[N_DIRECTIONAL] =
2186 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2189 static const int stats_stats[N_DIRECTIONAL] =
2191 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2194 static const int stats_types[N_DIRECTIONAL] =
2196 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2199 static const int *stats_lookup[] =
2206 static const char **stats_names[] =
2218 double direct_v[N_DIRECTIONAL];
2219 double direct_ase[N_DIRECTIONAL];
2220 double direct_t[N_DIRECTIONAL];
2224 if (!calc_directional (direct_v, direct_ase, direct_t))
2227 tab_offset (direct, nvar - 2, -1);
2229 for (i = 0; i < N_DIRECTIONAL; i++)
2231 if (direct_v[i] == SYSMIS)
2237 for (j = 0; j < 3; j++)
2238 if (last[j] != stats_lookup[j][i])
2241 tab_hline (direct, TAL_1, j, 6, 0);
2246 int k = last[j] = stats_lookup[j][i];
2251 string = x->vars[0]->name;
2253 string = x->vars[1]->name;
2255 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2256 gettext (stats_names[j][k]), string);
2261 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2262 if (direct_ase[i] != SYSMIS)
2263 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2264 if (direct_t[i] != SYSMIS)
2265 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2266 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2267 tab_next_row (direct);
2270 tab_offset (direct, 0, -1);
2273 /* Statistical calculations. */
2275 /* Returns the value of the gamma (factorial) function for an integer
2278 gamma_int (double x)
2283 for (i = 2; i < x; i++)
2288 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2290 static inline double
2291 Pr (int a, int b, int c, int d)
2293 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2294 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2295 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2296 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2297 / gamma_int (a + b + c + d + 1.));
2300 /* Swap the contents of A and B. */
2302 swap (int *a, int *b)
2309 /* Calculate significance for Fisher's exact test as specified in
2310 _SPSS Statistical Algorithms_, Appendix 5. */
2312 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2316 if (min (c, d) < min (a, b))
2317 swap (&a, &c), swap (&b, &d);
2318 if (min (b, d) < min (a, c))
2319 swap (&a, &b), swap (&c, &d);
2323 swap (&a, &b), swap (&c, &d);
2325 swap (&a, &c), swap (&b, &d);
2329 for (x = 0; x <= a; x++)
2330 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2332 *fisher2 = *fisher1;
2333 for (x = 1; x <= b; x++)
2334 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2337 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2338 columns with values COLS and N_ROWS rows with values ROWS. Values
2339 in the matrix sum to W. */
2341 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2342 double *fisher1, double *fisher2)
2346 chisq[0] = chisq[1] = 0.;
2347 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2348 *fisher1 = *fisher2 = SYSMIS;
2350 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2352 if (ns_rows <= 1 || ns_cols <= 1)
2354 chisq[0] = chisq[1] = SYSMIS;
2358 for (r = 0; r < n_rows; r++)
2359 for (c = 0; c < n_cols; c++)
2361 const double expected = row_tot[r] * col_tot[c] / W;
2362 const double freq = mat[n_cols * r + c];
2363 const double residual = freq - expected;
2365 chisq[0] += residual * residual / expected;
2367 chisq[1] += freq * log (expected / freq);
2378 /* Calculate Yates and Fisher exact test. */
2379 if (ns_cols == 2 && ns_rows == 2)
2381 double f11, f12, f21, f22;
2387 for (i = j = 0; i < n_cols; i++)
2388 if (col_tot[i] != 0.)
2397 f11 = mat[nz_cols[0]];
2398 f12 = mat[nz_cols[1]];
2399 f21 = mat[nz_cols[0] + n_cols];
2400 f22 = mat[nz_cols[1] + n_cols];
2405 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2408 chisq[3] = (W * x * x
2409 / (f11 + f12) / (f21 + f22)
2410 / (f11 + f21) / (f12 + f22));
2418 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2419 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2422 /* Calculate Mantel-Haenszel. */
2423 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2425 double r, ase_0, ase_1;
2426 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2428 chisq[4] = (W - 1.) * r * r;
2433 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2434 ASE_1, and ase_0 into ASE_0. The row and column values must be
2435 passed in X and Y. */
2437 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2439 double SX, SY, S, T;
2441 double sum_XYf, sum_X2Y2f;
2442 double sum_Xr, sum_X2r;
2443 double sum_Yc, sum_Y2c;
2446 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2447 for (j = 0; j < n_cols; j++)
2449 double fij = mat[j + i * n_cols];
2450 double product = X[i] * Y[j];
2451 double temp = fij * product;
2453 sum_X2Y2f += temp * product;
2456 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2458 sum_Xr += X[i] * row_tot[i];
2459 sum_X2r += X[i] * X[i] * row_tot[i];
2463 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2465 sum_Yc += Y[i] * col_tot[i];
2466 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2470 S = sum_XYf - sum_Xr * sum_Yc / W;
2471 SX = sum_X2r - sum_Xr * sum_Xr / W;
2472 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2475 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2480 for (s = c = 0., i = 0; i < n_rows; i++)
2481 for (j = 0; j < n_cols; j++)
2483 double Xresid, Yresid;
2486 Xresid = X[i] - Xbar;
2487 Yresid = Y[j] - Ybar;
2488 temp = (T * Xresid * Yresid
2490 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2491 y = mat[j + i * n_cols] * temp * temp - c;
2496 *ase_1 = sqrt (s) / (T * T);
2500 static double somers_d_v[3];
2501 static double somers_d_ase[3];
2502 static double somers_d_t[3];
2504 /* Calculate symmetric statistics and their asymptotic standard
2505 errors. Returns 0 if none could be calculated. */
2507 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2508 double t[N_SYMMETRIC])
2510 int q = min (ns_rows, ns_cols);
2519 for (i = 0; i < N_SYMMETRIC; i++)
2520 v[i] = ase[i] = t[i] = SYSMIS;
2523 /* Phi, Cramer's V, contingency coefficient. */
2524 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2526 double Xp = 0.; /* Pearson chi-square. */
2531 for (r = 0; r < n_rows; r++)
2532 for (c = 0; c < n_cols; c++)
2534 const double expected = row_tot[r] * col_tot[c] / W;
2535 const double freq = mat[n_cols * r + c];
2536 const double residual = freq - expected;
2538 Xp += residual * residual / expected;
2542 if (cmd.a_statistics[CRS_ST_PHI])
2544 v[0] = sqrt (Xp / W);
2545 v[1] = sqrt (Xp / (W * (q - 1)));
2547 if (cmd.a_statistics[CRS_ST_CC])
2548 v[2] = sqrt (Xp / (Xp + W));
2551 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2552 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2557 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2564 for (r = 0; r < n_rows; r++)
2565 Dr -= row_tot[r] * row_tot[r];
2566 for (c = 0; c < n_cols; c++)
2567 Dc -= col_tot[c] * col_tot[c];
2573 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2574 for (c = 0; c < n_cols; c++)
2578 for (r = 0; r < n_rows; r++)
2579 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2589 for (i = 0; i < n_rows; i++)
2593 for (j = 1; j < n_cols; j++)
2594 Cij += col_tot[j] - cum[j + i * n_cols];
2597 for (j = 1; j < n_cols; j++)
2598 Dij += cum[j + (i - 1) * n_cols];
2602 double fij = mat[j + i * n_cols];
2608 assert (j < n_cols);
2610 Cij -= col_tot[j] - cum[j + i * n_cols];
2611 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2615 Cij += cum[j - 1 + (i - 1) * n_cols];
2616 Dij -= cum[j + (i - 1) * n_cols];
2622 if (cmd.a_statistics[CRS_ST_BTAU])
2623 v[3] = (P - Q) / sqrt (Dr * Dc);
2624 if (cmd.a_statistics[CRS_ST_CTAU])
2625 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2626 if (cmd.a_statistics[CRS_ST_GAMMA])
2627 v[5] = (P - Q) / (P + Q);
2629 /* ASE for tau-b, tau-c, gamma. Calculations could be
2630 eliminated here, at expense of memory. */
2635 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2636 for (i = 0; i < n_rows; i++)
2640 for (j = 1; j < n_cols; j++)
2641 Cij += col_tot[j] - cum[j + i * n_cols];
2644 for (j = 1; j < n_cols; j++)
2645 Dij += cum[j + (i - 1) * n_cols];
2649 double fij = mat[j + i * n_cols];
2651 if (cmd.a_statistics[CRS_ST_BTAU])
2653 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2654 + v[3] * (row_tot[i] * Dc
2655 + col_tot[j] * Dr));
2656 btau_cum += fij * temp * temp;
2660 const double temp = Cij - Dij;
2661 ctau_cum += fij * temp * temp;
2664 if (cmd.a_statistics[CRS_ST_GAMMA])
2666 const double temp = Q * Cij - P * Dij;
2667 gamma_cum += fij * temp * temp;
2670 if (cmd.a_statistics[CRS_ST_D])
2672 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2673 - (P - Q) * (W - row_tot[i]));
2674 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2675 - (Q - P) * (W - col_tot[j]));
2680 assert (j < n_cols);
2682 Cij -= col_tot[j] - cum[j + i * n_cols];
2683 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2687 Cij += cum[j - 1 + (i - 1) * n_cols];
2688 Dij -= cum[j + (i - 1) * n_cols];
2694 btau_var = ((btau_cum
2695 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2697 if (cmd.a_statistics[CRS_ST_BTAU])
2699 ase[3] = sqrt (btau_var);
2700 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2703 if (cmd.a_statistics[CRS_ST_CTAU])
2705 ase[4] = ((2 * q / ((q - 1) * W * W))
2706 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2707 t[4] = v[4] / ase[4];
2709 if (cmd.a_statistics[CRS_ST_GAMMA])
2711 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2712 t[5] = v[5] / (2. / (P + Q)
2713 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2715 if (cmd.a_statistics[CRS_ST_D])
2717 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2718 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2719 somers_d_t[0] = (somers_d_v[0]
2721 * sqrt (ctau_cum - sqr (P - Q) / W)));
2722 somers_d_v[1] = (P - Q) / Dc;
2723 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2724 somers_d_t[1] = (somers_d_v[1]
2726 * sqrt (ctau_cum - sqr (P - Q) / W)));
2727 somers_d_v[2] = (P - Q) / Dr;
2728 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2729 somers_d_t[2] = (somers_d_v[2]
2731 * sqrt (ctau_cum - sqr (P - Q) / W)));
2737 /* Spearman correlation, Pearson's r. */
2738 if (cmd.a_statistics[CRS_ST_CORR])
2740 double *R = local_alloc (sizeof *R * n_rows);
2741 double *C = local_alloc (sizeof *C * n_cols);
2744 double y, t, c = 0., s = 0.;
2749 R[i] = s + (row_tot[i] + 1.) / 2.;
2756 assert (i < n_rows);
2761 double y, t, c = 0., s = 0.;
2766 C[j] = s + (col_tot[j] + 1.) / 2;
2773 assert (j < n_cols);
2777 calc_r (R, C, &v[6], &t[6], &ase[6]);
2783 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2787 /* Cohen's kappa. */
2788 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2790 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2793 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2794 i < ns_rows; i++, j++)
2798 while (col_tot[j] == 0.)
2801 prod = row_tot[i] * col_tot[j];
2802 sum = row_tot[i] + col_tot[j];
2804 sum_fii += mat[j + i * n_cols];
2806 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2807 sum_riciri_ci += prod * sum;
2809 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2810 for (j = 0; j < ns_cols; j++)
2812 double sum = row_tot[i] + col_tot[j];
2813 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2816 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2818 ase[8] = sqrt ((W * W * sum_rici
2819 + sum_rici * sum_rici
2820 - W * sum_riciri_ci)
2821 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2823 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2824 / sqr (W * W - sum_rici))
2825 + ((2. * (W - sum_fii)
2826 * (2. * sum_fii * sum_rici
2827 - W * sum_fiiri_ci))
2828 / cube (W * W - sum_rici))
2829 + (sqr (W - sum_fii)
2830 * (W * sum_fijri_ci2 - 4.
2831 * sum_rici * sum_rici)
2832 / pow4 (W * W - sum_rici))));
2834 t[8] = v[8] / ase[8];
2841 /* Calculate risk estimate. */
2843 calc_risk (double *value, double *upper, double *lower, union value *c)
2845 double f11, f12, f21, f22;
2851 for (i = 0; i < 3; i++)
2852 value[i] = upper[i] = lower[i] = SYSMIS;
2855 if (ns_rows != 2 || ns_cols != 2)
2862 for (i = j = 0; i < n_cols; i++)
2863 if (col_tot[i] != 0.)
2872 f11 = mat[nz_cols[0]];
2873 f12 = mat[nz_cols[1]];
2874 f21 = mat[nz_cols[0] + n_cols];
2875 f22 = mat[nz_cols[1] + n_cols];
2877 c[0] = cols[nz_cols[0]];
2878 c[1] = cols[nz_cols[1]];
2881 value[0] = (f11 * f22) / (f12 * f21);
2882 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2883 lower[0] = value[0] * exp (-1.960 * v);
2884 upper[0] = value[0] * exp (1.960 * v);
2886 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2887 v = sqrt ((f12 / (f11 * (f11 + f12)))
2888 + (f22 / (f21 * (f21 + f22))));
2889 lower[1] = value[1] * exp (-1.960 * v);
2890 upper[1] = value[1] * exp (1.960 * v);
2892 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2893 v = sqrt ((f11 / (f12 * (f11 + f12)))
2894 + (f21 / (f22 * (f21 + f22))));
2895 lower[2] = value[2] * exp (-1.960 * v);
2896 upper[2] = value[2] * exp (1.960 * v);
2901 /* Calculate directional measures. */
2903 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2904 double t[N_DIRECTIONAL])
2909 for (i = 0; i < N_DIRECTIONAL; i++)
2910 v[i] = ase[i] = t[i] = SYSMIS;
2914 if (cmd.a_statistics[CRS_ST_LAMBDA])
2916 double *fim = xmalloc (sizeof *fim * n_rows);
2917 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2918 double *fmj = xmalloc (sizeof *fmj * n_cols);
2919 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2920 double sum_fim, sum_fmj;
2922 int rm_index, cm_index;
2925 /* Find maximum for each row and their sum. */
2926 for (sum_fim = 0., i = 0; i < n_rows; i++)
2928 double max = mat[i * n_cols];
2931 for (j = 1; j < n_cols; j++)
2932 if (mat[j + i * n_cols] > max)
2934 max = mat[j + i * n_cols];
2938 sum_fim += fim[i] = max;
2939 fim_index[i] = index;
2942 /* Find maximum for each column. */
2943 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2945 double max = mat[j];
2948 for (i = 1; i < n_rows; i++)
2949 if (mat[j + i * n_cols] > max)
2951 max = mat[j + i * n_cols];
2955 sum_fmj += fmj[j] = max;
2956 fmj_index[j] = index;
2959 /* Find maximum row total. */
2962 for (i = 1; i < n_rows; i++)
2963 if (row_tot[i] > rm)
2969 /* Find maximum column total. */
2972 for (j = 1; j < n_cols; j++)
2973 if (col_tot[j] > cm)
2979 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2980 v[1] = (sum_fmj - rm) / (W - rm);
2981 v[2] = (sum_fim - cm) / (W - cm);
2983 /* ASE1 for Y given X. */
2987 for (accum = 0., i = 0; i < n_rows; i++)
2988 for (j = 0; j < n_cols; j++)
2990 const int deltaj = j == cm_index;
2991 accum += (mat[j + i * n_cols]
2992 * sqr ((j == fim_index[i])
2997 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3000 /* ASE0 for Y given X. */
3004 for (accum = 0., i = 0; i < n_rows; i++)
3005 if (cm_index != fim_index[i])
3006 accum += (mat[i * n_cols + fim_index[i]]
3007 + mat[i * n_cols + cm_index]);
3008 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3011 /* ASE1 for X given Y. */
3015 for (accum = 0., i = 0; i < n_rows; i++)
3016 for (j = 0; j < n_cols; j++)
3018 const int deltaj = i == rm_index;
3019 accum += (mat[j + i * n_cols]
3020 * sqr ((i == fmj_index[j])
3025 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3028 /* ASE0 for X given Y. */
3032 for (accum = 0., j = 0; j < n_cols; j++)
3033 if (rm_index != fmj_index[j])
3034 accum += (mat[j + n_cols * fmj_index[j]]
3035 + mat[j + n_cols * rm_index]);
3036 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3039 /* Symmetric ASE0 and ASE1. */
3044 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3045 for (j = 0; j < n_cols; j++)
3047 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3048 int temp1 = (i == rm_index) + (j == cm_index);
3049 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3050 accum1 += (mat[j + i * n_cols]
3051 * sqr (temp0 + (v[0] - 1.) * temp1));
3053 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3054 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3055 / (2. * W - rm - cm));
3064 double sum_fij2_ri, sum_fij2_ci;
3065 double sum_ri2, sum_cj2;
3067 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3068 for (j = 0; j < n_cols; j++)
3070 double temp = sqr (mat[j + i * n_cols]);
3071 sum_fij2_ri += temp / row_tot[i];
3072 sum_fij2_ci += temp / col_tot[j];
3075 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3076 sum_ri2 += row_tot[i] * row_tot[i];
3078 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3079 sum_cj2 += col_tot[j] * col_tot[j];
3081 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3082 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3086 if (cmd.a_statistics[CRS_ST_UC])
3088 double UX, UY, UXY, P;
3089 double ase1_yx, ase1_xy, ase1_sym;
3092 for (UX = 0., i = 0; i < n_rows; i++)
3093 if (row_tot[i] > 0.)
3094 UX -= row_tot[i] / W * log (row_tot[i] / W);
3096 for (UY = 0., j = 0; j < n_cols; j++)
3097 if (col_tot[j] > 0.)
3098 UY -= col_tot[j] / W * log (col_tot[j] / W);
3100 for (UXY = P = 0., i = 0; i < n_rows; i++)
3101 for (j = 0; j < n_cols; j++)
3103 double entry = mat[j + i * n_cols];
3108 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3109 UXY -= entry / W * log (entry / W);
3112 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3113 for (j = 0; j < n_cols; j++)
3115 double entry = mat[j + i * n_cols];
3120 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3121 + (UX - UXY) * log (col_tot[j] / W));
3122 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3123 + (UY - UXY) * log (row_tot[i] / W));
3124 ase1_sym += entry * sqr ((UXY
3125 * log (row_tot[i] * col_tot[j] / (W * W)))
3126 - (UX + UY) * log (entry / W));
3129 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3130 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3131 t[5] = v[5] / ((2. / (W * (UX + UY)))
3132 * sqrt (P - sqr (UX + UY - UXY) / W));
3134 v[6] = (UX + UY - UXY) / UX;
3135 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3136 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3138 v[7] = (UX + UY - UXY) / UY;
3139 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3140 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3144 if (cmd.a_statistics[CRS_ST_D])
3149 calc_symmetric (NULL, NULL, NULL);
3150 for (i = 0; i < 3; i++)
3152 v[8 + i] = somers_d_v[i];
3153 ase[8 + i] = somers_d_ase[i];
3154 t[8 + i] = somers_d_t[i];
3159 if (cmd.a_statistics[CRS_ST_ETA])
3162 double sum_Xr, sum_X2r;
3166 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3168 sum_Xr += rows[i].f * row_tot[i];
3169 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3171 SX = sum_X2r - sum_Xr * sum_Xr / W;
3173 for (SXW = 0., j = 0; j < n_cols; j++)
3177 for (cum = 0., i = 0; i < n_rows; i++)
3179 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3180 cum += rows[i].f * mat[j + i * n_cols];
3183 SXW -= cum * cum / col_tot[j];
3185 v[11] = sqrt (1. - SXW / SX);
3189 double sum_Yc, sum_Y2c;
3193 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3195 sum_Yc += cols[i].f * col_tot[i];
3196 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3198 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3200 for (SYW = 0., i = 0; i < n_rows; i++)
3204 for (cum = 0., j = 0; j < n_cols; j++)
3206 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3207 cum += cols[j].f * mat[j + i * n_cols];
3210 SYW -= cum * cum / row_tot[i];
3212 v[12] = sqrt (1. - SYW / SY);
3219 /* A wrapper around data_out() that limits string output to short
3220 string width and null terminates the result. */
3222 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3224 struct fmt_spec fmt_subst;
3226 /* Limit to short string width. */
3227 if (formats[fp->type].cat & FCAT_STRING)
3231 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3232 if (fmt_subst.type == FMT_A)
3233 fmt_subst.w = min (8, fmt_subst.w);
3235 fmt_subst.w = min (16, fmt_subst.w);
3241 data_out (s, fp, v);
3243 /* Null terminate. */