1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
37 #include "algorithm.h"
49 #include "value-labels.h"
53 #include "debug-print.h"
59 +missing=miss:!table/include/report;
60 +write[wr_]=none,cells,all;
61 +format=fmt:!labels/nolabels/novallabs,
64 tabl:!tables/notables,
67 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
69 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
75 /* Number of chi-square statistics. */
78 /* Number of symmetric statistics. */
81 /* Number of directional statistics. */
82 #define N_DIRECTIONAL 13
84 /* A single table entry for general mode. */
87 int table; /* Flattened table number. */
90 double freq; /* Frequency count. */
91 double *data; /* Crosstabulation table for integer mode. */
94 union value values[1]; /* Values. */
97 /* A crosstabulation. */
100 int nvar; /* Number of variables. */
101 double missing; /* Missing cases count. */
102 int ofs; /* Integer mode: Offset into sorted_tab[]. */
103 struct variable *vars[2]; /* At least two variables; sorted by
104 larger indices first. */
107 /* Indexes into crosstab.v. */
114 /* General mode crosstabulation table. */
115 static struct hsh_table *gen_tab; /* Hash table. */
116 static int n_sorted_tab; /* Number of entries in sorted_tab. */
117 static struct table_entry **sorted_tab; /* Sorted table. */
119 /* Variables specifies on VARIABLES. */
120 static struct variable **variables;
121 static size_t variables_cnt;
124 static struct crosstab **xtab;
127 /* Integer or general mode? */
136 static int num_cells; /* Number of cells requested. */
137 static int cells[8]; /* Cells requested. */
138 static int expected; /* Nonzero if expected value is needed. */
141 static int write; /* One of WR_* that specifies the WRITE style. */
143 /* Command parsing info. */
144 static struct cmd_crosstabs cmd;
147 static struct pool *pl_tc; /* For table cells. */
148 static struct pool *pl_col; /* For column data. */
150 static int internal_cmd_crosstabs (void);
151 static void precalc (void *);
152 static int calc_general (struct ccase *, void *);
153 static int calc_integer (struct ccase *, void *);
154 static void postcalc (void *);
155 static void submit (struct tab_table *);
157 static void format_short (char *s, const struct fmt_spec *fp,
158 const union value *v);
161 static void debug_print (void);
162 static void print_table_entries (struct table_entry **tab);
165 /* Parse and execute CROSSTABS, then clean up. */
169 int result = internal_cmd_crosstabs ();
172 pool_destroy (pl_tc);
173 pool_destroy (pl_col);
178 /* Parses and executes the CROSSTABS procedure. */
180 internal_cmd_crosstabs (void)
186 pl_tc = pool_create ();
187 pl_col = pool_create ();
189 lex_match_id ("CROSSTABS");
190 if (!parse_crosstabs (&cmd))
194 /* Needs variables. */
198 mode = variables ? INTEGER : GENERAL;
204 cmd.a_cells[CRS_CL_COUNT] = 1;
212 for (i = 0; i < CRS_CL_count; i++)
217 cmd.a_cells[CRS_CL_COUNT] = 1;
218 cmd.a_cells[CRS_CL_ROW] = 1;
219 cmd.a_cells[CRS_CL_COLUMN] = 1;
220 cmd.a_cells[CRS_CL_TOTAL] = 1;
222 if (cmd.a_cells[CRS_CL_ALL])
224 for (i = 0; i < CRS_CL_count; i++)
226 cmd.a_cells[CRS_CL_ALL] = 0;
228 cmd.a_cells[CRS_CL_NONE] = 0;
229 for (num_cells = i = 0; i < CRS_CL_count; i++)
232 if (i >= CRS_CL_EXPECTED)
234 cmd.a_cells[num_cells++] = i;
239 if (cmd.sbc_statistics)
244 for (i = 0; i < CRS_ST_count; i++)
245 if (cmd.a_statistics[i])
248 cmd.a_statistics[CRS_ST_CHISQ] = 1;
249 if (cmd.a_statistics[CRS_ST_ALL])
250 for (i = 0; i < CRS_ST_count; i++)
251 cmd.a_statistics[i] = 1;
255 if (cmd.miss == CRS_REPORT && mode == GENERAL)
257 msg (SE, _("Missing mode REPORT not allowed in general mode. "
258 "Assuming MISSING=TABLE."));
259 cmd.miss = CRS_TABLE;
263 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
264 cmd.a_write[CRS_WR_ALL] = 0;
265 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
267 msg (SE, _("Write mode ALL not allowed in general mode. "
268 "Assuming WRITE=CELLS."));
269 cmd.a_write[CRS_WR_CELLS] = 1;
272 && (cmd.a_write[CRS_WR_NONE]
273 + cmd.a_write[CRS_WR_ALL]
274 + cmd.a_write[CRS_WR_CELLS] == 0))
275 cmd.a_write[CRS_WR_CELLS] = 1;
276 if (cmd.a_write[CRS_WR_CELLS])
277 write = CRS_WR_CELLS;
278 else if (cmd.a_write[CRS_WR_ALL])
283 procedure (precalc, mode == GENERAL ? calc_general : calc_integer, postcalc,
289 /* Parses the TABLES subcommand. */
291 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
293 struct var_set *var_set;
295 struct variable ***by = NULL;
300 /* Ensure that this is a TABLES subcommand. */
301 if (!lex_match_id ("TABLES")
302 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
307 if (variables != NULL)
308 var_set = var_set_create_from_array (variables, variables_cnt);
310 var_set = var_set_create_from_dict (default_dict);
311 assert (var_set != NULL);
315 by = xrealloc (by, sizeof *by * (n_by + 1));
316 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
317 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
318 PV_NO_DUPLICATE | PV_NO_SCRATCH))
323 if (!lex_match (T_BY))
327 lex_error (_("expecting BY"));
336 int *by_iter = xcalloc (sizeof *by_iter * n_by);
339 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
340 for (i = 0; i < nx; i++)
344 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
351 for (i = 0; i < n_by; i++)
352 x->vars[i] = by[i][by_iter[i]];
358 for (i = n_by - 1; i >= 0; i--)
360 if (++by_iter[i] < by_nvar[i])
373 /* All return paths lead here. */
377 for (i = 0; i < n_by; i++)
383 var_set_destroy (var_set);
388 /* Parses the VARIABLES subcommand. */
390 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
394 msg (SE, _("VARIABLES must be specified before TABLES."));
402 int orig_nv = variables_cnt;
407 if (!parse_variables (default_dict, &variables, &variables_cnt,
408 (PV_APPEND | PV_NUMERIC
409 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
414 lex_error ("expecting `('");
419 if (!lex_force_int ())
421 min = lex_integer ();
426 if (!lex_force_int ())
428 max = lex_integer ();
431 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
439 lex_error ("expecting `)'");
444 for (i = orig_nv; i < variables_cnt; i++)
446 variables[i]->p.crs.min = min;
447 variables[i]->p.crs.max = max + 1.;
448 variables[i]->p.crs.count = max - min + 1;
467 printf ("CROSSTABS\n");
469 if (variables != NULL)
473 printf ("\t/VARIABLES=");
474 for (i = 0; i < variables_cnt; i++)
476 struct variable *v = variables[i];
478 printf ("%s ", v->name);
479 if (i < variables_cnt - 1)
481 struct variable *nv = variables[i + 1];
483 if (v->p.crs.min == nv->p.crs.min
484 && v->p.crs.max == nv->p.crs.max)
487 printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1);
495 printf ("\t/TABLES=");
496 for (i = 0; i < nxtab; i++)
498 struct crosstab *x = xtab[i];
503 for (j = 0; j < x->nvar; j++)
507 printf ("%s", x->v[j]->name);
513 #endif /* DEBUGGING */
515 /* Data file processing. */
517 static int compare_table_entry (const void *, const void *, void *);
518 static unsigned hash_table_entry (const void *, void *);
520 /* Set up the crosstabulation tables for processing. */
522 precalc (void *aux UNUSED)
526 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
536 for (i = 0; i < nxtab; i++)
538 struct crosstab *x = xtab[i];
543 x->ofs = n_sorted_tab;
545 for (j = 2; j < x->nvar; j++)
546 count *= x->vars[j - 2]->p.crs.count;
548 sorted_tab = xrealloc (sorted_tab,
549 sizeof *sorted_tab * (n_sorted_tab + count));
550 v = local_alloc (sizeof *v * x->nvar);
551 for (j = 2; j < x->nvar; j++)
552 v[j] = x->vars[j]->p.crs.min;
553 for (j = 0; j < count; j++)
555 struct table_entry *te;
558 te = sorted_tab[n_sorted_tab++]
559 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
563 const int mat_size = (x->vars[0]->p.crs.count
564 * x->vars[1]->p.crs.count);
567 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
568 for (m = 0; m < mat_size; m++)
572 for (k = 2; k < x->nvar; k++)
573 te->values[k].f = v[k];
574 for (k = 2; k < x->nvar; k++)
575 if (++v[k] >= x->vars[k]->p.crs.max)
576 v[k] = x->vars[k]->p.crs.min;
583 sorted_tab = xrealloc (sorted_tab,
584 sizeof *sorted_tab * (n_sorted_tab + 1));
585 sorted_tab[n_sorted_tab] = NULL;
589 /* Form crosstabulations for general mode. */
591 calc_general (struct ccase *c, void *aux UNUSED)
594 double weight = dict_get_case_weight (default_dict, c);
596 /* Flattened current table index. */
599 for (t = 0; t < nxtab; t++)
601 struct crosstab *x = xtab[t];
602 const size_t entry_size = (sizeof (struct table_entry)
603 + sizeof (union value) * (x->nvar - 1));
604 struct table_entry *te = local_alloc (entry_size);
606 /* Construct table entry for the current record and table. */
612 for (j = 0; j < x->nvar; j++)
614 if ((cmd.miss == CRS_TABLE
615 && is_missing (&c->data[x->vars[j]->fv], x->vars[j]))
616 || (cmd.miss == CRS_INCLUDE
617 && is_system_missing (&c->data[x->vars[j]->fv],
620 x->missing += weight;
624 if (x->vars[j]->type == NUMERIC)
625 te->values[j].f = c->data[x->vars[j]->fv].f;
628 memcpy (te->values[j].s, c->data[x->vars[j]->fv].s,
631 /* Necessary in order to simplify comparisons. */
632 memset (&te->values[j].s[x->vars[j]->width], 0,
633 sizeof (union value) - x->vars[j]->width);
638 /* Add record to hash table. */
640 struct table_entry **tepp
641 = (struct table_entry **) hsh_probe (gen_tab, te);
644 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
647 memcpy (tep, te, entry_size);
652 (*tepp)->u.freq += weight;
663 calc_integer (struct ccase *c, void *aux UNUSED)
666 double weight = dict_get_case_weight (default_dict, c);
668 /* Flattened current table index. */
671 for (t = 0; t < nxtab; t++)
673 struct crosstab *x = xtab[t];
678 for (i = 0; i < x->nvar; i++)
680 struct variable *const v = x->vars[i];
681 double value = c->data[v->fv].f;
683 /* Note that the first test also rules out SYSMIS. */
684 if ((value < v->p.crs.min || value >= v->p.crs.max)
685 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
687 x->missing += weight;
693 ofs += fact * ((int) value - v->p.crs.min);
694 fact *= v->p.crs.count;
699 const int row = c->data[x->vars[ROW_VAR]->fv].f - x->vars[ROW_VAR]->p.crs.min;
700 const int col = c->data[x->vars[COL_VAR]->fv].f - x->vars[COL_VAR]->p.crs.min;
701 const int col_dim = x->vars[COL_VAR]->p.crs.count;
703 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
713 /* Print out all table entries in NULL-terminated TAB for use by a
714 debugger (a person, not a program). */
716 print_table_entries (struct table_entry **tab)
718 printf ("raw crosstabulation data:\n");
721 const struct crosstab *x = xtab[(*tab)->table];
724 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
725 for (i = 0; i < x->nvar; i++)
729 printf ("%s:", x->v[i]->name);
731 if (x->v[i]->type == NUMERIC)
732 printf ("%g", (*tab)->v[i].f);
734 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
742 /* Compare the table_entry's at A and B and return a strcmp()-type
745 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
747 const struct table_entry *a = a_;
748 const struct table_entry *b = b_;
750 if (a->table > b->table)
752 else if (a->table < b->table)
756 const struct crosstab *x = xtab[a->table];
759 for (i = x->nvar - 1; i >= 0; i--)
760 if (x->vars[i]->type == NUMERIC)
762 const double diffnum = a->values[i].f - b->values[i].f;
765 else if (diffnum > 0)
770 assert (x->vars[i]->type == ALPHA);
772 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
783 /* Calculate a hash value from table_entry A. */
785 hash_table_entry (const void *a_, void *foo UNUSED)
787 const struct table_entry *a = a_;
792 for (i = 0; i < xtab[a->table]->nvar; i++)
793 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
798 /* Post-data reading calculations. */
800 static struct table_entry **find_pivot_extent (struct table_entry **,
801 int *cnt, int pivot);
802 static void enum_var_values (struct table_entry **entries, int entry_cnt,
804 union value **values, int *value_cnt);
805 static void output_pivot_table (struct table_entry **, struct table_entry **,
806 double **, double **, double **,
807 int *, int *, int *);
808 static void make_summary_table (void);
811 postcalc (void *aux UNUSED)
815 n_sorted_tab = hsh_count (gen_tab);
816 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
818 print_table_entries (sorted_tab);
822 make_summary_table ();
824 /* Identify all the individual crosstabulation tables, and deal with
827 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
828 int pc = n_sorted_tab; /* Pivot count. */
830 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
831 int maxrows = 0, maxcols = 0, maxcells = 0;
835 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
839 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
840 &maxrows, &maxcols, &maxcells);
849 hsh_destroy (gen_tab);
852 static void insert_summary (struct tab_table *, int tab_index, double valid);
854 /* Output a table summarizing the cases processed. */
856 make_summary_table (void)
858 struct tab_table *summary;
860 struct table_entry **pb = sorted_tab, **pe;
861 int pc = n_sorted_tab;
864 summary = tab_create (7, 3 + nxtab, 1);
865 tab_title (summary, 0, _("Summary."));
866 tab_headers (summary, 1, 0, 3, 0);
867 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
868 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
869 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
870 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
871 tab_hline (summary, TAL_1, 1, 6, 1);
872 tab_hline (summary, TAL_1, 1, 6, 2);
873 tab_vline (summary, TAL_1, 3, 1, 1);
874 tab_vline (summary, TAL_1, 5, 1, 1);
878 for (i = 0; i < 3; i++)
880 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
881 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
884 tab_offset (summary, 0, 3);
890 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
894 while (cur_tab < (*pb)->table)
895 insert_summary (summary, cur_tab++, 0.);
898 for (valid = 0.; pb < pe; pb++)
899 valid += (*pb)->u.freq;
902 const struct crosstab *const x = xtab[(*pb)->table];
903 const int n_cols = x->vars[COL_VAR]->p.crs.count;
904 const int n_rows = x->vars[ROW_VAR]->p.crs.count;
905 const int count = n_cols * n_rows;
907 for (valid = 0.; pb < pe; pb++)
909 const double *data = (*pb)->u.data;
912 for (i = 0; i < count; i++)
916 insert_summary (summary, cur_tab++, valid);
921 while (cur_tab < nxtab)
922 insert_summary (summary, cur_tab++, 0.);
927 /* Inserts a line into T describing the crosstabulation at index
928 TAB_INDEX, which has VALID valid observations. */
930 insert_summary (struct tab_table *t, int tab_index, double valid)
932 struct crosstab *x = xtab[tab_index];
934 tab_hline (t, TAL_1, 0, 6, 0);
936 /* Crosstabulation name. */
938 char *buf = local_alloc (128 * x->nvar);
942 for (i = 0; i < x->nvar; i++)
945 cp = stpcpy (cp, " * ");
948 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
950 tab_text (t, 0, 0, TAB_LEFT, buf);
955 /* Counts and percentages. */
965 for (i = 0; i < 3; i++)
967 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
968 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
979 static struct tab_table *table; /* Crosstabulation table. */
980 static struct tab_table *chisq; /* Chi-square table. */
981 static struct tab_table *sym; /* Symmetric measures table. */
982 static struct tab_table *risk; /* Risk estimate table. */
983 static struct tab_table *direct; /* Directional measures table. */
986 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
988 /* Column values, number of columns. */
989 static union value *cols;
992 /* Row values, number of rows. */
993 static union value *rows;
996 /* Number of statistically interesting columns/rows (columns/rows with
998 static int ns_cols, ns_rows;
1000 /* Crosstabulation. */
1001 static struct crosstab *x;
1003 /* Number of variables from the crosstabulation to consider. This is
1004 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1007 /* Matrix contents. */
1008 static double *mat; /* Matrix proper. */
1009 static double *row_tot; /* Row totals. */
1010 static double *col_tot; /* Column totals. */
1011 static double W; /* Grand total. */
1013 static void display_dimensions (struct tab_table *, int first_difference,
1014 struct table_entry *);
1015 static void display_crosstabulation (void);
1016 static void display_chisq (void);
1017 static void display_symmetric (void);
1018 static void display_risk (void);
1019 static void display_directional (void);
1020 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1021 static void table_value_missing (struct tab_table *table, int c, int r,
1022 unsigned char opt, const union value *v,
1023 const struct variable *var);
1024 static void delete_missing (void);
1026 /* Output pivot table beginning at PB and continuing until PE,
1027 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1028 hold *MAXROWS entries. */
1030 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1031 double **matp, double **row_totp, double **col_totp,
1032 int *maxrows, int *maxcols, int *maxcells)
1035 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1036 int tc = pe - pb; /* Table count. */
1038 /* Table entry for header comparison. */
1039 struct table_entry *cmp;
1041 x = xtab[(*pb)->table];
1042 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1044 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1046 /* Crosstabulation table initialization. */
1049 table = tab_create (nvar + n_cols,
1050 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1051 tab_headers (table, nvar - 1, 0, 2, 0);
1053 /* First header line. */
1054 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1055 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1057 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1059 /* Second header line. */
1063 for (i = 2; i < nvar; i++)
1064 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1065 TAB_RIGHT | TAT_TITLE,
1067 ? x->vars[i]->label : x->vars[i]->name));
1068 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1069 x->vars[ROW_VAR]->name);
1070 for (i = 0; i < n_cols; i++)
1071 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1073 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1076 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1077 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1081 char *title = local_alloc (x->nvar * 64 + 128);
1085 if (cmd.pivot == CRS_PIVOT)
1086 for (i = 0; i < nvar; i++)
1089 cp = stpcpy (cp, " by ");
1090 cp = stpcpy (cp, x->vars[i]->name);
1094 cp = spprintf (cp, "%s by %s for",
1095 x->vars[0]->name, x->vars[1]->name);
1096 for (i = 2; i < nvar; i++)
1098 char buf[64], *bufp;
1103 cp = stpcpy (cp, x->vars[i]->name);
1105 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1106 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1108 cp = stpcpy (cp, bufp);
1112 cp = stpcpy (cp, " [");
1113 for (i = 0; i < num_cells; i++)
1121 static const struct tuple cell_names[] =
1123 {CRS_CL_COUNT, N_("count")},
1124 {CRS_CL_ROW, N_("row %")},
1125 {CRS_CL_COLUMN, N_("column %")},
1126 {CRS_CL_TOTAL, N_("total %")},
1127 {CRS_CL_EXPECTED, N_("expected")},
1128 {CRS_CL_RESIDUAL, N_("residual")},
1129 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1130 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1134 const struct tuple *t;
1136 for (t = cell_names; t->value != cells[i]; t++)
1137 assert (t->value != -1);
1139 cp = stpcpy (cp, ", ");
1140 cp = stpcpy (cp, gettext (t->name));
1144 tab_title (table, 0, title);
1148 tab_offset (table, 0, 2);
1153 /* Chi-square table initialization. */
1154 if (cmd.a_statistics[CRS_ST_CHISQ])
1156 chisq = tab_create (6 + (nvar - 2),
1157 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1158 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1160 tab_title (chisq, 0, "Chi-square tests.");
1162 tab_offset (chisq, nvar - 2, 0);
1163 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1164 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1165 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1166 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1167 _("Asymp. Sig. (2-sided)"));
1168 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1169 _("Exact. Sig. (2-sided)"));
1170 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1171 _("Exact. Sig. (1-sided)"));
1173 tab_offset (chisq, 0, 1);
1178 /* Symmetric measures. */
1179 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1180 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1181 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1182 || cmd.a_statistics[CRS_ST_KAPPA])
1184 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1185 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1186 tab_title (sym, 0, "Symmetric measures.");
1188 tab_offset (sym, nvar - 2, 0);
1189 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1190 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1191 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1192 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1193 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1194 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1195 tab_offset (sym, 0, 1);
1200 /* Risk estimate. */
1201 if (cmd.a_statistics[CRS_ST_RISK])
1203 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1204 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1205 tab_title (risk, 0, "Risk estimate.");
1207 tab_offset (risk, nvar - 2, 0);
1208 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1209 _(" 95%% Confidence Interval"));
1210 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1211 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1212 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1213 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1214 tab_hline (risk, TAL_1, 2, 3, 1);
1215 tab_vline (risk, TAL_1, 2, 0, 1);
1216 tab_offset (risk, 0, 2);
1221 /* Directional measures. */
1222 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1223 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1225 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1226 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1227 tab_title (direct, 0, "Directional measures.");
1229 tab_offset (direct, nvar - 2, 0);
1230 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1231 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1232 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1233 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1234 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1235 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1236 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1237 tab_offset (direct, 0, 1);
1244 /* Find pivot subtable if applicable. */
1245 te = find_pivot_extent (tb, &tc, 0);
1249 /* Find all the row variable values. */
1250 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1252 /* Allocate memory space for the column and row totals. */
1253 if (n_rows > *maxrows)
1255 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1256 row_tot = *row_totp;
1259 if (n_cols > *maxcols)
1261 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1262 col_tot = *col_totp;
1266 /* Allocate table space for the matrix. */
1267 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1268 tab_realloc (table, -1,
1269 max (tab_nr (table) + (n_rows + 1) * num_cells,
1270 tab_nr (table) * (pe - pb) / (te - tb)));
1272 if (mode == GENERAL)
1274 /* Allocate memory space for the matrix. */
1275 if (n_cols * n_rows > *maxcells)
1277 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1278 *maxcells = n_cols * n_rows;
1283 /* Build the matrix and calculate column totals. */
1285 union value *cur_col = cols;
1286 union value *cur_row = rows;
1288 double *cp = col_tot;
1289 struct table_entry **p;
1292 for (p = &tb[0]; p < te; p++)
1294 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1298 for (; cur_row < &rows[n_rows]; cur_row++)
1304 mp = &mat[cur_col - cols];
1307 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1314 *cp += *mp = (*p)->u.freq;
1319 /* Zero out the rest of the matrix. */
1320 for (; cur_row < &rows[n_rows]; cur_row++)
1326 if (cur_col < &cols[n_cols])
1328 const int rem_cols = n_cols - (cur_col - cols);
1331 for (c = 0; c < rem_cols; c++)
1333 mp = &mat[cur_col - cols];
1334 for (r = 0; r < n_rows; r++)
1336 for (c = 0; c < rem_cols; c++)
1338 mp += n_cols - rem_cols;
1346 double *tp = col_tot;
1348 assert (mode == INTEGER);
1349 mat = (*tb)->u.data;
1352 /* Calculate column totals. */
1353 for (c = 0; c < n_cols; c++)
1356 double *cp = &mat[c];
1358 for (r = 0; r < n_rows; r++)
1359 cum += cp[r * n_cols];
1367 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1368 ns_cols += *cp != 0.;
1371 /* Calculate row totals. */
1374 double *rp = row_tot;
1377 for (ns_rows = 0, r = n_rows; r--; )
1380 for (c = n_cols; c--; )
1388 /* Calculate grand total. */
1394 if (n_rows < n_cols)
1395 tp = row_tot, n = n_rows;
1397 tp = col_tot, n = n_cols;
1404 /* Print the matrix. */
1408 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1409 for (i = 2; i < nvar; i++)
1410 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1413 for (c = 0; c < n_cols; c++)
1414 printf ("%4g", cols[c].f);
1416 for (r = 0; r < n_rows; r++)
1418 printf ("%4g:", rows[r].f);
1419 for (c = 0; c < n_cols; c++)
1420 printf ("%4g", mat[c + r * n_cols]);
1421 printf ("%4g", row_tot[r]);
1425 for (c = 0; c < n_cols; c++)
1426 printf ("%4g", col_tot[c]);
1432 /* Find the first variable that differs from the last subtable,
1433 then display the values of the dimensioning variables for
1434 each table that needs it. */
1436 int first_difference = nvar - 1;
1439 for (; ; first_difference--)
1441 assert (first_difference >= 2);
1442 if (memcmp (&cmp->values[first_difference],
1443 &(*tb)->values[first_difference],
1444 sizeof *cmp->values))
1450 display_dimensions (table, first_difference, *tb);
1452 display_dimensions (chisq, first_difference, *tb);
1454 display_dimensions (sym, first_difference, *tb);
1456 display_dimensions (risk, first_difference, *tb);
1458 display_dimensions (direct, first_difference, *tb);
1462 display_crosstabulation ();
1463 if (cmd.miss == CRS_REPORT)
1468 display_symmetric ();
1472 display_directional ();
1483 tab_resize (chisq, 4 + (nvar - 2), -1);
1494 /* Delete missing rows and columns for statistical analysis when
1497 delete_missing (void)
1502 for (r = 0; r < n_rows; r++)
1503 if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1507 for (c = 0; c < n_cols; c++)
1508 mat[c + r * n_cols] = 0.;
1516 for (c = 0; c < n_cols; c++)
1517 if (is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1521 for (r = 0; r < n_rows; r++)
1522 mat[c + r * n_cols] = 0.;
1528 /* Prepare table T for submission, and submit it. */
1530 submit (struct tab_table *t)
1537 tab_resize (t, -1, 0);
1538 if (tab_nr (t) == tab_t (t))
1543 tab_offset (t, 0, 0);
1545 for (i = 2; i < nvar; i++)
1546 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1547 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1548 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1549 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1551 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1553 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1554 tab_dim (t, crosstabs_dim);
1558 /* Sets the widths of all the columns and heights of all the rows in
1559 table T for driver D. */
1561 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1565 /* Width of a numerical column. */
1566 int c = outp_string_width (d, "0.000000");
1567 if (cmd.miss == CRS_REPORT)
1568 c += outp_string_width (d, "M");
1570 /* Set width for header columns. */
1573 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1575 if (w < d->prop_em_width * 8)
1576 w = d->prop_em_width * 8;
1578 if (w > d->prop_em_width * 15)
1579 w = d->prop_em_width * 15;
1581 for (i = 0; i < t->l; i++)
1585 for (i = t->l; i < t->nc; i++)
1588 for (i = 0; i < t->nr; i++)
1589 t->h[i] = tab_natural_height (t, d, i);
1592 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1593 int *cnt, int pivot);
1594 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1595 int *cnt, int pivot);
1597 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1599 static struct table_entry **
1600 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1602 return (mode == GENERAL
1603 ? find_pivot_extent_general (tp, cnt, pivot)
1604 : find_pivot_extent_integer (tp, cnt, pivot));
1607 /* Find the extent of a region in TP that contains one table. If
1608 PIVOT != 0 that means a set of table entries with identical table
1609 number; otherwise they also have to have the same values for every
1610 dimension after the row and column dimensions. The table that is
1611 searched starts at TP and has length CNT. Returns the first entry
1612 after the last one in the table; sets *CNT to the number of
1613 remaining values. If there are no entries in TP at all, returns
1614 NULL. A yucky interface, admittedly, but it works. */
1615 static struct table_entry **
1616 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1618 struct table_entry *fp = *tp;
1623 x = xtab[(*tp)->table];
1631 if ((*tp)->table != fp->table)
1636 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1643 /* Integer mode correspondent to find_pivot_extent_general(). This
1644 could be optimized somewhat, but I just don't give a crap about
1645 CROSSTABS performance in integer mode, which is just a
1646 CROSSTABS wart as far as I'm concerned.
1648 That said, feel free to send optimization patches to me. */
1649 static struct table_entry **
1650 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1652 struct table_entry *fp = *tp;
1657 x = xtab[(*tp)->table];
1665 if ((*tp)->table != fp->table)
1670 if (memcmp (&(*tp)->values[2], &fp->values[2],
1671 sizeof (union value) * (x->nvar - 2)))
1678 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1679 result. WIDTH_ points to an int which is either 0 for a
1680 numeric value or a string width for a string value. */
1682 compare_value (const void *a_, const void *b_, void *width_)
1684 const union value *a = a_;
1685 const union value *b = b_;
1686 const int *pwidth = width_;
1687 const int width = *pwidth;
1690 return (a->f < b->f) ? -1 : (a->f > b->f);
1692 return strncmp (a->s, b->s, width);
1695 /* Given an array of ENTRY_CNT table_entry structures starting at
1696 ENTRIES, creates a sorted list of the values that the variable
1697 with index VAR_INDEX takes on. The values are returned as a
1698 malloc()'darray stored in *VALUES, with the number of values
1699 stored in *VALUE_CNT.
1702 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1703 union value **values, int *value_cnt)
1705 if (mode == GENERAL)
1707 int width = xtab[(*entries)->table]->vars[var_idx]->width;
1710 *values = xmalloc (sizeof **values * entry_cnt);
1711 for (i = 0; i < entry_cnt; i++)
1712 (*values)[i] = entries[i]->values[var_idx];
1713 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1714 compare_value, &width);
1718 struct crosstab_proc *crs
1719 = &xtab[(*entries)->table]->vars[var_idx]->p.crs;
1722 assert (mode == INTEGER);
1723 *values = xmalloc (sizeof **values * crs->count);
1724 for (i = 0; i < crs->count; i++)
1725 (*values)[i].f = i + crs->min;
1726 *value_cnt = crs->count;
1730 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1731 from V, displayed with print format spec from variable VAR. When
1732 in REPORT missing-value mode, missing values have an M appended. */
1734 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1735 const union value *v, const struct variable *var)
1737 struct len_string s;
1739 const char *label = val_labs_find (var->val_labs, *v);
1742 tab_text (table, c, r, TAB_LEFT, label);
1746 s.string = tab_alloc (table, var->print.w);
1747 format_short (s.string, &var->print, v);
1748 s.length = strlen (s.string);
1749 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1750 s.string[s.length++] = 'M';
1751 while (s.length && *s.string == ' ')
1756 tab_raw (table, c, r, opt, &s);
1759 /* Draws a line across TABLE at the current row to indicate the most
1760 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1761 that changed, and puts the values that changed into the table. TB
1762 and X must be the corresponding table_entry and crosstab,
1765 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1767 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1769 for (; first_difference >= 2; first_difference--)
1770 table_value_missing (table, nvar - first_difference - 1, 0,
1771 TAB_RIGHT, &tb->values[first_difference],
1772 x->vars[first_difference]);
1775 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1777 float_M_suffix (struct tab_table *table, int c, int r, double v)
1779 static const struct fmt_spec f = {FMT_F, 8, 0};
1780 struct len_string s;
1783 s.string = tab_alloc (table, 9);
1785 format_short (s.string, &f, (union value *) &v);
1786 while (*s.string == ' ')
1791 tab_raw (table, c, r, TAB_RIGHT, &s);
1794 /* Displays the crosstabulation table. */
1796 display_crosstabulation (void)
1801 for (r = 0; r < n_rows; r++)
1802 table_value_missing (table, nvar - 2, r * num_cells,
1803 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1805 tab_text (table, nvar - 2, n_rows * num_cells,
1806 TAB_LEFT, _("Total"));
1808 /* Put in the actual cells. */
1813 tab_offset (table, nvar - 1, -1);
1814 for (r = 0; r < n_rows; r++)
1817 tab_hline (table, TAL_1, -1, n_cols, 0);
1818 for (c = 0; c < n_cols; c++)
1820 double expected_value;
1823 expected_value = row_tot[r] * col_tot[c] / W;
1824 for (i = 0; i < num_cells; i++)
1834 v = *mp / row_tot[r] * 100.;
1837 v = *mp / col_tot[c] * 100.;
1842 case CRS_CL_EXPECTED:
1845 case CRS_CL_RESIDUAL:
1846 v = *mp - expected_value;
1848 case CRS_CL_SRESIDUAL:
1849 v = (*mp - expected_value) / sqrt (expected_value);
1851 case CRS_CL_ASRESIDUAL:
1852 v = ((*mp - expected_value)
1853 / sqrt (expected_value
1854 * (1. - row_tot[r] / W)
1855 * (1. - col_tot[c] / W)));
1861 if (cmd.miss == CRS_REPORT
1862 && (is_num_user_missing (cols[c].f, x->vars[COL_VAR])
1863 || is_num_user_missing (rows[r].f, x->vars[ROW_VAR])))
1864 float_M_suffix (table, c, i, v);
1866 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1872 tab_offset (table, -1, tab_row (table) + num_cells);
1880 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1881 for (r = 0; r < n_rows; r++)
1882 for (i = 0; i < num_cells; i++)
1895 v = row_tot[r] / W * 100.;
1898 v = row_tot[r] / W * 100.;
1900 case CRS_CL_EXPECTED:
1901 case CRS_CL_RESIDUAL:
1902 case CRS_CL_SRESIDUAL:
1903 case CRS_CL_ASRESIDUAL:
1910 if (cmd.miss == CRS_REPORT
1911 && is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
1912 float_M_suffix (table, n_cols, 0, v);
1914 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1916 tab_next_row (table);
1920 /* Column totals, grand total. */
1925 tab_hline (table, TAL_1, -1, n_cols, 0);
1926 for (c = 0; c <= n_cols; c++)
1928 double ct = c < n_cols ? col_tot[c] : W;
1931 for (i = j = 0; i < num_cells; i++)
1949 case CRS_CL_EXPECTED:
1950 case CRS_CL_RESIDUAL:
1951 case CRS_CL_SRESIDUAL:
1952 case CRS_CL_ASRESIDUAL:
1958 if (cmd.miss == CRS_REPORT && c < n_cols
1959 && is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
1960 float_M_suffix (table, c, j, v);
1962 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
1968 tab_offset (table, -1, tab_row (table) + j);
1971 tab_offset (table, 0, -1);
1974 static void calc_r (double *X, double *Y, double *, double *, double *);
1975 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1977 /* Display chi-square statistics. */
1979 display_chisq (void)
1981 static const char *chisq_stats[N_CHISQ] =
1983 N_("Pearson Chi-Square"),
1984 N_("Likelihood Ratio"),
1985 N_("Fisher's Exact Test"),
1986 N_("Continuity Correction"),
1987 N_("Linear-by-Linear Association"),
1989 double chisq_v[N_CHISQ];
1990 double fisher1, fisher2;
1996 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1998 tab_offset (chisq, nvar - 2, -1);
2000 for (i = 0; i < N_CHISQ; i++)
2002 if ((i != 2 && chisq_v[i] == SYSMIS)
2003 || (i == 2 && fisher1 == SYSMIS))
2007 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2010 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2011 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2012 tab_float (chisq, 3, 0, TAB_RIGHT,
2013 chisq_sig (chisq_v[i], df[i]), 8, 3);
2018 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2019 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2021 tab_next_row (chisq);
2024 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2025 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2026 tab_next_row (chisq);
2028 tab_offset (chisq, 0, -1);
2031 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2032 double[N_SYMMETRIC]);
2034 /* Display symmetric measures. */
2036 display_symmetric (void)
2038 static const char *categories[] =
2040 N_("Nominal by Nominal"),
2041 N_("Ordinal by Ordinal"),
2042 N_("Interval by Interval"),
2043 N_("Measure of Agreement"),
2046 static const char *stats[N_SYMMETRIC] =
2050 N_("Contingency Coefficient"),
2051 N_("Kendall's tau-b"),
2052 N_("Kendall's tau-c"),
2054 N_("Spearman Correlation"),
2059 static const int stats_categories[N_SYMMETRIC] =
2061 0, 0, 0, 1, 1, 1, 1, 2, 3,
2065 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2068 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2071 tab_offset (sym, nvar - 2, -1);
2073 for (i = 0; i < N_SYMMETRIC; i++)
2075 if (sym_v[i] == SYSMIS)
2078 if (stats_categories[i] != last_cat)
2080 last_cat = stats_categories[i];
2081 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2084 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2085 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2086 if (sym_ase[i] != SYSMIS)
2087 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2088 if (sym_t[i] != SYSMIS)
2089 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2090 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2094 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2095 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2098 tab_offset (sym, 0, -1);
2101 static int calc_risk (double[], double[], double[], union value *);
2103 /* Display risk estimate. */
2108 double risk_v[3], lower[3], upper[3];
2112 if (!calc_risk (risk_v, upper, lower, c))
2115 tab_offset (risk, nvar - 2, -1);
2117 for (i = 0; i < 3; i++)
2119 if (risk_v[i] == SYSMIS)
2125 if (x->vars[COL_VAR]->type == NUMERIC)
2126 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2127 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2129 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2130 x->vars[COL_VAR]->name,
2131 x->vars[COL_VAR]->width, c[0].s,
2132 x->vars[COL_VAR]->width, c[1].s);
2136 if (x->vars[ROW_VAR]->type == NUMERIC)
2137 sprintf (buf, _("For cohort %s = %g"),
2138 x->vars[ROW_VAR]->name, rows[i - 1].f);
2140 sprintf (buf, _("For cohort %s = %.*s"),
2141 x->vars[ROW_VAR]->name,
2142 x->vars[ROW_VAR]->width, rows[i - 1].s);
2146 tab_text (risk, 0, 0, TAB_LEFT, buf);
2147 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2148 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2149 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2150 tab_next_row (risk);
2153 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2154 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2155 tab_next_row (risk);
2157 tab_offset (risk, 0, -1);
2160 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2161 double[N_DIRECTIONAL]);
2163 /* Display directional measures. */
2165 display_directional (void)
2167 static const char *categories[] =
2169 N_("Nominal by Nominal"),
2170 N_("Ordinal by Ordinal"),
2171 N_("Nominal by Interval"),
2174 static const char *stats[] =
2177 N_("Goodman and Kruskal tau"),
2178 N_("Uncertainty Coefficient"),
2183 static const char *types[] =
2190 static const int stats_categories[N_DIRECTIONAL] =
2192 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2195 static const int stats_stats[N_DIRECTIONAL] =
2197 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2200 static const int stats_types[N_DIRECTIONAL] =
2202 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2205 static const int *stats_lookup[] =
2212 static const char **stats_names[] =
2224 double direct_v[N_DIRECTIONAL];
2225 double direct_ase[N_DIRECTIONAL];
2226 double direct_t[N_DIRECTIONAL];
2230 if (!calc_directional (direct_v, direct_ase, direct_t))
2233 tab_offset (direct, nvar - 2, -1);
2235 for (i = 0; i < N_DIRECTIONAL; i++)
2237 if (direct_v[i] == SYSMIS)
2243 for (j = 0; j < 3; j++)
2244 if (last[j] != stats_lookup[j][i])
2247 tab_hline (direct, TAL_1, j, 6, 0);
2252 int k = last[j] = stats_lookup[j][i];
2257 string = x->vars[0]->name;
2259 string = x->vars[1]->name;
2261 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2262 gettext (stats_names[j][k]), string);
2267 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2268 if (direct_ase[i] != SYSMIS)
2269 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2270 if (direct_t[i] != SYSMIS)
2271 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2272 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2273 tab_next_row (direct);
2276 tab_offset (direct, 0, -1);
2279 /* Statistical calculations. */
2281 /* Returns the value of the gamma (factorial) function for an integer
2284 gamma_int (double x)
2289 for (i = 2; i < x; i++)
2294 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2296 static inline double
2297 Pr (int a, int b, int c, int d)
2299 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2300 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2301 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2302 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2303 / gamma_int (a + b + c + d + 1.));
2306 /* Swap the contents of A and B. */
2308 swap (int *a, int *b)
2315 /* Calculate significance for Fisher's exact test as specified in
2316 _SPSS Statistical Algorithms_, Appendix 5. */
2318 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2322 if (min (c, d) < min (a, b))
2323 swap (&a, &c), swap (&b, &d);
2324 if (min (b, d) < min (a, c))
2325 swap (&a, &b), swap (&c, &d);
2329 swap (&a, &b), swap (&c, &d);
2331 swap (&a, &c), swap (&b, &d);
2335 for (x = 0; x <= a; x++)
2336 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2338 *fisher2 = *fisher1;
2339 for (x = 1; x <= b; x++)
2340 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2343 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2344 columns with values COLS and N_ROWS rows with values ROWS. Values
2345 in the matrix sum to W. */
2347 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2348 double *fisher1, double *fisher2)
2352 chisq[0] = chisq[1] = 0.;
2353 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2354 *fisher1 = *fisher2 = SYSMIS;
2356 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2358 if (ns_rows <= 1 || ns_cols <= 1)
2360 chisq[0] = chisq[1] = SYSMIS;
2364 for (r = 0; r < n_rows; r++)
2365 for (c = 0; c < n_cols; c++)
2367 const double expected = row_tot[r] * col_tot[c] / W;
2368 const double freq = mat[n_cols * r + c];
2369 const double residual = freq - expected;
2372 chisq[0] += residual * residual / expected;
2374 chisq[1] += freq * log (expected / freq);
2385 /* Calculate Yates and Fisher exact test. */
2386 if (ns_cols == 2 && ns_rows == 2)
2388 double f11, f12, f21, f22;
2394 for (i = j = 0; i < n_cols; i++)
2395 if (col_tot[i] != 0.)
2404 f11 = mat[nz_cols[0]];
2405 f12 = mat[nz_cols[1]];
2406 f21 = mat[nz_cols[0] + n_cols];
2407 f22 = mat[nz_cols[1] + n_cols];
2412 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2415 chisq[3] = (W * x * x
2416 / (f11 + f12) / (f21 + f22)
2417 / (f11 + f21) / (f12 + f22));
2425 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2426 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2429 /* Calculate Mantel-Haenszel. */
2430 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2432 double r, ase_0, ase_1;
2433 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2435 chisq[4] = (W - 1.) * r * r;
2440 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2441 ASE_1, and ase_0 into ASE_0. The row and column values must be
2442 passed in X and Y. */
2444 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2446 double SX, SY, S, T;
2448 double sum_XYf, sum_X2Y2f;
2449 double sum_Xr, sum_X2r;
2450 double sum_Yc, sum_Y2c;
2453 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2454 for (j = 0; j < n_cols; j++)
2456 double fij = mat[j + i * n_cols];
2457 double product = X[i] * Y[j];
2458 double temp = fij * product;
2460 sum_X2Y2f += temp * product;
2463 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2465 sum_Xr += X[i] * row_tot[i];
2466 sum_X2r += X[i] * X[i] * row_tot[i];
2470 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2472 sum_Yc += Y[i] * col_tot[i];
2473 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2477 S = sum_XYf - sum_Xr * sum_Yc / W;
2478 SX = sum_X2r - sum_Xr * sum_Xr / W;
2479 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2482 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2487 for (s = c = 0., i = 0; i < n_rows; i++)
2488 for (j = 0; j < n_cols; j++)
2490 double Xresid, Yresid;
2493 Xresid = X[i] - Xbar;
2494 Yresid = Y[j] - Ybar;
2495 temp = (T * Xresid * Yresid
2497 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2498 y = mat[j + i * n_cols] * temp * temp - c;
2503 *ase_1 = sqrt (s) / (T * T);
2507 static double somers_d_v[3];
2508 static double somers_d_ase[3];
2509 static double somers_d_t[3];
2511 /* Calculate symmetric statistics and their asymptotic standard
2512 errors. Returns 0 if none could be calculated. */
2514 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2515 double t[N_SYMMETRIC])
2517 int q = min (ns_rows, ns_cols);
2526 for (i = 0; i < N_SYMMETRIC; i++)
2527 v[i] = ase[i] = t[i] = SYSMIS;
2530 /* Phi, Cramer's V, contingency coefficient. */
2531 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2533 double Xp = 0.; /* Pearson chi-square. */
2538 for (r = 0; r < n_rows; r++)
2539 for (c = 0; c < n_cols; c++)
2541 const double expected = row_tot[r] * col_tot[c] / W;
2542 const double freq = mat[n_cols * r + c];
2543 const double residual = freq - expected;
2546 Xp += residual * residual / expected;
2550 if (cmd.a_statistics[CRS_ST_PHI])
2552 v[0] = sqrt (Xp / W);
2553 v[1] = sqrt (Xp / (W * (q - 1)));
2555 if (cmd.a_statistics[CRS_ST_CC])
2556 v[2] = sqrt (Xp / (Xp + W));
2559 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2560 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2565 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2572 for (r = 0; r < n_rows; r++)
2573 Dr -= row_tot[r] * row_tot[r];
2574 for (c = 0; c < n_cols; c++)
2575 Dc -= col_tot[c] * col_tot[c];
2581 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2582 for (c = 0; c < n_cols; c++)
2586 for (r = 0; r < n_rows; r++)
2587 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2597 for (i = 0; i < n_rows; i++)
2601 for (j = 1; j < n_cols; j++)
2602 Cij += col_tot[j] - cum[j + i * n_cols];
2605 for (j = 1; j < n_cols; j++)
2606 Dij += cum[j + (i - 1) * n_cols];
2610 double fij = mat[j + i * n_cols];
2616 assert (j < n_cols);
2618 Cij -= col_tot[j] - cum[j + i * n_cols];
2619 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2623 Cij += cum[j - 1 + (i - 1) * n_cols];
2624 Dij -= cum[j + (i - 1) * n_cols];
2630 if (cmd.a_statistics[CRS_ST_BTAU])
2631 v[3] = (P - Q) / sqrt (Dr * Dc);
2632 if (cmd.a_statistics[CRS_ST_CTAU])
2633 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2634 if (cmd.a_statistics[CRS_ST_GAMMA])
2635 v[5] = (P - Q) / (P + Q);
2637 /* ASE for tau-b, tau-c, gamma. Calculations could be
2638 eliminated here, at expense of memory. */
2643 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2644 for (i = 0; i < n_rows; i++)
2648 for (j = 1; j < n_cols; j++)
2649 Cij += col_tot[j] - cum[j + i * n_cols];
2652 for (j = 1; j < n_cols; j++)
2653 Dij += cum[j + (i - 1) * n_cols];
2657 double fij = mat[j + i * n_cols];
2659 if (cmd.a_statistics[CRS_ST_BTAU])
2661 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2662 + v[3] * (row_tot[i] * Dc
2663 + col_tot[j] * Dr));
2664 btau_cum += fij * temp * temp;
2668 const double temp = Cij - Dij;
2669 ctau_cum += fij * temp * temp;
2672 if (cmd.a_statistics[CRS_ST_GAMMA])
2674 const double temp = Q * Cij - P * Dij;
2675 gamma_cum += fij * temp * temp;
2678 if (cmd.a_statistics[CRS_ST_D])
2680 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2681 - (P - Q) * (W - row_tot[i]));
2682 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2683 - (Q - P) * (W - col_tot[j]));
2688 assert (j < n_cols);
2690 Cij -= col_tot[j] - cum[j + i * n_cols];
2691 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2695 Cij += cum[j - 1 + (i - 1) * n_cols];
2696 Dij -= cum[j + (i - 1) * n_cols];
2702 btau_var = ((btau_cum
2703 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2705 if (cmd.a_statistics[CRS_ST_BTAU])
2707 ase[3] = sqrt (btau_var);
2708 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2711 if (cmd.a_statistics[CRS_ST_CTAU])
2713 ase[4] = ((2 * q / ((q - 1) * W * W))
2714 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2715 t[4] = v[4] / ase[4];
2717 if (cmd.a_statistics[CRS_ST_GAMMA])
2719 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2720 t[5] = v[5] / (2. / (P + Q)
2721 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2723 if (cmd.a_statistics[CRS_ST_D])
2725 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2726 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2727 somers_d_t[0] = (somers_d_v[0]
2729 * sqrt (ctau_cum - sqr (P - Q) / W)));
2730 somers_d_v[1] = (P - Q) / Dc;
2731 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2732 somers_d_t[1] = (somers_d_v[1]
2734 * sqrt (ctau_cum - sqr (P - Q) / W)));
2735 somers_d_v[2] = (P - Q) / Dr;
2736 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2737 somers_d_t[2] = (somers_d_v[2]
2739 * sqrt (ctau_cum - sqr (P - Q) / W)));
2745 /* Spearman correlation, Pearson's r. */
2746 if (cmd.a_statistics[CRS_ST_CORR])
2748 double *R = local_alloc (sizeof *R * n_rows);
2749 double *C = local_alloc (sizeof *C * n_cols);
2752 double y, t, c = 0., s = 0.;
2757 R[i] = s + (row_tot[i] + 1.) / 2.;
2764 assert (i < n_rows);
2769 double y, t, c = 0., s = 0.;
2774 C[j] = s + (col_tot[j] + 1.) / 2;
2781 assert (j < n_cols);
2785 calc_r (R, C, &v[6], &t[6], &ase[6]);
2791 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2795 /* Cohen's kappa. */
2796 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2798 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2801 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2802 i < ns_rows; i++, j++)
2806 while (col_tot[j] == 0.)
2809 prod = row_tot[i] * col_tot[j];
2810 sum = row_tot[i] + col_tot[j];
2812 sum_fii += mat[j + i * n_cols];
2814 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2815 sum_riciri_ci += prod * sum;
2817 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2818 for (j = 0; j < ns_cols; j++)
2820 double sum = row_tot[i] + col_tot[j];
2821 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2824 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2826 ase[8] = sqrt ((W * W * sum_rici
2827 + sum_rici * sum_rici
2828 - W * sum_riciri_ci)
2829 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2831 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2832 / sqr (W * W - sum_rici))
2833 + ((2. * (W - sum_fii)
2834 * (2. * sum_fii * sum_rici
2835 - W * sum_fiiri_ci))
2836 / cube (W * W - sum_rici))
2837 + (sqr (W - sum_fii)
2838 * (W * sum_fijri_ci2 - 4.
2839 * sum_rici * sum_rici)
2840 / pow4 (W * W - sum_rici))));
2842 t[8] = v[8] / ase[8];
2849 /* Calculate risk estimate. */
2851 calc_risk (double *value, double *upper, double *lower, union value *c)
2853 double f11, f12, f21, f22;
2859 for (i = 0; i < 3; i++)
2860 value[i] = upper[i] = lower[i] = SYSMIS;
2863 if (ns_rows != 2 || ns_cols != 2)
2870 for (i = j = 0; i < n_cols; i++)
2871 if (col_tot[i] != 0.)
2880 f11 = mat[nz_cols[0]];
2881 f12 = mat[nz_cols[1]];
2882 f21 = mat[nz_cols[0] + n_cols];
2883 f22 = mat[nz_cols[1] + n_cols];
2885 c[0] = cols[nz_cols[0]];
2886 c[1] = cols[nz_cols[1]];
2889 value[0] = (f11 * f22) / (f12 * f21);
2890 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2891 lower[0] = value[0] * exp (-1.960 * v);
2892 upper[0] = value[0] * exp (1.960 * v);
2894 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2895 v = sqrt ((f12 / (f11 * (f11 + f12)))
2896 + (f22 / (f21 * (f21 + f22))));
2897 lower[1] = value[1] * exp (-1.960 * v);
2898 upper[1] = value[1] * exp (1.960 * v);
2900 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2901 v = sqrt ((f11 / (f12 * (f11 + f12)))
2902 + (f21 / (f22 * (f21 + f22))));
2903 lower[2] = value[2] * exp (-1.960 * v);
2904 upper[2] = value[2] * exp (1.960 * v);
2909 /* Calculate directional measures. */
2911 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2912 double t[N_DIRECTIONAL])
2917 for (i = 0; i < N_DIRECTIONAL; i++)
2918 v[i] = ase[i] = t[i] = SYSMIS;
2922 if (cmd.a_statistics[CRS_ST_LAMBDA])
2924 double *fim = xmalloc (sizeof *fim * n_rows);
2925 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2926 double *fmj = xmalloc (sizeof *fmj * n_cols);
2927 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2928 double sum_fim, sum_fmj;
2930 int rm_index, cm_index;
2933 /* Find maximum for each row and their sum. */
2934 for (sum_fim = 0., i = 0; i < n_rows; i++)
2936 double max = mat[i * n_cols];
2939 for (j = 1; j < n_cols; j++)
2940 if (mat[j + i * n_cols] > max)
2942 max = mat[j + i * n_cols];
2946 sum_fim += fim[i] = max;
2947 fim_index[i] = index;
2950 /* Find maximum for each column. */
2951 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2953 double max = mat[j];
2956 for (i = 1; i < n_rows; i++)
2957 if (mat[j + i * n_cols] > max)
2959 max = mat[j + i * n_cols];
2963 sum_fmj += fmj[j] = max;
2964 fmj_index[j] = index;
2967 /* Find maximum row total. */
2970 for (i = 1; i < n_rows; i++)
2971 if (row_tot[i] > rm)
2977 /* Find maximum column total. */
2980 for (j = 1; j < n_cols; j++)
2981 if (col_tot[j] > cm)
2987 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2988 v[1] = (sum_fmj - rm) / (W - rm);
2989 v[2] = (sum_fim - cm) / (W - cm);
2991 /* ASE1 for Y given X. */
2995 for (accum = 0., i = 0; i < n_rows; i++)
2996 for (j = 0; j < n_cols; j++)
2998 const int deltaj = j == cm_index;
2999 accum += (mat[j + i * n_cols]
3000 * sqr ((j == fim_index[i])
3005 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3008 /* ASE0 for Y given X. */
3012 for (accum = 0., i = 0; i < n_rows; i++)
3013 if (cm_index != fim_index[i])
3014 accum += (mat[i * n_cols + fim_index[i]]
3015 + mat[i * n_cols + cm_index]);
3016 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3019 /* ASE1 for X given Y. */
3023 for (accum = 0., i = 0; i < n_rows; i++)
3024 for (j = 0; j < n_cols; j++)
3026 const int deltaj = i == rm_index;
3027 accum += (mat[j + i * n_cols]
3028 * sqr ((i == fmj_index[j])
3033 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3036 /* ASE0 for X given Y. */
3040 for (accum = 0., j = 0; j < n_cols; j++)
3041 if (rm_index != fmj_index[j])
3042 accum += (mat[j + n_cols * fmj_index[j]]
3043 + mat[j + n_cols * rm_index]);
3044 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3047 /* Symmetric ASE0 and ASE1. */
3052 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3053 for (j = 0; j < n_cols; j++)
3055 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3056 int temp1 = (i == rm_index) + (j == cm_index);
3057 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3058 accum1 += (mat[j + i * n_cols]
3059 * sqr (temp0 + (v[0] - 1.) * temp1));
3061 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3062 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3063 / (2. * W - rm - cm));
3072 double sum_fij2_ri, sum_fij2_ci;
3073 double sum_ri2, sum_cj2;
3075 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3076 for (j = 0; j < n_cols; j++)
3078 double temp = sqr (mat[j + i * n_cols]);
3079 sum_fij2_ri += temp / row_tot[i];
3080 sum_fij2_ci += temp / col_tot[j];
3083 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3084 sum_ri2 += row_tot[i] * row_tot[i];
3086 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3087 sum_cj2 += col_tot[j] * col_tot[j];
3089 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3090 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3094 if (cmd.a_statistics[CRS_ST_UC])
3096 double UX, UY, UXY, P;
3097 double ase1_yx, ase1_xy, ase1_sym;
3100 for (UX = 0., i = 0; i < n_rows; i++)
3101 if (row_tot[i] > 0.)
3102 UX -= row_tot[i] / W * log (row_tot[i] / W);
3104 for (UY = 0., j = 0; j < n_cols; j++)
3105 if (col_tot[j] > 0.)
3106 UY -= col_tot[j] / W * log (col_tot[j] / W);
3108 for (UXY = P = 0., i = 0; i < n_rows; i++)
3109 for (j = 0; j < n_cols; j++)
3111 double entry = mat[j + i * n_cols];
3116 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3117 UXY -= entry / W * log (entry / W);
3120 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3121 for (j = 0; j < n_cols; j++)
3123 double entry = mat[j + i * n_cols];
3128 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3129 + (UX - UXY) * log (col_tot[j] / W));
3130 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3131 + (UY - UXY) * log (row_tot[i] / W));
3132 ase1_sym += entry * sqr ((UXY
3133 * log (row_tot[i] * col_tot[j] / (W * W)))
3134 - (UX + UY) * log (entry / W));
3137 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3138 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3139 t[5] = v[5] / ((2. / (W * (UX + UY)))
3140 * sqrt (P - sqr (UX + UY - UXY) / W));
3142 v[6] = (UX + UY - UXY) / UX;
3143 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3144 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3146 v[7] = (UX + UY - UXY) / UY;
3147 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3148 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3152 if (cmd.a_statistics[CRS_ST_D])
3157 calc_symmetric (NULL, NULL, NULL);
3158 for (i = 0; i < 3; i++)
3160 v[8 + i] = somers_d_v[i];
3161 ase[8 + i] = somers_d_ase[i];
3162 t[8 + i] = somers_d_t[i];
3167 if (cmd.a_statistics[CRS_ST_ETA])
3170 double sum_Xr, sum_X2r;
3174 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3176 sum_Xr += rows[i].f * row_tot[i];
3177 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3179 SX = sum_X2r - sum_Xr * sum_Xr / W;
3181 for (SXW = 0., j = 0; j < n_cols; j++)
3185 for (cum = 0., i = 0; i < n_rows; i++)
3187 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3188 cum += rows[i].f * mat[j + i * n_cols];
3191 SXW -= cum * cum / col_tot[j];
3193 v[11] = sqrt (1. - SXW / SX);
3197 double sum_Yc, sum_Y2c;
3201 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3203 sum_Yc += cols[i].f * col_tot[i];
3204 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3206 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3208 for (SYW = 0., i = 0; i < n_rows; i++)
3212 for (cum = 0., j = 0; j < n_cols; j++)
3214 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3215 cum += cols[j].f * mat[j + i * n_cols];
3218 SYW -= cum * cum / row_tot[i];
3220 v[12] = sqrt (1. - SYW / SY);
3227 /* A wrapper around data_out() that limits string output to short
3228 string width and null terminates the result. */
3230 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3232 struct fmt_spec fmt_subst;
3234 /* Limit to short string width. */
3235 if (formats[fp->type].cat & FCAT_STRING)
3239 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3240 if (fmt_subst.type == FMT_A)
3241 fmt_subst.w = min (8, fmt_subst.w);
3243 fmt_subst.w = min (16, fmt_subst.w);
3249 data_out (s, fp, v);
3251 /* Null terminate. */