1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
32 /* AIX requires this to be the first thing in the file. */
35 #define alloca __builtin_alloca
43 #ifndef alloca /* predefined by HP cc +Olibcalls */
58 #include "dcdflib/cdflib.h"
70 #include "debug-print.h"
76 +missing=miss:!table/include/report;
77 +write[wr_]=none,cells,all;
78 +format=fmt:!labels/nolabels/novallabs,
81 tabl:!tables/notables,
84 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
86 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
92 /* Number of chi-square statistics. */
95 /* Number of symmetric statistics. */
98 /* Number of directional statistics. */
99 #define N_DIRECTIONAL 13
101 /* A single table entry for general mode. */
104 int table; /* Flattened table number. */
107 double freq; /* Frequency count. */
108 double *data; /* Crosstabulation table for integer mode. */
111 union value v[1]; /* Values. */
114 /* A crosstabulation. */
117 int nvar; /* Number of variables. */
118 double missing; /* Missing cases count. */
119 int ofs; /* Integer mode: Offset into sorted_tab[]. */
120 struct variable *v[2]; /* At least two variables; sorted by
121 larger indices first. */
124 /* Indexes into crosstab.v. */
131 /* General mode crosstabulation table. */
132 static struct hsh_table *gen_tab; /* Hash table. */
133 static int n_sorted_tab; /* Number of entries in sorted_tab. */
134 static struct table_entry **sorted_tab; /* Sorted table. */
136 /* VARIABLES dictionary. */
137 static struct dictionary *var_dict;
140 static struct crosstab **xtab;
143 /* Integer or general mode? */
152 static int num_cells; /* Number of cells requested. */
153 static int cells[8]; /* Cells requested. */
154 static int expected; /* Nonzero if expected value is needed. */
157 static int write; /* One of WR_* that specifies the WRITE style. */
159 /* Command parsing info. */
160 static struct cmd_crosstabs cmd;
163 static struct pool *pl_tc; /* For table cells. */
164 static struct pool *pl_col; /* For column data. */
166 static int internal_cmd_crosstabs (void);
167 static void free_var_dict (void);
168 static void precalc (void);
169 static int calc_general (struct ccase *);
170 static int calc_integer (struct ccase *);
171 static void postcalc (void);
172 static void submit (struct tab_table *);
175 static void debug_print (void);
176 static void print_table_entries (struct table_entry **tab);
179 /* Parse and execute CROSSTABS, then clean up. */
183 int result = internal_cmd_crosstabs ();
186 pool_destroy (pl_tc);
187 pool_destroy (pl_col);
192 /* Parses and executes the CROSSTABS procedure. */
194 internal_cmd_crosstabs (void)
199 pl_tc = pool_create ();
200 pl_col = pool_create ();
202 lex_match_id ("CROSSTABS");
203 if (!parse_crosstabs (&cmd))
207 /* Needs var_dict. */
211 mode = var_dict ? INTEGER : GENERAL;
218 cmd.a_cells[CRS_CL_COUNT] = 1;
226 for (i = 0; i < CRS_CL_count; i++)
231 cmd.a_cells[CRS_CL_COUNT] = 1;
232 cmd.a_cells[CRS_CL_ROW] = 1;
233 cmd.a_cells[CRS_CL_COLUMN] = 1;
234 cmd.a_cells[CRS_CL_TOTAL] = 1;
236 if (cmd.a_cells[CRS_CL_ALL])
238 for (i = 0; i < CRS_CL_count; i++)
240 cmd.a_cells[CRS_CL_ALL] = 0;
242 cmd.a_cells[CRS_CL_NONE] = 0;
243 for (num_cells = i = 0; i < CRS_CL_count; i++)
246 if (i >= CRS_CL_EXPECTED)
248 cmd.a_cells[num_cells++] = i;
253 if (cmd.sbc_statistics)
258 for (i = 0; i < CRS_ST_count; i++)
259 if (cmd.a_statistics[i])
262 cmd.a_statistics[CRS_ST_CHISQ] = 1;
263 if (cmd.a_statistics[CRS_ST_ALL])
264 for (i = 0; i < CRS_ST_count; i++)
265 cmd.a_statistics[i] = 1;
269 if (cmd.miss == CRS_REPORT && mode == GENERAL)
271 msg (SE, _("Missing mode REPORT not allowed in general mode. "
272 "Assuming MISSING=TABLE."));
273 cmd.miss = CRS_TABLE;
277 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
278 cmd.a_write[CRS_WR_ALL] = 0;
279 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
281 msg (SE, _("Write mode ALL not allowed in general mode. "
282 "Assuming WRITE=CELLS."));
283 cmd.a_write[CRS_WR_CELLS] = 1;
286 && (cmd.a_write[CRS_WR_NONE]
287 + cmd.a_write[CRS_WR_ALL]
288 + cmd.a_write[CRS_WR_CELLS] == 0))
289 cmd.a_write[CRS_WR_CELLS] = 1;
290 if (cmd.a_write[CRS_WR_CELLS])
291 write = CRS_WR_CELLS;
292 else if (cmd.a_write[CRS_WR_ALL])
297 update_weighting (&default_dict);
298 procedure (precalc, mode == GENERAL ? calc_general : calc_integer, postcalc);
303 /* Frees var_dict once it's no longer needed. */
313 if (var_dict->var_by_name)
315 avl_destroy (var_dict->var_by_name, NULL);
316 var_dict->var_by_name = NULL;
319 for (i = 0; i < var_dict->nvar; i++)
320 free (var_dict->var[i]);
321 free (var_dict->var);
322 var_dict->var = NULL;
325 free_dictionary (var_dict);
331 /* Parses the TABLES subcommand. */
333 crs_custom_tables (struct cmd_crosstabs *cmd unused)
335 struct dictionary *dict;
337 struct variable ***by = NULL;
342 /* Ensure that this is a TABLES subcommand. */
343 if (!lex_match_id ("TABLES")
344 && (token != T_ID || !is_varname (tokid))
349 dict = var_dict ? var_dict : &default_dict;
353 by = xrealloc (by, sizeof *by * (n_by + 1));
354 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
355 if (!parse_variables (dict, &by[n_by], &by_nvar[n_by],
356 PV_NO_DUPLICATE | PV_NO_SCRATCH))
361 if (!lex_match (T_BY))
365 lex_error (_("expecting BY"));
374 int *by_iter = xcalloc (sizeof *by_iter * n_by);
377 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
378 for (i = 0; i < nx; i++)
382 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
389 if (var_dict == NULL)
390 for (i = 0; i < n_by; i++)
391 x->v[i] = by[i][by_iter[i]];
393 for (i = 0; i < n_by; i++)
394 x->v[i] = default_dict.var[by[i][by_iter[i]]->foo];
400 for (i = n_by - 1; i >= 0; i--)
402 if (++by_iter[i] < by_nvar[i])
414 /* Despite the name, we come here whether we're successful or
420 for (i = 0; i < n_by; i++)
429 /* Parses the VARIABLES subcommand. */
431 crs_custom_variables (struct cmd_crosstabs *cmd unused)
433 struct variable **v = NULL;
438 msg (SE, _("VARIABLES must be specified before TABLES."));
451 if (!parse_variables (&default_dict, &v, &nv,
452 (PV_APPEND | PV_NUMERIC
453 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
458 lex_error ("expecting `('");
463 if (!lex_force_int ())
465 min = lex_integer ();
470 if (!lex_force_int ())
472 max = lex_integer ();
475 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
483 lex_error ("expecting `)'");
488 for (i = orig_nv; i < nv; i++)
490 v[i]->p.crs.min = min;
491 v[i]->p.crs.max = max + 1.;
492 v[i]->p.crs.count = max - min + 1;
502 var_dict = new_dictionary (0);
503 var_dict->var = xmalloc (sizeof *var_dict->var * nv);
505 for (i = 0; i < nv; i++)
507 struct variable *var = xmalloc (offsetof (struct variable, width));
508 strcpy (var->name, v[i]->name);
510 var->type = v[i]->type;
511 var->foo = v[i]->index;
512 var_dict->var[i] = var;
513 avl_force_insert (var_dict->var_by_name, var);
529 printf ("CROSSTABS\n");
535 printf ("\t/VARIABLES=");
536 for (i = 0; i < var_dict->nvar; i++)
538 struct variable *v = var_dict->var[i];
539 struct variable *iv = default_dict.var[v->foo];
541 printf ("%s ", v->name);
542 if (i < var_dict->nvar - 1)
544 struct variable *nv = var_dict->var[i + 1];
545 struct variable *niv = default_dict.var[nv->foo];
547 if (iv->p.crs.min == niv->p.crs.min
548 && iv->p.crs.max == niv->p.crs.max)
551 printf ("(%d,%d) ", iv->p.crs.min, iv->p.crs.max - 1);
559 printf ("\t/TABLES=");
560 for (i = 0; i < nxtab; i++)
562 struct crosstab *x = xtab[i];
567 for (j = 0; j < x->nvar; j++)
571 printf ("%s", x->v[j]->name);
577 #endif /* DEBUGGING */
579 /* Data file processing. */
581 static int compare_table_entry (const void *, const void *, void *);
582 static unsigned hash_table_entry (const void *, void *);
584 /* Set up the crosstabulation tables for processing. */
590 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
600 for (i = 0; i < nxtab; i++)
602 struct crosstab *x = xtab[i];
607 x->ofs = n_sorted_tab;
609 for (j = 2; j < x->nvar; j++)
610 count *= x->v[j - 2]->p.crs.count;
612 sorted_tab = xrealloc (sorted_tab,
613 sizeof *sorted_tab * (n_sorted_tab + count));
614 v = local_alloc (sizeof *v * x->nvar);
615 for (j = 2; j < x->nvar; j++)
616 v[j] = x->v[j]->p.crs.min;
617 for (j = 0; j < count; j++)
619 struct table_entry *te;
622 te = sorted_tab[n_sorted_tab++]
623 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
627 const int mat_size = (x->v[0]->p.crs.count
628 * x->v[1]->p.crs.count);
631 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
632 for (m = 0; m < mat_size; m++)
636 for (k = 2; k < x->nvar; k++)
638 for (k = 2; k < x->nvar; k++)
639 if (++v[k] >= x->v[k]->p.crs.max)
640 v[k] = x->v[k]->p.crs.min;
647 sorted_tab = xrealloc (sorted_tab,
648 sizeof *sorted_tab * (n_sorted_tab + 1));
649 sorted_tab[n_sorted_tab] = NULL;
653 /* Form crosstabulations for general mode. */
655 calc_general (struct ccase *c)
658 double w = (default_dict.weight_index != -1
659 ? c->data[default_dict.var[default_dict.weight_index]->fv].f
662 /* Flattened current table index. */
665 for (t = 0; t < nxtab; t++)
667 struct crosstab *x = xtab[t];
668 const size_t entry_size = (sizeof (struct table_entry)
669 + sizeof (union value) * (x->nvar - 1));
670 struct table_entry *te = local_alloc (entry_size);
672 /* Construct table entry for the current record and table. */
678 for (j = 0; j < x->nvar; j++)
680 if ((cmd.miss == CRS_TABLE
681 && is_missing (&c->data[x->v[j]->fv], x->v[j]))
682 || (cmd.miss == CRS_INCLUDE
683 && is_system_missing (&c->data[x->v[j]->fv], x->v[j])))
689 if (x->v[j]->type == NUMERIC)
690 te->v[j].f = c->data[x->v[j]->fv].f;
693 memcpy (te->v[j].s, c->data[x->v[j]->fv].s, x->v[j]->width);
695 /* Necessary in order to simplify comparisons. */
696 memset (&te->v[j].s[x->v[j]->width], 0,
697 sizeof (union value) - x->v[j]->width);
702 /* Add record to hash table. */
704 struct table_entry **tepp = (struct table_entry **) hsh_probe (gen_tab, te);
707 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
710 memcpy (tep, te, entry_size);
715 (*tepp)->u.freq += w;
726 calc_integer (struct ccase *c)
729 double w = (default_dict.weight_index != -1
730 ? c->data[default_dict.var[default_dict.weight_index]->fv].f
733 /* Flattened current table index. */
736 for (t = 0; t < nxtab; t++)
738 struct crosstab *x = xtab[t];
743 for (i = 0; i < x->nvar; i++)
745 struct variable *const v = x->v[i];
746 double value = c->data[v->fv].f;
748 /* Note that the first test also rules out SYSMIS. */
749 if ((value < v->p.crs.min || value >= v->p.crs.max)
750 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
758 ofs += fact * ((int) value - v->p.crs.min);
759 fact *= v->p.crs.count;
764 const int row = c->data[x->v[ROW_VAR]->fv].f - x->v[ROW_VAR]->p.crs.min;
765 const int col = c->data[x->v[COL_VAR]->fv].f - x->v[COL_VAR]->p.crs.min;
766 const int col_dim = x->v[COL_VAR]->p.crs.count;
768 sorted_tab[ofs]->u.data[col + row * col_dim] += w;
778 /* Print out all table entries in NULL-terminated TAB for use by a
779 debugger (a person, not a program). */
781 print_table_entries (struct table_entry **tab)
783 printf ("raw crosstabulation data:\n");
786 const struct crosstab *x = xtab[(*tab)->table];
789 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
790 for (i = 0; i < x->nvar; i++)
794 printf ("%s:", x->v[i]->name);
796 if (x->v[i]->type == NUMERIC)
797 printf ("%g", (*tab)->v[i].f);
799 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
807 /* Compare the table_entry's at PA and PB and return a strcmp()-type
810 compare_table_entry (const void *pa, const void *pb, void *foo unused)
812 const struct table_entry *a = pa;
813 const struct table_entry *b = pb;
816 const int difftable = a->table - b->table;
822 const struct crosstab *x = xtab[a->table];
825 for (i = x->nvar - 1; i >= 0; i--)
826 if (x->v[i]->type == NUMERIC)
828 const double diffnum = a->v[i].f - b->v[i].f;
831 else if (diffnum > 0)
836 assert (x->v[i]->type == ALPHA);
838 const int diffstr = strncmp (a->v[i].s, b->v[i].s, x->v[i]->width);
848 /* Calculate a hash value from table_entry PA. */
850 hash_table_entry (const void *pa, void *foo unused)
852 const struct table_entry *a = pa;
853 unsigned long hash = a->table;
856 /* Hash formula from _SPSS Statistical Algorithms_. */
857 for (i = 0; i < xtab[a->table]->nvar; i++)
859 hash = (hash << 3) | (hash >> (CHAR_BIT * SIZEOF_LONG - 3));
860 hash ^= a->v[i].hash[0];
861 #if SIZEOF_DOUBLE / SIZEOF_LONG > 1
862 hash ^= a->v[i].hash[1];
869 /* Post-data reading calculations. */
871 static struct table_entry **find_pivot_extent (struct table_entry **, int *cnt, int pivot);
872 static void enum_var_values (struct table_entry **beg, int cnt,
873 union value **values, int *nvalues,
875 static void output_pivot_table (struct table_entry **, struct table_entry **,
876 double **, double **, double **,
877 int *, int *, int *);
878 static void make_summary_table (void);
885 n_sorted_tab = hsh_count (gen_tab);
886 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
888 print_table_entries (sorted_tab);
892 make_summary_table ();
894 /* Identify all the individual crosstabulation tables, and deal with
897 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
898 int pc = n_sorted_tab; /* Pivot count. */
900 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
901 int maxrows = 0, maxcols = 0, maxcells = 0;
905 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
909 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
910 &maxrows, &maxcols, &maxcells);
919 hsh_destroy (gen_tab);
922 static void insert_summary (struct tab_table *, int tab_index, double valid);
924 /* Output a table summarizing the cases processed. */
926 make_summary_table (void)
928 struct tab_table *summary;
930 struct table_entry **pb = sorted_tab, **pe;
931 int pc = n_sorted_tab;
934 summary = tab_create (7, 3 + nxtab, 1);
935 tab_title (summary, 0, _("Summary."));
936 tab_headers (summary, 1, 0, 3, 0);
937 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
938 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
939 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
940 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
941 tab_hline (summary, TAL_1, 1, 6, 1);
942 tab_hline (summary, TAL_1, 1, 6, 2);
943 tab_vline (summary, TAL_1, 3, 1, 1);
944 tab_vline (summary, TAL_1, 5, 1, 1);
948 for (i = 0; i < 3; i++)
950 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
951 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
954 tab_offset (summary, 0, 3);
960 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
964 while (cur_tab < (*pb)->table)
965 insert_summary (summary, cur_tab++, 0.);
968 for (valid = 0.; pb < pe; pb++)
969 valid += (*pb)->u.freq;
972 const struct crosstab *const x = xtab[(*pb)->table];
973 const int n_cols = x->v[COL_VAR]->p.crs.count;
974 const int n_rows = x->v[ROW_VAR]->p.crs.count;
975 const int count = n_cols * n_rows;
977 for (valid = 0.; pb < pe; pb++)
979 const double *data = (*pb)->u.data;
982 for (i = 0; i < count; i++)
986 insert_summary (summary, cur_tab++, valid);
991 while (cur_tab < nxtab)
992 insert_summary (summary, cur_tab++, 0.);
997 /* Inserts a line into T describing the crosstabulation at index
998 TAB_INDEX, which has VALID valid observations. */
1000 insert_summary (struct tab_table *t, int tab_index, double valid)
1002 struct crosstab *x = xtab[tab_index];
1004 tab_hline (t, TAL_1, 0, 6, 0);
1006 /* Crosstabulation name. */
1008 char *buf = local_alloc (128 * x->nvar);
1012 for (i = 0; i < x->nvar; i++)
1015 cp = stpcpy (cp, " * ");
1017 cp = stpcpy (cp, x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1019 tab_text (t, 0, 0, TAB_LEFT, buf);
1024 /* Counts and percentages. */
1034 for (i = 0; i < 3; i++)
1036 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
1037 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
1038 n[i] / n[2] * 100.);
1048 static struct tab_table *table; /* Crosstabulation table. */
1049 static struct tab_table *chisq; /* Chi-square table. */
1050 static struct tab_table *sym; /* Symmetric measures table. */
1051 static struct tab_table *risk; /* Risk estimate table. */
1052 static struct tab_table *direct; /* Directional measures table. */
1055 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
1057 /* Column values, number of columns. */
1058 static union value *cols;
1061 /* Row values, number of rows. */
1062 static union value *rows;
1065 /* Number of statistically interesting columns/rows (columns/rows with
1067 static int ns_cols, ns_rows;
1069 /* Crosstabulation. */
1070 static struct crosstab *x;
1072 /* Number of variables from the crosstabulation to consider. This is
1073 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1076 /* Matrix contents. */
1077 static double *mat; /* Matrix proper. */
1078 static double *row_tot; /* Row totals. */
1079 static double *col_tot; /* Column totals. */
1080 static double W; /* Grand total. */
1082 static void display_dimensions (struct tab_table *, int first_difference,
1083 struct table_entry *);
1084 static void display_crosstabulation (void);
1085 static void display_chisq (void);
1086 static void display_symmetric (void);
1087 static void display_risk (void);
1088 static void display_directional (void);
1089 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1090 static void table_value_missing (struct tab_table *table, int c, int r,
1091 unsigned char opt, const union value *v,
1092 const struct variable *var);
1093 static void delete_missing (void);
1095 /* Output pivot table beginning at PB and continuing until PE,
1096 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1097 hold *MAXROWS entries. */
1099 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1100 double **matp, double **row_totp, double **col_totp,
1101 int *maxrows, int *maxcols, int *maxcells)
1104 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1105 int tc = pe - pb; /* Table count. */
1107 /* Table entry for header comparison. */
1108 struct table_entry *cmp;
1110 x = xtab[(*pb)->table];
1111 enum_var_values (pb, pe - pb, &cols, &n_cols, COL_VAR);
1113 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1115 /* Crosstabulation table initialization. */
1118 table = tab_create (nvar + n_cols,
1119 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1120 tab_headers (table, nvar - 1, 0, 2, 0);
1122 /* First header line. */
1123 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1124 TAB_CENTER | TAT_TITLE, x->v[COL_VAR]->name);
1126 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1128 /* Second header line. */
1132 for (i = 2; i < nvar; i++)
1133 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1134 TAB_RIGHT | TAT_TITLE,
1135 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1136 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1137 x->v[ROW_VAR]->name);
1138 for (i = 0; i < n_cols; i++)
1139 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1141 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1144 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1145 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1149 char *title = local_alloc (x->nvar * 64 + 128);
1153 if (cmd.pivot == CRS_PIVOT)
1154 for (i = 0; i < nvar; i++)
1157 cp = stpcpy (cp, " by ");
1158 cp = stpcpy (cp, x->v[i]->name);
1162 cp = spprintf (cp, "%s by %s for", x->v[0]->name, x->v[1]->name);
1163 for (i = 2; i < nvar; i++)
1165 char buf[64], *bufp;
1170 cp = stpcpy (cp, x->v[i]->name);
1172 data_out (buf, &x->v[i]->print, &(*pb)->v[i]);
1173 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1175 cp = stpcpy (cp, bufp);
1179 cp = stpcpy (cp, " [");
1180 for (i = 0; i < num_cells; i++)
1188 static const struct tuple cell_names[] =
1190 {CRS_CL_COUNT, N_("count")},
1191 {CRS_CL_ROW, N_("row %")},
1192 {CRS_CL_COLUMN, N_("column %")},
1193 {CRS_CL_TOTAL, N_("total %")},
1194 {CRS_CL_EXPECTED, N_("expected")},
1195 {CRS_CL_RESIDUAL, N_("residual")},
1196 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1197 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1201 const struct tuple *t;
1203 for (t = cell_names; t->value != cells[i]; t++)
1204 assert (t->value != -1);
1206 cp = stpcpy (cp, ", ");
1207 cp = stpcpy (cp, gettext (t->name));
1211 tab_title (table, 0, title);
1215 tab_offset (table, 0, 2);
1220 /* Chi-square table initialization. */
1221 if (cmd.a_statistics[CRS_ST_CHISQ])
1223 chisq = tab_create (6 + (nvar - 2),
1224 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1225 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1227 tab_title (chisq, 0, "Chi-square tests.");
1229 tab_offset (chisq, nvar - 2, 0);
1230 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1231 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1232 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1233 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1234 _("Asymp. Sig. (2-sided)"));
1235 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1236 _("Exact. Sig. (2-sided)"));
1237 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1238 _("Exact. Sig. (1-sided)"));
1240 tab_offset (chisq, 0, 1);
1245 /* Symmetric measures. */
1246 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1247 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1248 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1249 || cmd.a_statistics[CRS_ST_KAPPA])
1251 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1252 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1253 tab_title (sym, 0, "Symmetric measures.");
1255 tab_offset (sym, nvar - 2, 0);
1256 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1257 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1258 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1259 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1260 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1261 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1262 tab_offset (sym, 0, 1);
1267 /* Risk estimate. */
1268 if (cmd.a_statistics[CRS_ST_RISK])
1270 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1271 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1272 tab_title (risk, 0, "Risk estimate.");
1274 tab_offset (risk, nvar - 2, 0);
1275 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1276 _(" 95%% Confidence Interval"));
1277 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1278 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1279 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1280 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1281 tab_hline (risk, TAL_1, 2, 3, 1);
1282 tab_vline (risk, TAL_1, 2, 0, 1);
1283 tab_offset (risk, 0, 2);
1288 /* Directional measures. */
1289 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1290 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1292 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1293 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1294 tab_title (direct, 0, "Directional measures.");
1296 tab_offset (direct, nvar - 2, 0);
1297 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1298 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1299 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1300 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1301 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1302 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1303 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1304 tab_offset (direct, 0, 1);
1311 /* Find pivot subtable if applicable. */
1312 te = find_pivot_extent (tb, &tc, 0);
1316 /* Find all the row variable values. */
1317 enum_var_values (tb, te - tb, &rows, &n_rows, ROW_VAR);
1319 /* Allocate memory space for the column and row totals. */
1320 if (n_rows > *maxrows)
1322 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1323 row_tot = *row_totp;
1326 if (n_cols > *maxcols)
1328 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1329 col_tot = *col_totp;
1333 /* Allocate table space for the matrix. */
1334 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1335 tab_realloc (table, -1,
1336 max (tab_nr (table) + (n_rows + 1) * num_cells,
1337 tab_nr (table) * (pe - pb) / (te - tb)));
1339 if (mode == GENERAL)
1341 /* Allocate memory space for the matrix. */
1342 if (n_cols * n_rows > *maxcells)
1344 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1345 *maxcells = n_cols * n_rows;
1350 /* Build the matrix and calculate column totals. */
1352 union value *cur_col = cols;
1353 union value *cur_row = rows;
1355 double *cp = col_tot;
1356 struct table_entry **p;
1359 for (p = &tb[0]; p < te; p++)
1361 for (; memcmp (cur_col, &(*p)->v[COL_VAR], sizeof *cur_col);
1365 for (; cur_row < &rows[n_rows]; cur_row++)
1371 mp = &mat[cur_col - cols];
1374 for (; memcmp (cur_row, &(*p)->v[ROW_VAR], sizeof *cur_row);
1381 *cp += *mp = (*p)->u.freq;
1386 /* Zero out the rest of the matrix. */
1387 for (; cur_row < &rows[n_rows]; cur_row++)
1393 if (cur_col < &cols[n_cols])
1395 const int rem_cols = n_cols - (cur_col - cols);
1398 for (c = 0; c < rem_cols; c++)
1400 mp = &mat[cur_col - cols];
1401 for (r = 0; r < n_rows; r++)
1403 for (c = 0; c < rem_cols; c++)
1405 mp += n_cols - rem_cols;
1413 double *tp = col_tot;
1415 assert (mode == INTEGER);
1416 mat = (*tb)->u.data;
1419 /* Calculate column totals. */
1420 for (c = 0; c < n_cols; c++)
1423 double *cp = &mat[c];
1425 for (r = 0; r < n_rows; r++)
1426 cum += cp[r * n_cols];
1434 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1435 ns_cols += *cp != 0.;
1438 /* Calculate row totals. */
1441 double *rp = row_tot;
1444 for (ns_rows = 0, r = n_rows; r--; )
1447 for (c = n_cols; c--; )
1455 /* Calculate grand total. */
1461 if (n_rows < n_cols)
1462 tp = row_tot, n = n_rows;
1464 tp = col_tot, n = n_cols;
1471 /* Print the matrix. */
1475 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1476 for (i = 2; i < nvar; i++)
1477 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1480 for (c = 0; c < n_cols; c++)
1481 printf ("%4g", cols[c].f);
1483 for (r = 0; r < n_rows; r++)
1485 printf ("%4g:", rows[r].f);
1486 for (c = 0; c < n_cols; c++)
1487 printf ("%4g", mat[c + r * n_cols]);
1488 printf ("%4g", row_tot[r]);
1492 for (c = 0; c < n_cols; c++)
1493 printf ("%4g", col_tot[c]);
1499 /* Find the first variable that differs from the last subtable,
1500 then display the values of the dimensioning variables for
1501 each table that needs it. */
1503 int first_difference = nvar - 1;
1506 for (; ; first_difference--)
1508 assert (first_difference >= 2);
1509 if (memcmp (&cmp->v[first_difference],
1510 &(*tb)->v[first_difference], sizeof *cmp->v))
1516 display_dimensions (table, first_difference, *tb);
1518 display_dimensions (chisq, first_difference, *tb);
1520 display_dimensions (sym, first_difference, *tb);
1522 display_dimensions (risk, first_difference, *tb);
1524 display_dimensions (direct, first_difference, *tb);
1528 display_crosstabulation ();
1529 if (cmd.miss == CRS_REPORT)
1534 display_symmetric ();
1538 display_directional ();
1549 tab_resize (chisq, 4 + (nvar - 2), -1);
1560 /* Delete missing rows and columns for statistical analysis when
1563 delete_missing (void)
1568 for (r = 0; r < n_rows; r++)
1569 if (is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1573 for (c = 0; c < n_cols; c++)
1574 mat[c + r * n_cols] = 0.;
1582 for (c = 0; c < n_cols; c++)
1583 if (is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1587 for (r = 0; r < n_rows; r++)
1588 mat[c + r * n_cols] = 0.;
1594 /* Prepare table T for submission, and submit it. */
1596 submit (struct tab_table *t)
1603 tab_resize (t, -1, 0);
1604 if (tab_nr (t) == tab_t (t))
1609 tab_offset (t, 0, 0);
1611 for (i = 2; i < nvar; i++)
1612 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1613 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1614 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1615 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1617 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1619 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1620 tab_dim (t, crosstabs_dim);
1624 /* Sets the widths of all the columns and heights of all the rows in
1625 table T for driver D. */
1627 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1631 /* Width of a numerical column. */
1632 int c = outp_string_width (d, "0.000000");
1633 if (cmd.miss == CRS_REPORT)
1634 c += outp_string_width (d, "M");
1636 /* Set width for header columns. */
1639 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1641 if (w < d->prop_em_width * 8)
1642 w = d->prop_em_width * 8;
1644 if (w > d->prop_em_width * 15)
1645 w = d->prop_em_width * 15;
1647 for (i = 0; i < t->l; i++)
1651 for (i = t->l; i < t->nc; i++)
1654 for (i = 0; i < t->nr; i++)
1655 t->h[i] = tab_natural_height (t, d, i);
1658 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1659 int *cnt, int pivot);
1660 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1661 int *cnt, int pivot);
1663 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1665 static struct table_entry **
1666 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1668 return (mode == GENERAL
1669 ? find_pivot_extent_general (tp, cnt, pivot)
1670 : find_pivot_extent_integer (tp, cnt, pivot));
1673 /* Find the extent of a region in TP that contains one table. If
1674 PIVOT != 0 that means a set of table entries with identical table
1675 number; otherwise they also have to have the same values for every
1676 dimension after the row and column dimensions. The table that is
1677 searched starts at TP and has length CNT. Returns the first entry
1678 after the last one in the table; sets *CNT to the number of
1679 remaining values. If there are no entries in TP at all, returns
1680 NULL. A yucky interface, admittedly, but it works. */
1681 static struct table_entry **
1682 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1684 struct table_entry *fp = *tp;
1689 x = xtab[(*tp)->table];
1697 if ((*tp)->table != fp->table)
1702 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1709 /* Integer mode correspondent to find_pivot_extent_general(). This
1710 could be optimized somewhat, but I just don't give a crap about
1711 CROSSTABS performance in integer mode, which is just a wart on
1712 CROSSTABS' ass as far as I'm concerned.
1714 That said, feel free to send optimization patches to me. */
1715 static struct table_entry **
1716 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1718 struct table_entry *fp = *tp;
1723 x = xtab[(*tp)->table];
1731 if ((*tp)->table != fp->table)
1736 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1743 /* Compare value * A and B, where WIDTH is the string width or 0 for
1744 numerics, and return a strcmp()-type result. */
1746 compare_value (const void *pa, const void *pb, void *pwidth)
1748 const union value *a = pa;
1749 const union value *b = pb;
1750 const int width = (int) pwidth;
1753 return strncmp (a->s, b->s, width);
1755 return a->f < b->f ? -1 : (a->f > b->f ? 1 : 0);
1758 /* Given a list of CNT table_entry's starting at BEG, creates a list
1759 of *NVALUES values *VALUES of variable with index VAR_INDEX. */
1761 enum_var_values (struct table_entry **beg, int cnt, union value **values, int *nvalues,
1764 if (mode == GENERAL)
1768 tree = avl_create (pl_col, compare_value,
1769 (void *) (xtab[(*beg)->table]->v[var_index]->width));
1774 for (i = 0; i < cnt; i++)
1775 avl_insert (tree, &beg[i]->v[var_index]);
1776 *values = xmalloc (sizeof **values * avl_count (tree));
1784 avl_traverser_init(trav);
1786 while (NULL != (v = avl_traverse (tree, &trav)))
1787 (*values)[i++] = *v;
1792 pool_destroy (pl_col);
1793 pl_col = pool_create ();
1797 struct crosstab_proc *crs = &xtab[(*beg)->table]->v[var_index]->p.crs;
1800 assert (mode == INTEGER);
1801 *values = xmalloc (sizeof **values * crs->count);
1802 for (i = 0; i < crs->count; i++)
1803 (*values)[i].f = i + crs->min;
1804 *nvalues = crs->count;
1808 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1809 from V, displayed with print format spec from variable VAR. When
1810 in REPORT missing-value mode, missing values have an M appended. */
1812 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1813 const union value *v, const struct variable *var)
1815 struct len_string s;
1817 char *label = get_val_lab (var, *v, 0);
1820 tab_text (table, c, r, TAB_LEFT, label);
1824 s.length = var->print.w;
1825 s.string = tab_alloc (table, s.length + 1);
1826 data_out (s.string, &var->print, v);
1827 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1828 s.string[s.length++] = 'M';
1829 while (s.length && *s.string == ' ')
1834 tab_raw (table, c, r, opt, &s);
1837 /* Draws a line across TABLE at the current row to indicate the most
1838 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1839 that changed, and puts the values that changed into the table. TB
1840 and X must be the corresponding table_entry and crosstab,
1843 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1845 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1847 for (; first_difference >= 2; first_difference--)
1848 table_value_missing (table, nvar - first_difference - 1, 0,
1849 TAB_RIGHT, &tb->v[first_difference],
1850 x->v[first_difference]);
1853 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1855 float_M_suffix (struct tab_table *table, int c, int r, double v)
1857 static const struct fmt_spec f = {FMT_F, 8, 0};
1858 struct len_string s;
1861 s.string = tab_alloc (table, 9);
1863 data_out (s.string, &f, (union value *) &v);
1864 while (*s.string == ' ')
1869 tab_raw (table, c, r, TAB_RIGHT, &s);
1872 /* Displays the crosstabulation table. */
1874 display_crosstabulation (void)
1879 for (r = 0; r < n_rows; r++)
1880 table_value_missing (table, nvar - 2, r * num_cells,
1881 TAB_RIGHT, &rows[r], x->v[ROW_VAR]);
1883 tab_text (table, nvar - 2, n_rows * num_cells,
1884 TAB_LEFT, _("Total"));
1886 /* Put in the actual cells. */
1891 tab_offset (table, nvar - 1, -1);
1892 for (r = 0; r < n_rows; r++)
1895 tab_hline (table, TAL_1, -1, n_cols, 0);
1896 for (c = 0; c < n_cols; c++)
1898 double expected_value;
1901 expected_value = row_tot[r] * col_tot[c] / W;
1902 for (i = 0; i < num_cells; i++)
1912 v = *mp / row_tot[r] * 100.;
1915 v = *mp / col_tot[c] * 100.;
1920 case CRS_CL_EXPECTED:
1923 case CRS_CL_RESIDUAL:
1924 v = *mp - expected_value;
1926 case CRS_CL_SRESIDUAL:
1927 v = (*mp - expected_value) / sqrt (expected_value);
1929 case CRS_CL_ASRESIDUAL:
1930 v = ((*mp - expected_value)
1931 / sqrt (expected_value
1932 * (1. - row_tot[r] / W)
1933 * (1. - col_tot[c] / W)));
1939 if (cmd.miss == CRS_REPORT
1940 && (is_num_user_missing (cols[c].f, x->v[COL_VAR])
1941 || is_num_user_missing (rows[r].f, x->v[ROW_VAR])))
1942 float_M_suffix (table, c, i, v);
1944 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1950 tab_offset (table, -1, tab_row (table) + num_cells);
1958 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1959 for (r = 0; r < n_rows; r++)
1960 for (i = 0; i < num_cells; i++)
1973 v = row_tot[r] / W * 100.;
1976 v = row_tot[r] / W * 100.;
1978 case CRS_CL_EXPECTED:
1979 case CRS_CL_RESIDUAL:
1980 case CRS_CL_SRESIDUAL:
1981 case CRS_CL_ASRESIDUAL:
1988 if (cmd.miss == CRS_REPORT
1989 && is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1990 float_M_suffix (table, n_cols, 0, v);
1992 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1994 tab_next_row (table);
1998 /* Column totals, grand total. */
2003 tab_hline (table, TAL_1, -1, n_cols, 0);
2004 for (c = 0; c <= n_cols; c++)
2006 double ct = c < n_cols ? col_tot[c] : W;
2009 for (i = j = 0; i < num_cells; i++)
2027 case CRS_CL_EXPECTED:
2028 case CRS_CL_RESIDUAL:
2029 case CRS_CL_SRESIDUAL:
2030 case CRS_CL_ASRESIDUAL:
2036 if (cmd.miss == CRS_REPORT && c < n_cols
2037 && is_num_user_missing (cols[c].f, x->v[COL_VAR]))
2038 float_M_suffix (table, c, j, v);
2040 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
2046 tab_offset (table, -1, tab_row (table) + j);
2049 tab_offset (table, 0, -1);
2052 static void calc_r (double *X, double *Y, double *, double *, double *);
2053 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
2055 /* Display chi-square statistics. */
2057 display_chisq (void)
2059 static const char *chisq_stats[N_CHISQ] =
2061 N_("Pearson Chi-Square"),
2062 N_("Likelihood Ratio"),
2063 N_("Fisher's Exact Test"),
2064 N_("Continuity Correction"),
2065 N_("Linear-by-Linear Association"),
2067 double chisq_v[N_CHISQ];
2068 double fisher1, fisher2;
2074 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2076 tab_offset (chisq, nvar - 2, -1);
2078 for (i = 0; i < N_CHISQ; i++)
2080 if ((i != 2 && chisq_v[i] == SYSMIS)
2081 || (i == 2 && fisher1 == SYSMIS))
2085 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2088 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2089 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2090 tab_float (chisq, 3, 0, TAB_RIGHT,
2091 chisq_sig (chisq_v[i], df[i]), 8, 3);
2096 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2097 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2099 tab_next_row (chisq);
2102 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2103 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2104 tab_next_row (chisq);
2106 tab_offset (chisq, 0, -1);
2109 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2110 double[N_SYMMETRIC]);
2112 /* Display symmetric measures. */
2114 display_symmetric (void)
2116 static const char *categories[] =
2118 N_("Nominal by Nominal"),
2119 N_("Ordinal by Ordinal"),
2120 N_("Interval by Interval"),
2121 N_("Measure of Agreement"),
2124 static const char *stats[N_SYMMETRIC] =
2128 N_("Contingency Coefficient"),
2129 N_("Kendall's tau-b"),
2130 N_("Kendall's tau-c"),
2132 N_("Spearman Correlation"),
2137 static const int stats_categories[N_SYMMETRIC] =
2139 0, 0, 0, 1, 1, 1, 1, 2, 3,
2143 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2146 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2149 tab_offset (sym, nvar - 2, -1);
2151 for (i = 0; i < N_SYMMETRIC; i++)
2153 if (sym_v[i] == SYSMIS)
2156 if (stats_categories[i] != last_cat)
2158 last_cat = stats_categories[i];
2159 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2162 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2163 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2164 if (sym_ase[i] != SYSMIS)
2165 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2166 if (sym_t[i] != SYSMIS)
2167 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2168 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2172 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2173 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2176 tab_offset (sym, 0, -1);
2179 static int calc_risk (double[], double[], double[], union value *);
2181 /* Display risk estimate. */
2186 double risk_v[3], lower[3], upper[3];
2190 if (!calc_risk (risk_v, upper, lower, c))
2193 tab_offset (risk, nvar - 2, -1);
2195 for (i = 0; i < 3; i++)
2197 if (risk_v[i] == SYSMIS)
2203 if (x->v[COL_VAR]->type == NUMERIC)
2204 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2205 x->v[COL_VAR]->name, c[0].f, c[1].f);
2207 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2208 x->v[COL_VAR]->name,
2209 x->v[COL_VAR]->width, c[0].s,
2210 x->v[COL_VAR]->width, c[1].s);
2214 if (x->v[ROW_VAR]->type == NUMERIC)
2215 sprintf (buf, _("For cohort %s = %g"),
2216 x->v[ROW_VAR]->name, rows[i - 1].f);
2218 sprintf (buf, _("For cohort %s = %.*s"),
2219 x->v[ROW_VAR]->name,
2220 x->v[ROW_VAR]->width, rows[i - 1].s);
2224 tab_text (risk, 0, 0, TAB_LEFT, buf);
2225 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2226 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2227 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2228 tab_next_row (risk);
2231 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2232 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2233 tab_next_row (risk);
2235 tab_offset (risk, 0, -1);
2238 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2239 double[N_DIRECTIONAL]);
2241 /* Display directional measures. */
2243 display_directional (void)
2245 static const char *categories[] =
2247 N_("Nominal by Nominal"),
2248 N_("Ordinal by Ordinal"),
2249 N_("Nominal by Interval"),
2252 static const char *stats[] =
2255 N_("Goodman and Kruskal tau"),
2256 N_("Uncertainty Coefficient"),
2261 static const char *types[] =
2268 static const int stats_categories[N_DIRECTIONAL] =
2270 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2273 static const int stats_stats[N_DIRECTIONAL] =
2275 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2278 static const int stats_types[N_DIRECTIONAL] =
2280 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2283 static const int *stats_lookup[] =
2290 static const char **stats_names[] =
2302 double direct_v[N_DIRECTIONAL];
2303 double direct_ase[N_DIRECTIONAL];
2304 double direct_t[N_DIRECTIONAL];
2308 if (!calc_directional (direct_v, direct_ase, direct_t))
2311 tab_offset (direct, nvar - 2, -1);
2313 for (i = 0; i < N_DIRECTIONAL; i++)
2315 if (direct_v[i] == SYSMIS)
2321 for (j = 0; j < 3; j++)
2322 if (last[j] != stats_lookup[j][i])
2325 tab_hline (direct, TAL_1, j, 6, 0);
2330 int k = last[j] = stats_lookup[j][i];
2335 string = x->v[0]->name;
2337 string = x->v[1]->name;
2339 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2340 gettext (stats_names[j][k]), string);
2345 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2346 if (direct_ase[i] != SYSMIS)
2347 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2348 if (direct_t[i] != SYSMIS)
2349 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2350 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2351 tab_next_row (direct);
2354 tab_offset (direct, 0, -1);
2357 /* Statistical calculations. */
2359 /* Returns the value of the gamma (factorial) function for an integer
2362 gamma_int (double x)
2367 for (i = 2; i < x; i++)
2372 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2374 static inline double
2375 Pr (int a, int b, int c, int d)
2377 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2378 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2379 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2380 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2381 / gamma_int (a + b + c + d + 1.));
2384 /* Swap the contents of A and B. */
2386 swap (int *a, int *b)
2393 /* Calculate significance for Fisher's exact test as specified in
2394 _SPSS Statistical Algorithms_, Appendix 5. */
2396 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2400 if (min (c, d) < min (a, b))
2401 swap (&a, &c), swap (&b, &d);
2402 if (min (b, d) < min (a, c))
2403 swap (&a, &b), swap (&c, &d);
2407 swap (&a, &b), swap (&c, &d);
2409 swap (&a, &c), swap (&b, &d);
2413 for (x = 0; x <= a; x++)
2414 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2416 *fisher2 = *fisher1;
2417 for (x = 1; x <= b; x++)
2418 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2421 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2422 columns with values COLS and N_ROWS rows with values ROWS. Values
2423 in the matrix sum to W. */
2425 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2426 double *fisher1, double *fisher2)
2430 chisq[0] = chisq[1] = 0.;
2431 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2432 *fisher1 = *fisher2 = SYSMIS;
2434 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2436 if (ns_rows <= 1 || ns_cols <= 1)
2438 chisq[0] = chisq[1] = SYSMIS;
2442 for (r = 0; r < n_rows; r++)
2443 for (c = 0; c < n_cols; c++)
2445 const double expected = row_tot[r] * col_tot[c] / W;
2446 const double freq = mat[n_cols * r + c];
2447 const double residual = freq - expected;
2450 chisq[0] += residual * residual / expected;
2452 chisq[1] += freq * log (expected / freq);
2463 /* Calculate Yates and Fisher exact test. */
2464 if (ns_cols == 2 && ns_rows == 2)
2466 double f11, f12, f21, f22;
2472 for (i = j = 0; i < n_cols; i++)
2473 if (col_tot[i] != 0.)
2482 f11 = mat[nz_cols[0]];
2483 f12 = mat[nz_cols[1]];
2484 f21 = mat[nz_cols[0] + n_cols];
2485 f22 = mat[nz_cols[1] + n_cols];
2490 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2493 chisq[3] = (W * x * x
2494 / (f11 + f12) / (f21 + f22)
2495 / (f11 + f21) / (f12 + f22));
2503 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2504 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2507 /* Calculate Mantel-Haenszel. */
2508 if (x->v[ROW_VAR]->type == NUMERIC && x->v[COL_VAR]->type == NUMERIC)
2510 double r, ase_0, ase_1;
2511 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2513 chisq[4] = (W - 1.) * r * r;
2518 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2519 ASE_1, and ase_0 into ASE_0. The row and column values must be
2520 passed in X and Y. */
2522 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2524 double SX, SY, S, T;
2526 double sum_XYf, sum_X2Y2f;
2527 double sum_Xr, sum_X2r;
2528 double sum_Yc, sum_Y2c;
2531 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2532 for (j = 0; j < n_cols; j++)
2534 double fij = mat[j + i * n_cols];
2535 double product = X[i] * Y[j];
2536 double temp = fij * product;
2538 sum_X2Y2f += temp * product;
2541 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2543 sum_Xr += X[i] * row_tot[i];
2544 sum_X2r += X[i] * X[i] * row_tot[i];
2548 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2550 sum_Yc += Y[i] * col_tot[i];
2551 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2555 S = sum_XYf - sum_Xr * sum_Yc / W;
2556 SX = sum_X2r - sum_Xr * sum_Xr / W;
2557 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2560 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2565 for (s = c = 0., i = 0; i < n_rows; i++)
2566 for (j = 0; j < n_cols; j++)
2568 double Xresid, Yresid;
2571 Xresid = X[i] - Xbar;
2572 Yresid = Y[j] - Ybar;
2573 temp = (T * Xresid * Yresid
2575 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2576 y = mat[j + i * n_cols] * temp * temp - c;
2581 *ase_1 = sqrt (s) / (T * T);
2585 static double somers_d_v[3];
2586 static double somers_d_ase[3];
2587 static double somers_d_t[3];
2589 /* Calculate symmetric statistics and their asymptotic standard
2590 errors. Returns 0 if none could be calculated. */
2592 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2593 double t[N_SYMMETRIC])
2595 int q = min (ns_rows, ns_cols);
2604 for (i = 0; i < N_SYMMETRIC; i++)
2605 v[i] = ase[i] = t[i] = SYSMIS;
2608 /* Phi, Cramer's V, contingency coefficient. */
2609 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2611 double Xp = 0.; /* Pearson chi-square. */
2616 for (r = 0; r < n_rows; r++)
2617 for (c = 0; c < n_cols; c++)
2619 const double expected = row_tot[r] * col_tot[c] / W;
2620 const double freq = mat[n_cols * r + c];
2621 const double residual = freq - expected;
2624 Xp += residual * residual / expected;
2628 if (cmd.a_statistics[CRS_ST_PHI])
2630 v[0] = sqrt (Xp / W);
2631 v[1] = sqrt (Xp / (W * (q - 1)));
2633 if (cmd.a_statistics[CRS_ST_CC])
2634 v[2] = sqrt (Xp / (Xp + W));
2637 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2638 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2643 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2650 for (r = 0; r < n_rows; r++)
2651 Dr -= row_tot[r] * row_tot[r];
2652 for (c = 0; c < n_cols; c++)
2653 Dc -= col_tot[c] * col_tot[c];
2659 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2660 for (c = 0; c < n_cols; c++)
2664 for (r = 0; r < n_rows; r++)
2665 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2675 for (i = 0; i < n_rows; i++)
2679 for (j = 1; j < n_cols; j++)
2680 Cij += col_tot[j] - cum[j + i * n_cols];
2683 for (j = 1; j < n_cols; j++)
2684 Dij += cum[j + (i - 1) * n_cols];
2688 double fij = mat[j + i * n_cols];
2694 assert (j < n_cols);
2696 Cij -= col_tot[j] - cum[j + i * n_cols];
2697 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2701 Cij += cum[j - 1 + (i - 1) * n_cols];
2702 Dij -= cum[j + (i - 1) * n_cols];
2708 if (cmd.a_statistics[CRS_ST_BTAU])
2709 v[3] = (P - Q) / sqrt (Dr * Dc);
2710 if (cmd.a_statistics[CRS_ST_CTAU])
2711 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2712 if (cmd.a_statistics[CRS_ST_GAMMA])
2713 v[5] = (P - Q) / (P + Q);
2715 /* ASE for tau-b, tau-c, gamma. Calculations could be
2716 eliminated here, at expense of memory. */
2721 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2722 for (i = 0; i < n_rows; i++)
2726 for (j = 1; j < n_cols; j++)
2727 Cij += col_tot[j] - cum[j + i * n_cols];
2730 for (j = 1; j < n_cols; j++)
2731 Dij += cum[j + (i - 1) * n_cols];
2735 double fij = mat[j + i * n_cols];
2737 if (cmd.a_statistics[CRS_ST_BTAU])
2739 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2740 + v[3] * (row_tot[i] * Dc
2741 + col_tot[j] * Dr));
2742 btau_cum += fij * temp * temp;
2746 const double temp = Cij - Dij;
2747 ctau_cum += fij * temp * temp;
2750 if (cmd.a_statistics[CRS_ST_GAMMA])
2752 const double temp = Q * Cij - P * Dij;
2753 gamma_cum += fij * temp * temp;
2756 if (cmd.a_statistics[CRS_ST_D])
2758 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2759 - (P - Q) * (W - row_tot[i]));
2760 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2761 - (Q - P) * (W - col_tot[j]));
2766 assert (j < n_cols);
2768 Cij -= col_tot[j] - cum[j + i * n_cols];
2769 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2773 Cij += cum[j - 1 + (i - 1) * n_cols];
2774 Dij -= cum[j + (i - 1) * n_cols];
2780 btau_var = ((btau_cum
2781 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2783 if (cmd.a_statistics[CRS_ST_BTAU])
2785 ase[3] = sqrt (btau_var);
2786 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2789 if (cmd.a_statistics[CRS_ST_CTAU])
2791 ase[4] = ((2 * q / ((q - 1) * W * W))
2792 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2793 t[4] = v[4] / ase[4];
2795 if (cmd.a_statistics[CRS_ST_GAMMA])
2797 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2798 t[5] = v[5] / (2. / (P + Q)
2799 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2801 if (cmd.a_statistics[CRS_ST_D])
2803 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2804 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2805 somers_d_t[0] = (somers_d_v[0]
2807 * sqrt (ctau_cum - sqr (P - Q) / W)));
2808 somers_d_v[1] = (P - Q) / Dc;
2809 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2810 somers_d_t[1] = (somers_d_v[1]
2812 * sqrt (ctau_cum - sqr (P - Q) / W)));
2813 somers_d_v[2] = (P - Q) / Dr;
2814 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2815 somers_d_t[2] = (somers_d_v[2]
2817 * sqrt (ctau_cum - sqr (P - Q) / W)));
2823 /* Spearman correlation, Pearson's r. */
2824 if (cmd.a_statistics[CRS_ST_CORR])
2826 double *R = local_alloc (sizeof *R * n_rows);
2827 double *C = local_alloc (sizeof *C * n_cols);
2830 double y, t, c = 0., s = 0.;
2835 R[i] = s + (row_tot[i] + 1.) / 2.;
2842 assert (i < n_rows);
2847 double y, t, c = 0., s = 0.;
2852 C[j] = s + (col_tot[j] + 1.) / 2;
2859 assert (j < n_cols);
2863 calc_r (R, C, &v[6], &t[6], &ase[6]);
2869 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2873 /* Cohen's kappa. */
2874 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2876 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2879 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2880 i < ns_rows; i++, j++)
2884 while (col_tot[j] == 0.)
2887 prod = row_tot[i] * col_tot[j];
2888 sum = row_tot[i] + col_tot[j];
2890 sum_fii += mat[j + i * n_cols];
2892 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2893 sum_riciri_ci += prod * sum;
2895 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2896 for (j = 0; j < ns_cols; j++)
2898 double sum = row_tot[i] + col_tot[j];
2899 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2902 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2904 ase[8] = sqrt ((W * W * sum_rici
2905 + sum_rici * sum_rici
2906 - W * sum_riciri_ci)
2907 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2909 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2910 / sqr (W * W - sum_rici))
2911 + ((2. * (W - sum_fii)
2912 * (2. * sum_fii * sum_rici
2913 - W * sum_fiiri_ci))
2914 / cube (W * W - sum_rici))
2915 + (sqr (W - sum_fii)
2916 * (W * sum_fijri_ci2 - 4.
2917 * sum_rici * sum_rici)
2918 / hypercube (W * W - sum_rici))));
2920 t[8] = v[8] / ase[8];
2927 /* Calculate risk estimate. */
2929 calc_risk (double *value, double *upper, double *lower, union value *c)
2931 double f11, f12, f21, f22;
2937 for (i = 0; i < 3; i++)
2938 value[i] = upper[i] = lower[i] = SYSMIS;
2941 if (ns_rows != 2 || ns_cols != 2)
2948 for (i = j = 0; i < n_cols; i++)
2949 if (col_tot[i] != 0.)
2958 f11 = mat[nz_cols[0]];
2959 f12 = mat[nz_cols[1]];
2960 f21 = mat[nz_cols[0] + n_cols];
2961 f22 = mat[nz_cols[1] + n_cols];
2963 c[0] = cols[nz_cols[0]];
2964 c[1] = cols[nz_cols[1]];
2967 value[0] = (f11 * f22) / (f12 * f21);
2968 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2969 lower[0] = value[0] * exp (-1.960 * v);
2970 upper[0] = value[0] * exp (1.960 * v);
2972 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2973 v = sqrt ((f12 / (f11 * (f11 + f12)))
2974 + (f22 / (f21 * (f21 + f22))));
2975 lower[1] = value[1] * exp (-1.960 * v);
2976 upper[1] = value[1] * exp (1.960 * v);
2978 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2979 v = sqrt ((f11 / (f12 * (f11 + f12)))
2980 + (f21 / (f22 * (f21 + f22))));
2981 lower[2] = value[2] * exp (-1.960 * v);
2982 upper[2] = value[2] * exp (1.960 * v);
2987 /* Calculate directional measures. */
2989 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2990 double t[N_DIRECTIONAL])
2995 for (i = 0; i < N_DIRECTIONAL; i++)
2996 v[i] = ase[i] = t[i] = SYSMIS;
3000 if (cmd.a_statistics[CRS_ST_LAMBDA])
3002 double *fim = xmalloc (sizeof *fim * n_rows);
3003 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
3004 double *fmj = xmalloc (sizeof *fmj * n_cols);
3005 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
3006 double sum_fim, sum_fmj;
3008 int rm_index, cm_index;
3011 /* Find maximum for each row and their sum. */
3012 for (sum_fim = 0., i = 0; i < n_rows; i++)
3014 double max = mat[i * n_cols];
3017 for (j = 1; j < n_cols; j++)
3018 if (mat[j + i * n_cols] > max)
3020 max = mat[j + i * n_cols];
3024 sum_fim += fim[i] = max;
3025 fim_index[i] = index;
3028 /* Find maximum for each column. */
3029 for (sum_fmj = 0., j = 0; j < n_cols; j++)
3031 double max = mat[j];
3034 for (i = 1; i < n_rows; i++)
3035 if (mat[j + i * n_cols] > max)
3037 max = mat[j + i * n_cols];
3041 sum_fmj += fmj[j] = max;
3042 fmj_index[j] = index;
3045 /* Find maximum row total. */
3048 for (i = 1; i < n_rows; i++)
3049 if (row_tot[i] > rm)
3055 /* Find maximum column total. */
3058 for (j = 1; j < n_cols; j++)
3059 if (col_tot[j] > cm)
3065 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
3066 v[1] = (sum_fmj - rm) / (W - rm);
3067 v[2] = (sum_fim - cm) / (W - cm);
3069 /* ASE1 for Y given X. */
3073 for (accum = 0., i = 0; i < n_rows; i++)
3074 for (j = 0; j < n_cols; j++)
3076 const int deltaj = j == cm_index;
3077 accum += (mat[j + i * n_cols]
3078 * sqr ((j == fim_index[i])
3083 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3086 /* ASE0 for Y given X. */
3090 for (accum = 0., i = 0; i < n_rows; i++)
3091 if (cm_index != fim_index[i])
3092 accum += (mat[i * n_cols + fim_index[i]]
3093 + mat[i * n_cols + cm_index]);
3094 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3097 /* ASE1 for X given Y. */
3101 for (accum = 0., i = 0; i < n_rows; i++)
3102 for (j = 0; j < n_cols; j++)
3104 const int deltaj = i == rm_index;
3105 accum += (mat[j + i * n_cols]
3106 * sqr ((i == fmj_index[j])
3111 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3114 /* ASE0 for X given Y. */
3118 for (accum = 0., j = 0; j < n_cols; j++)
3119 if (rm_index != fmj_index[j])
3120 accum += (mat[j + n_cols * fmj_index[j]]
3121 + mat[j + n_cols * rm_index]);
3122 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3125 /* Symmetric ASE0 and ASE1. */
3130 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3131 for (j = 0; j < n_cols; j++)
3133 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3134 int temp1 = (i == rm_index) + (j == cm_index);
3135 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3136 accum1 += (mat[j + i * n_cols]
3137 * sqr (temp0 + (v[0] - 1.) * temp1));
3139 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3140 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3141 / (2. * W - rm - cm));
3150 double sum_fij2_ri, sum_fij2_ci;
3151 double sum_ri2, sum_cj2;
3153 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3154 for (j = 0; j < n_cols; j++)
3156 double temp = sqr (mat[j + i * n_cols]);
3157 sum_fij2_ri += temp / row_tot[i];
3158 sum_fij2_ci += temp / col_tot[j];
3161 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3162 sum_ri2 += row_tot[i] * row_tot[i];
3164 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3165 sum_cj2 += col_tot[j] * col_tot[j];
3167 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3168 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3172 if (cmd.a_statistics[CRS_ST_UC])
3174 double UX, UY, UXY, P;
3175 double ase1_yx, ase1_xy, ase1_sym;
3178 for (UX = 0., i = 0; i < n_rows; i++)
3179 if (row_tot[i] > 0.)
3180 UX -= row_tot[i] / W * log (row_tot[i] / W);
3182 for (UY = 0., j = 0; j < n_cols; j++)
3183 if (col_tot[j] > 0.)
3184 UY -= col_tot[j] / W * log (col_tot[j] / W);
3186 for (UXY = P = 0., i = 0; i < n_rows; i++)
3187 for (j = 0; j < n_cols; j++)
3189 double entry = mat[j + i * n_cols];
3194 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3195 UXY -= entry / W * log (entry / W);
3198 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3199 for (j = 0; j < n_cols; j++)
3201 double entry = mat[j + i * n_cols];
3206 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3207 + (UX - UXY) * log (col_tot[j] / W));
3208 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3209 + (UY - UXY) * log (row_tot[i] / W));
3210 ase1_sym += entry * sqr ((UXY
3211 * log (row_tot[i] * col_tot[j] / (W * W)))
3212 - (UX + UY) * log (entry / W));
3215 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3216 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3217 t[5] = v[5] / ((2. / (W * (UX + UY)))
3218 * sqrt (P - sqr (UX + UY - UXY) / W));
3220 v[6] = (UX + UY - UXY) / UX;
3221 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3222 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3224 v[7] = (UX + UY - UXY) / UY;
3225 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3226 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3230 if (cmd.a_statistics[CRS_ST_D])
3235 calc_symmetric (NULL, NULL, NULL);
3236 for (i = 0; i < 3; i++)
3238 v[8 + i] = somers_d_v[i];
3239 ase[8 + i] = somers_d_ase[i];
3240 t[8 + i] = somers_d_t[i];
3245 if (cmd.a_statistics[CRS_ST_ETA])
3248 double sum_Xr, sum_X2r;
3252 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3254 sum_Xr += rows[i].f * row_tot[i];
3255 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3257 SX = sum_X2r - sum_Xr * sum_Xr / W;
3259 for (SXW = 0., j = 0; j < n_cols; j++)
3263 for (cum = 0., i = 0; i < n_rows; i++)
3265 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3266 cum += rows[i].f * mat[j + i * n_cols];
3269 SXW -= cum * cum / col_tot[j];
3271 v[11] = sqrt (1. - SXW / SX);
3275 double sum_Yc, sum_Y2c;
3279 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3281 sum_Yc += cols[i].f * col_tot[i];
3282 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3284 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3286 for (SYW = 0., i = 0; i < n_rows; i++)
3290 for (cum = 0., j = 0; j < n_cols; j++)
3292 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3293 cum += cols[j].f * mat[j + i * n_cols];
3296 SYW -= cum * cum / row_tot[i];
3298 v[12] = sqrt (1. - SYW / SY);