1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
32 /* AIX requires this to be the first thing in the file. */
35 #define alloca __builtin_alloca
43 #ifndef alloca /* predefined by HP cc +Olibcalls */
58 #include "dcdflib/cdflib.h"
71 /*#define DEBUGGING 1*/
72 #include "debug-print.h"
78 +missing=miss:!table/include/report;
79 +write[wr_]=none,cells,all;
80 +format=fmt:!labels/nolabels/novallabs,
83 tabl:!tables/notables,
86 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
88 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
94 /* Number of chi-square statistics. */
97 /* Number of symmetric statistics. */
100 /* Number of directional statistics. */
101 #define N_DIRECTIONAL 13
103 /* A single table entry for general mode. */
106 int table; /* Flattened table number. */
109 double freq; /* Frequency count. */
110 double *data; /* Crosstabulation table for integer mode. */
113 union value v[1]; /* Values. */
116 /* A crosstabulation. */
119 int nvar; /* Number of variables. */
120 double missing; /* Missing cases count. */
121 int ofs; /* Integer mode: Offset into sorted_tab[]. */
122 struct variable *v[2]; /* At least two variables; sorted by
123 larger indices first. */
126 /* Indexes into crosstab.v. */
133 /* General mode crosstabulation table. */
134 static struct hsh_table *gen_tab; /* Hash table. */
135 static int n_sorted_tab; /* Number of entries in sorted_tab. */
136 static struct table_entry **sorted_tab; /* Sorted table. */
138 /* VARIABLES dictionary. */
139 static struct dictionary *var_dict;
142 static struct crosstab **xtab;
145 /* Integer or general mode? */
154 static int num_cells; /* Number of cells requested. */
155 static int cells[8]; /* Cells requested. */
156 static int expected; /* Nonzero if expected value is needed. */
159 static int write; /* One of WR_* that specifies the WRITE style. */
161 /* Command parsing info. */
162 static struct cmd_crosstabs cmd;
165 static struct pool *pl_tc; /* For table cells. */
166 static struct pool *pl_col; /* For column data. */
168 static int internal_cmd_crosstabs (void);
169 static void free_var_dict (void);
170 static void precalc (void);
171 static int calc_general (struct ccase *);
172 static int calc_integer (struct ccase *);
173 static void postcalc (void);
174 static void submit (struct tab_table *);
177 static void debug_print (void);
178 static void print_table_entries (struct table_entry **tab);
181 /* Parse and execute CROSSTABS, then clean up. */
185 int result = internal_cmd_crosstabs ();
188 pool_destroy (pl_tc);
189 pool_destroy (pl_col);
194 /* Parses and executes the CROSSTABS procedure. */
196 internal_cmd_crosstabs (void)
201 pl_tc = pool_create ();
202 pl_col = pool_create ();
204 lex_match_id ("CROSSTABS");
205 if (!parse_crosstabs (&cmd))
209 /* Needs var_dict. */
213 mode = var_dict ? INTEGER : GENERAL;
220 cmd.a_cells[CRS_CL_COUNT] = 1;
228 for (i = 0; i < CRS_CL_count; i++)
233 cmd.a_cells[CRS_CL_COUNT] = 1;
234 cmd.a_cells[CRS_CL_ROW] = 1;
235 cmd.a_cells[CRS_CL_COLUMN] = 1;
236 cmd.a_cells[CRS_CL_TOTAL] = 1;
238 if (cmd.a_cells[CRS_CL_ALL])
240 for (i = 0; i < CRS_CL_count; i++)
242 cmd.a_cells[CRS_CL_ALL] = 0;
244 cmd.a_cells[CRS_CL_NONE] = 0;
245 for (num_cells = i = 0; i < CRS_CL_count; i++)
248 if (i >= CRS_CL_EXPECTED)
250 cmd.a_cells[num_cells++] = i;
255 if (cmd.sbc_statistics)
260 for (i = 0; i < CRS_ST_count; i++)
261 if (cmd.a_statistics[i])
264 cmd.a_statistics[CRS_ST_CHISQ] = 1;
265 if (cmd.a_statistics[CRS_ST_ALL])
266 for (i = 0; i < CRS_ST_count; i++)
267 cmd.a_statistics[i] = 1;
271 if (cmd.miss == CRS_REPORT && mode == GENERAL)
273 msg (SE, _("Missing mode REPORT not allowed in general mode. "
274 "Assuming MISSING=TABLE."));
275 cmd.miss = CRS_TABLE;
279 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
280 cmd.a_write[CRS_WR_ALL] = 0;
281 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
283 msg (SE, _("Write mode ALL not allowed in general mode. "
284 "Assuming WRITE=CELLS."));
285 cmd.a_write[CRS_WR_CELLS] = 1;
288 && (cmd.a_write[CRS_WR_NONE]
289 + cmd.a_write[CRS_WR_ALL]
290 + cmd.a_write[CRS_WR_CELLS] == 0))
291 cmd.a_write[CRS_WR_CELLS] = 1;
292 if (cmd.a_write[CRS_WR_CELLS])
293 write = CRS_WR_CELLS;
294 else if (cmd.a_write[CRS_WR_ALL])
299 update_weighting (&default_dict);
300 procedure (precalc, mode == GENERAL ? calc_general : calc_integer, postcalc);
305 /* Frees var_dict once it's no longer needed. */
315 if (var_dict->var_by_name)
317 avl_destroy (var_dict->var_by_name, NULL);
318 var_dict->var_by_name = NULL;
321 for (i = 0; i < var_dict->nvar; i++)
322 free (var_dict->var[i]);
323 free (var_dict->var);
324 var_dict->var = NULL;
327 free_dictionary (var_dict);
333 /* Parses the TABLES subcommand. */
335 crs_custom_tables (struct cmd_crosstabs *cmd unused)
337 struct dictionary *dict;
339 struct variable ***by = NULL;
344 /* Ensure that this is a TABLES subcommand. */
345 if (!lex_match_id ("TABLES")
346 && (token != T_ID || !is_varname (tokid))
351 dict = var_dict ? var_dict : &default_dict;
355 by = xrealloc (by, sizeof *by * (n_by + 1));
356 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
357 if (!parse_variables (dict, &by[n_by], &by_nvar[n_by],
358 PV_NO_DUPLICATE | PV_NO_SCRATCH))
363 if (!lex_match (T_BY))
367 lex_error (_("expecting BY"));
376 int *by_iter = xcalloc (sizeof *by_iter * n_by);
379 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
380 for (i = 0; i < nx; i++)
384 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
391 if (var_dict == NULL)
392 for (i = 0; i < n_by; i++)
393 x->v[i] = by[i][by_iter[i]];
395 for (i = 0; i < n_by; i++)
396 x->v[i] = default_dict.var[by[i][by_iter[i]]->foo];
402 for (i = n_by - 1; i >= 0; i--)
404 if (++by_iter[i] < by_nvar[i])
416 /* Despite the name, we come here whether we're successful or
422 for (i = 0; i < n_by; i++)
431 /* Parses the VARIABLES subcommand. */
433 crs_custom_variables (struct cmd_crosstabs *cmd unused)
435 struct variable **v = NULL;
440 msg (SE, _("VARIABLES must be specified before TABLES."));
453 if (!parse_variables (&default_dict, &v, &nv,
454 (PV_APPEND | PV_NUMERIC
455 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
460 lex_error ("expecting `('");
465 if (!lex_force_int ())
467 min = lex_integer ();
472 if (!lex_force_int ())
474 max = lex_integer ();
477 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
485 lex_error ("expecting `)'");
490 for (i = orig_nv; i < nv; i++)
492 v[i]->p.crs.min = min;
493 v[i]->p.crs.max = max + 1.;
494 v[i]->p.crs.count = max - min + 1;
504 var_dict = new_dictionary (0);
505 var_dict->var = xmalloc (sizeof *var_dict->var * nv);
507 for (i = 0; i < nv; i++)
509 struct variable *var = xmalloc (offsetof (struct variable, width));
510 strcpy (var->name, v[i]->name);
512 var->type = v[i]->type;
513 var->foo = v[i]->index;
514 var_dict->var[i] = var;
515 avl_force_insert (var_dict->var_by_name, var);
531 printf ("CROSSTABS\n");
537 printf ("\t/VARIABLES=");
538 for (i = 0; i < var_dict->nvar; i++)
540 struct variable *v = var_dict->var[i];
541 struct variable *iv = default_dict.var[v->foo];
543 printf ("%s ", v->name);
544 if (i < var_dict->nvar - 1)
546 struct variable *nv = var_dict->var[i + 1];
547 struct variable *niv = default_dict.var[nv->foo];
549 if (iv->p.crs.min == niv->p.crs.min
550 && iv->p.crs.max == niv->p.crs.max)
553 printf ("(%d,%d) ", iv->p.crs.min, iv->p.crs.max - 1);
561 printf ("\t/TABLES=");
562 for (i = 0; i < nxtab; i++)
564 struct crosstab *x = xtab[i];
569 for (j = 0; j < x->nvar; j++)
573 printf ("%s", x->v[j]->name);
579 #endif /* DEBUGGING */
581 /* Data file processing. */
583 static int compare_table_entry (const void *, const void *, void *);
584 static unsigned hash_table_entry (const void *, void *);
586 /* Set up the crosstabulation tables for processing. */
592 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
602 for (i = 0; i < nxtab; i++)
604 struct crosstab *x = xtab[i];
609 x->ofs = n_sorted_tab;
611 for (j = 2; j < x->nvar; j++)
612 count *= x->v[j - 2]->p.crs.count;
614 sorted_tab = xrealloc (sorted_tab,
615 sizeof *sorted_tab * (n_sorted_tab + count));
616 v = local_alloc (sizeof *v * x->nvar);
617 for (j = 2; j < x->nvar; j++)
618 v[j] = x->v[j]->p.crs.min;
619 for (j = 0; j < count; j++)
621 struct table_entry *te;
624 te = sorted_tab[n_sorted_tab++]
625 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
629 const int mat_size = (x->v[0]->p.crs.count
630 * x->v[1]->p.crs.count);
633 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
634 for (m = 0; m < mat_size; m++)
638 for (k = 2; k < x->nvar; k++)
640 for (k = 2; k < x->nvar; k++)
641 if (++v[k] >= x->v[k]->p.crs.max)
642 v[k] = x->v[k]->p.crs.min;
649 sorted_tab = xrealloc (sorted_tab,
650 sizeof *sorted_tab * (n_sorted_tab + 1));
651 sorted_tab[n_sorted_tab] = NULL;
655 /* Form crosstabulations for general mode. */
657 calc_general (struct ccase *c)
660 double w = (default_dict.weight_index != -1
661 ? c->data[default_dict.var[default_dict.weight_index]->fv].f
664 /* Flattened current table index. */
667 for (t = 0; t < nxtab; t++)
669 struct crosstab *x = xtab[t];
670 const size_t entry_size = (sizeof (struct table_entry)
671 + sizeof (union value) * (x->nvar - 1));
672 struct table_entry *te = local_alloc (entry_size);
674 /* Construct table entry for the current record and table. */
680 for (j = 0; j < x->nvar; j++)
682 if ((cmd.miss == CRS_TABLE
683 && is_missing (&c->data[x->v[j]->fv], x->v[j]))
684 || (cmd.miss == CRS_INCLUDE
685 && is_system_missing (&c->data[x->v[j]->fv], x->v[j])))
691 if (x->v[j]->type == NUMERIC)
692 te->v[j].f = c->data[x->v[j]->fv].f;
695 memcpy (te->v[j].s, c->data[x->v[j]->fv].s, x->v[j]->width);
697 /* Necessary in order to simplify comparisons. */
698 memset (&te->v[j].s[x->v[j]->width], 0,
699 sizeof (union value) - x->v[j]->width);
704 /* Add record to hash table. */
706 struct table_entry **tepp = (struct table_entry **) hsh_probe (gen_tab, te);
709 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
712 memcpy (tep, te, entry_size);
717 (*tepp)->u.freq += w;
728 calc_integer (struct ccase *c)
731 double w = (default_dict.weight_index != -1
732 ? c->data[default_dict.var[default_dict.weight_index]->fv].f
735 /* Flattened current table index. */
738 for (t = 0; t < nxtab; t++)
740 struct crosstab *x = xtab[t];
745 for (i = 0; i < x->nvar; i++)
747 struct variable *const v = x->v[i];
748 double value = c->data[v->fv].f;
750 /* Note that the first test also rules out SYSMIS. */
751 if ((value < v->p.crs.min || value >= v->p.crs.max)
752 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
760 ofs += fact * ((int) value - v->p.crs.min);
761 fact *= v->p.crs.count;
766 const int row = c->data[x->v[ROW_VAR]->fv].f - x->v[ROW_VAR]->p.crs.min;
767 const int col = c->data[x->v[COL_VAR]->fv].f - x->v[COL_VAR]->p.crs.min;
768 const int col_dim = x->v[COL_VAR]->p.crs.count;
770 sorted_tab[ofs]->u.data[col + row * col_dim] += w;
780 /* Print out all table entries in NULL-terminated TAB for use by a
781 debugger (a person, not a program). */
783 print_table_entries (struct table_entry **tab)
785 printf ("raw crosstabulation data:\n");
788 const struct crosstab *x = xtab[(*tab)->table];
791 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
792 for (i = 0; i < x->nvar; i++)
796 printf ("%s:", x->v[i]->name);
798 if (x->v[i]->type == NUMERIC)
799 printf ("%g", (*tab)->v[i].f);
801 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
809 /* Compare the table_entry's at PA and PB and return a strcmp()-type
812 compare_table_entry (const void *pa, const void *pb, void *foo unused)
814 const struct table_entry *a = pa;
815 const struct table_entry *b = pb;
818 const int difftable = a->table - b->table;
824 const struct crosstab *x = xtab[a->table];
827 for (i = x->nvar - 1; i >= 0; i--)
828 if (x->v[i]->type == NUMERIC)
830 const double diffnum = a->v[i].f - b->v[i].f;
833 else if (diffnum > 0)
838 assert (x->v[i]->type == ALPHA);
840 const int diffstr = strncmp (a->v[i].s, b->v[i].s, x->v[i]->width);
850 /* Calculate a hash value from table_entry PA. */
852 hash_table_entry (const void *pa, void *foo unused)
854 const struct table_entry *a = pa;
855 unsigned long hash = a->table;
858 /* Hash formula from _SPSS Statistical Algorithms_. */
859 for (i = 0; i < xtab[a->table]->nvar; i++)
861 hash = (hash << 3) | (hash >> (CHAR_BIT * SIZEOF_LONG - 3));
862 hash ^= a->v[i].hash[0];
863 #if SIZEOF_DOUBLE / SIZEOF_LONG > 1
864 hash ^= a->v[i].hash[1];
871 /* Post-data reading calculations. */
873 static struct table_entry **find_pivot_extent (struct table_entry **, int *cnt, int pivot);
874 static void enum_var_values (struct table_entry **beg, int cnt,
875 union value **values, int *nvalues,
877 static void output_pivot_table (struct table_entry **, struct table_entry **,
878 double **, double **, double **,
879 int *, int *, int *);
880 static void make_summary_table (void);
887 n_sorted_tab = hsh_count (gen_tab);
888 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
890 print_table_entries (sorted_tab);
894 make_summary_table ();
896 /* Identify all the individual crosstabulation tables, and deal with
899 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
900 int pc = n_sorted_tab; /* Pivot count. */
902 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
903 int maxrows = 0, maxcols = 0, maxcells = 0;
907 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
911 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
912 &maxrows, &maxcols, &maxcells);
921 hsh_destroy (gen_tab);
924 static void insert_summary (struct tab_table *, int tab_index, double valid);
926 /* Output a table summarizing the cases processed. */
928 make_summary_table (void)
930 struct tab_table *summary;
932 struct table_entry **pb = sorted_tab, **pe;
933 int pc = n_sorted_tab;
936 summary = tab_create (7, 3 + nxtab, 1);
937 tab_title (summary, 0, _("Summary."));
938 tab_headers (summary, 1, 0, 3, 0);
939 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
940 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
941 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
942 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
943 tab_hline (summary, TAL_1, 1, 6, 1);
944 tab_hline (summary, TAL_1, 1, 6, 2);
945 tab_vline (summary, TAL_1, 3, 1, 1);
946 tab_vline (summary, TAL_1, 5, 1, 1);
950 for (i = 0; i < 3; i++)
952 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
953 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
956 tab_offset (summary, 0, 3);
962 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
966 while (cur_tab < (*pb)->table)
967 insert_summary (summary, cur_tab++, 0.);
970 for (valid = 0.; pb < pe; pb++)
971 valid += (*pb)->u.freq;
974 const struct crosstab *const x = xtab[(*pb)->table];
975 const int n_cols = x->v[COL_VAR]->p.crs.count;
976 const int n_rows = x->v[ROW_VAR]->p.crs.count;
977 const int count = n_cols * n_rows;
979 for (valid = 0.; pb < pe; pb++)
981 const double *data = (*pb)->u.data;
984 for (i = 0; i < count; i++)
988 insert_summary (summary, cur_tab++, valid);
993 while (cur_tab < nxtab)
994 insert_summary (summary, cur_tab++, 0.);
999 /* Inserts a line into T describing the crosstabulation at index
1000 TAB_INDEX, which has VALID valid observations. */
1002 insert_summary (struct tab_table *t, int tab_index, double valid)
1004 struct crosstab *x = xtab[tab_index];
1006 tab_hline (t, TAL_1, 0, 6, 0);
1008 /* Crosstabulation name. */
1010 char *buf = local_alloc (128 * x->nvar);
1014 for (i = 0; i < x->nvar; i++)
1017 cp = stpcpy (cp, " * ");
1019 cp = stpcpy (cp, x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1021 tab_text (t, 0, 0, TAB_LEFT, buf);
1026 /* Counts and percentages. */
1036 for (i = 0; i < 3; i++)
1038 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
1039 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
1040 n[i] / n[2] * 100.);
1050 static struct tab_table *table; /* Crosstabulation table. */
1051 static struct tab_table *chisq; /* Chi-square table. */
1052 static struct tab_table *sym; /* Symmetric measures table. */
1053 static struct tab_table *risk; /* Risk estimate table. */
1054 static struct tab_table *direct; /* Directional measures table. */
1057 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
1059 /* Column values, number of columns. */
1060 static union value *cols;
1063 /* Row values, number of rows. */
1064 static union value *rows;
1067 /* Number of statistically interesting columns/rows (columns/rows with
1069 static int ns_cols, ns_rows;
1071 /* Crosstabulation. */
1072 static struct crosstab *x;
1074 /* Number of variables from the crosstabulation to consider. This is
1075 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1078 /* Matrix contents. */
1079 static double *mat; /* Matrix proper. */
1080 static double *row_tot; /* Row totals. */
1081 static double *col_tot; /* Column totals. */
1082 static double W; /* Grand total. */
1084 static void display_dimensions (struct tab_table *, int first_difference,
1085 struct table_entry *);
1086 static void display_crosstabulation (void);
1087 static void display_chisq (void);
1088 static void display_symmetric (void);
1089 static void display_risk (void);
1090 static void display_directional (void);
1091 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1092 static void table_value_missing (struct tab_table *table, int c, int r,
1093 unsigned char opt, const union value *v,
1094 const struct variable *var);
1095 static void delete_missing (void);
1097 /* Output pivot table beginning at PB and continuing until PE,
1098 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1099 hold *MAXROWS entries. */
1101 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1102 double **matp, double **row_totp, double **col_totp,
1103 int *maxrows, int *maxcols, int *maxcells)
1106 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1107 int tc = pe - pb; /* Table count. */
1109 /* Table entry for header comparison. */
1110 struct table_entry *cmp;
1112 x = xtab[(*pb)->table];
1113 enum_var_values (pb, pe - pb, &cols, &n_cols, COL_VAR);
1115 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1117 /* Crosstabulation table initialization. */
1120 table = tab_create (nvar + n_cols,
1121 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1122 tab_headers (table, nvar - 1, 0, 2, 0);
1124 /* First header line. */
1125 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1126 TAB_CENTER | TAT_TITLE, x->v[COL_VAR]->name);
1128 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1130 /* Second header line. */
1134 for (i = 2; i < nvar; i++)
1135 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1136 TAB_RIGHT | TAT_TITLE,
1137 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1138 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1139 x->v[ROW_VAR]->name);
1140 for (i = 0; i < n_cols; i++)
1141 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1143 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1146 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1147 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1151 char *title = local_alloc (x->nvar * 64 + 128);
1155 if (cmd.pivot == CRS_PIVOT)
1156 for (i = 0; i < nvar; i++)
1159 cp = stpcpy (cp, " by ");
1160 cp = stpcpy (cp, x->v[i]->name);
1164 cp = spprintf (cp, "%s by %s for", x->v[0]->name, x->v[1]->name);
1165 for (i = 2; i < nvar; i++)
1167 char buf[64], *bufp;
1172 cp = stpcpy (cp, x->v[i]->name);
1174 data_out (buf, &x->v[i]->print, &(*pb)->v[i]);
1175 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1177 cp = stpcpy (cp, bufp);
1181 cp = stpcpy (cp, " [");
1182 for (i = 0; i < num_cells; i++)
1190 static const struct tuple cell_names[] =
1192 {CRS_CL_COUNT, N_("count")},
1193 {CRS_CL_ROW, N_("row %")},
1194 {CRS_CL_COLUMN, N_("column %")},
1195 {CRS_CL_TOTAL, N_("total %")},
1196 {CRS_CL_EXPECTED, N_("expected")},
1197 {CRS_CL_RESIDUAL, N_("residual")},
1198 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1199 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1203 const struct tuple *t;
1205 for (t = cell_names; t->value != cells[i]; t++)
1206 assert (t->value != -1);
1208 cp = stpcpy (cp, ", ");
1209 cp = stpcpy (cp, gettext (t->name));
1213 tab_title (table, 0, title);
1217 tab_offset (table, 0, 2);
1222 /* Chi-square table initialization. */
1223 if (cmd.a_statistics[CRS_ST_CHISQ])
1225 chisq = tab_create (6 + (nvar - 2),
1226 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1227 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1229 tab_title (chisq, 0, "Chi-square tests.");
1231 tab_offset (chisq, nvar - 2, 0);
1232 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1233 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1234 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1235 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1236 _("Asymp. Sig. (2-sided)"));
1237 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1238 _("Exact. Sig. (2-sided)"));
1239 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1240 _("Exact. Sig. (1-sided)"));
1242 tab_offset (chisq, 0, 1);
1247 /* Symmetric measures. */
1248 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1249 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1250 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1251 || cmd.a_statistics[CRS_ST_KAPPA])
1253 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1254 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1255 tab_title (sym, 0, "Symmetric measures.");
1257 tab_offset (sym, nvar - 2, 0);
1258 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1259 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1260 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1261 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1262 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1263 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1264 tab_offset (sym, 0, 1);
1269 /* Risk estimate. */
1270 if (cmd.a_statistics[CRS_ST_RISK])
1272 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1273 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1274 tab_title (risk, 0, "Risk estimate.");
1276 tab_offset (risk, nvar - 2, 0);
1277 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1278 _(" 95%% Confidence Interval"));
1279 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1280 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1281 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1282 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1283 tab_hline (risk, TAL_1, 2, 3, 1);
1284 tab_vline (risk, TAL_1, 2, 0, 1);
1285 tab_offset (risk, 0, 2);
1290 /* Directional measures. */
1291 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1292 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1294 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1295 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1296 tab_title (direct, 0, "Directional measures.");
1298 tab_offset (direct, nvar - 2, 0);
1299 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1300 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1301 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1302 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1303 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1304 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1305 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1306 tab_offset (direct, 0, 1);
1313 /* Find pivot subtable if applicable. */
1314 te = find_pivot_extent (tb, &tc, 0);
1318 /* Find all the row variable values. */
1319 enum_var_values (tb, te - tb, &rows, &n_rows, ROW_VAR);
1321 /* Allocate memory space for the column and row totals. */
1322 if (n_rows > *maxrows)
1324 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1325 row_tot = *row_totp;
1328 if (n_cols > *maxcols)
1330 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1331 col_tot = *col_totp;
1335 /* Allocate table space for the matrix. */
1336 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1337 tab_realloc (table, -1,
1338 max (tab_nr (table) + (n_rows + 1) * num_cells,
1339 tab_nr (table) * (pe - pb) / (te - tb)));
1341 if (mode == GENERAL)
1343 /* Allocate memory space for the matrix. */
1344 if (n_cols * n_rows > *maxcells)
1346 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1347 *maxcells = n_cols * n_rows;
1352 /* Build the matrix and calculate column totals. */
1354 union value *cur_col = cols;
1355 union value *cur_row = rows;
1357 double *cp = col_tot;
1358 struct table_entry **p;
1361 for (p = &tb[0]; p < te; p++)
1363 for (; memcmp (cur_col, &(*p)->v[COL_VAR], sizeof *cur_col);
1367 for (; cur_row < &rows[n_rows]; cur_row++)
1373 mp = &mat[cur_col - cols];
1376 for (; memcmp (cur_row, &(*p)->v[ROW_VAR], sizeof *cur_row);
1383 *cp += *mp = (*p)->u.freq;
1388 /* Zero out the rest of the matrix. */
1389 for (; cur_row < &rows[n_rows]; cur_row++)
1395 if (cur_col < &cols[n_cols])
1397 const int rem_cols = n_cols - (cur_col - cols);
1400 for (c = 0; c < rem_cols; c++)
1402 mp = &mat[cur_col - cols];
1403 for (r = 0; r < n_rows; r++)
1405 for (c = 0; c < rem_cols; c++)
1407 mp += n_cols - rem_cols;
1415 double *tp = col_tot;
1417 assert (mode == INTEGER);
1418 mat = (*tb)->u.data;
1421 /* Calculate column totals. */
1422 for (c = 0; c < n_cols; c++)
1425 double *cp = &mat[c];
1427 for (r = 0; r < n_rows; r++)
1428 cum += cp[r * n_cols];
1436 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1437 ns_cols += *cp != 0.;
1440 /* Calculate row totals. */
1443 double *rp = row_tot;
1446 for (ns_rows = 0, r = n_rows; r--; )
1449 for (c = n_cols; c--; )
1457 /* Calculate grand total. */
1463 if (n_rows < n_cols)
1464 tp = row_tot, n = n_rows;
1466 tp = col_tot, n = n_cols;
1473 /* Print the matrix. */
1477 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1478 for (i = 2; i < nvar; i++)
1479 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1482 for (c = 0; c < n_cols; c++)
1483 printf ("%4g", cols[c].f);
1485 for (r = 0; r < n_rows; r++)
1487 printf ("%4g:", rows[r].f);
1488 for (c = 0; c < n_cols; c++)
1489 printf ("%4g", mat[c + r * n_cols]);
1490 printf ("%4g", row_tot[r]);
1494 for (c = 0; c < n_cols; c++)
1495 printf ("%4g", col_tot[c]);
1501 /* Find the first variable that differs from the last subtable,
1502 then display the values of the dimensioning variables for
1503 each table that needs it. */
1505 int first_difference = nvar - 1;
1508 for (; ; first_difference--)
1510 assert (first_difference >= 2);
1511 if (memcmp (&cmp->v[first_difference],
1512 &(*tb)->v[first_difference], sizeof *cmp->v))
1518 display_dimensions (table, first_difference, *tb);
1520 display_dimensions (chisq, first_difference, *tb);
1522 display_dimensions (sym, first_difference, *tb);
1524 display_dimensions (risk, first_difference, *tb);
1526 display_dimensions (direct, first_difference, *tb);
1530 display_crosstabulation ();
1531 if (cmd.miss == CRS_REPORT)
1536 display_symmetric ();
1540 display_directional ();
1551 tab_resize (chisq, 4 + (nvar - 2), -1);
1562 /* Delete missing rows and columns for statistical analysis when
1565 delete_missing (void)
1570 for (r = 0; r < n_rows; r++)
1571 if (is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1575 for (c = 0; c < n_cols; c++)
1576 mat[c + r * n_cols] = 0.;
1584 for (c = 0; c < n_cols; c++)
1585 if (is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1589 for (r = 0; r < n_rows; r++)
1590 mat[c + r * n_cols] = 0.;
1596 /* Prepare table T for submission, and submit it. */
1598 submit (struct tab_table *t)
1605 tab_resize (t, -1, 0);
1606 if (tab_nr (t) == tab_t (t))
1611 tab_offset (t, 0, 0);
1613 for (i = 2; i < nvar; i++)
1614 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1615 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1616 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1617 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1619 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1621 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1622 tab_dim (t, crosstabs_dim);
1626 /* Sets the widths of all the columns and heights of all the rows in
1627 table T for driver D. */
1629 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1633 /* Width of a numerical column. */
1634 int c = outp_string_width (d, "0.000000");
1635 if (cmd.miss == CRS_REPORT)
1636 c += outp_string_width (d, "M");
1638 /* Set width for header columns. */
1641 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1643 if (w < d->prop_em_width * 8)
1644 w = d->prop_em_width * 8;
1646 if (w > d->prop_em_width * 15)
1647 w = d->prop_em_width * 15;
1649 for (i = 0; i < t->l; i++)
1653 for (i = t->l; i < t->nc; i++)
1656 for (i = 0; i < t->nr; i++)
1657 t->h[i] = tab_natural_height (t, d, i);
1660 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1661 int *cnt, int pivot);
1662 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1663 int *cnt, int pivot);
1665 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1667 static struct table_entry **
1668 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1670 return (mode == GENERAL
1671 ? find_pivot_extent_general (tp, cnt, pivot)
1672 : find_pivot_extent_integer (tp, cnt, pivot));
1675 /* Find the extent of a region in TP that contains one table. If
1676 PIVOT != 0 that means a set of table entries with identical table
1677 number; otherwise they also have to have the same values for every
1678 dimension after the row and column dimensions. The table that is
1679 searched starts at TP and has length CNT. Returns the first entry
1680 after the last one in the table; sets *CNT to the number of
1681 remaining values. If there are no entries in TP at all, returns
1682 NULL. A yucky interface, admittedly, but it works. */
1683 static struct table_entry **
1684 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1686 struct table_entry *fp = *tp;
1691 x = xtab[(*tp)->table];
1699 if ((*tp)->table != fp->table)
1704 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1711 /* Integer mode correspondent to find_pivot_extent_general(). This
1712 could be optimized somewhat, but I just don't give a crap about
1713 CROSSTABS performance in integer mode, which is just a wart on
1714 CROSSTABS' ass as far as I'm concerned.
1716 That said, feel free to send optimization patches to me. */
1717 static struct table_entry **
1718 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1720 struct table_entry *fp = *tp;
1725 x = xtab[(*tp)->table];
1733 if ((*tp)->table != fp->table)
1738 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1745 /* Compare value * A and B, where WIDTH is the string width or 0 for
1746 numerics, and return a strcmp()-type result. */
1748 compare_value (const void *pa, const void *pb, void *pwidth)
1750 const union value *a = pa;
1751 const union value *b = pb;
1752 const int width = (int) pwidth;
1755 return strncmp (a->s, b->s, width);
1757 return a->f < b->f ? -1 : (a->f > b->f ? 1 : 0);
1760 /* Given a list of CNT table_entry's starting at BEG, creates a list
1761 of *NVALUES values *VALUES of variable with index VAR_INDEX. */
1763 enum_var_values (struct table_entry **beg, int cnt, union value **values, int *nvalues,
1766 if (mode == GENERAL)
1770 tree = avl_create (pl_col, compare_value,
1771 (void *) (xtab[(*beg)->table]->v[var_index]->width));
1776 for (i = 0; i < cnt; i++)
1777 avl_insert (tree, &beg[i]->v[var_index]);
1778 *values = xmalloc (sizeof **values * avl_count (tree));
1786 avl_traverser_init(trav);
1788 while (NULL != (v = avl_traverse (tree, &trav)))
1789 (*values)[i++] = *v;
1794 pool_destroy (pl_col);
1795 pl_col = pool_create ();
1799 struct crosstab_proc *crs = &xtab[(*beg)->table]->v[var_index]->p.crs;
1802 assert (mode == INTEGER);
1803 *values = xmalloc (sizeof **values * crs->count);
1804 for (i = 0; i < crs->count; i++)
1805 (*values)[i].f = i + crs->min;
1806 *nvalues = crs->count;
1810 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1811 from V, displayed with print format spec from variable VAR. When
1812 in REPORT missing-value mode, missing values have an M appended. */
1814 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1815 const union value *v, const struct variable *var)
1817 struct len_string s;
1819 char *label = get_val_lab (var, *v, 0);
1822 tab_text (table, c, r, TAB_LEFT, label);
1826 s.length = var->print.w;
1827 s.string = tab_alloc (table, s.length + 1);
1828 data_out (s.string, &var->print, v);
1829 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1830 s.string[s.length++] = 'M';
1831 while (s.length && *s.string == ' ')
1836 tab_raw (table, c, r, opt, &s);
1839 /* Draws a line across TABLE at the current row to indicate the most
1840 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1841 that changed, and puts the values that changed into the table. TB
1842 and X must be the corresponding table_entry and crosstab,
1845 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1847 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1849 for (; first_difference >= 2; first_difference--)
1850 table_value_missing (table, nvar - first_difference - 1, 0,
1851 TAB_RIGHT, &tb->v[first_difference],
1852 x->v[first_difference]);
1855 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1857 float_M_suffix (struct tab_table *table, int c, int r, double v)
1859 static const struct fmt_spec f = {FMT_F, 8, 0};
1860 struct len_string s;
1863 s.string = tab_alloc (table, 9);
1865 data_out (s.string, &f, (union value *) &v);
1866 while (*s.string == ' ')
1871 tab_raw (table, c, r, TAB_RIGHT, &s);
1874 /* Displays the crosstabulation table. */
1876 display_crosstabulation (void)
1881 for (r = 0; r < n_rows; r++)
1882 table_value_missing (table, nvar - 2, r * num_cells,
1883 TAB_RIGHT, &rows[r], x->v[ROW_VAR]);
1885 tab_text (table, nvar - 2, n_rows * num_cells,
1886 TAB_LEFT, _("Total"));
1888 /* Put in the actual cells. */
1893 tab_offset (table, nvar - 1, -1);
1894 for (r = 0; r < n_rows; r++)
1897 tab_hline (table, TAL_1, -1, n_cols, 0);
1898 for (c = 0; c < n_cols; c++)
1900 double expected_value;
1903 expected_value = row_tot[r] * col_tot[c] / W;
1904 for (i = 0; i < num_cells; i++)
1914 v = *mp / row_tot[r] * 100.;
1917 v = *mp / col_tot[c] * 100.;
1922 case CRS_CL_EXPECTED:
1925 case CRS_CL_RESIDUAL:
1926 v = *mp - expected_value;
1928 case CRS_CL_SRESIDUAL:
1929 v = (*mp - expected_value) / sqrt (expected_value);
1931 case CRS_CL_ASRESIDUAL:
1932 v = ((*mp - expected_value)
1933 / sqrt (expected_value
1934 * (1. - row_tot[r] / W)
1935 * (1. - col_tot[c] / W)));
1941 if (cmd.miss == CRS_REPORT
1942 && (is_num_user_missing (cols[c].f, x->v[COL_VAR])
1943 || is_num_user_missing (rows[r].f, x->v[ROW_VAR])))
1944 float_M_suffix (table, c, i, v);
1946 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1952 tab_offset (table, -1, tab_row (table) + num_cells);
1960 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1961 for (r = 0; r < n_rows; r++)
1962 for (i = 0; i < num_cells; i++)
1975 v = row_tot[r] / W * 100.;
1978 v = row_tot[r] / W * 100.;
1980 case CRS_CL_EXPECTED:
1981 case CRS_CL_RESIDUAL:
1982 case CRS_CL_SRESIDUAL:
1983 case CRS_CL_ASRESIDUAL:
1990 if (cmd.miss == CRS_REPORT
1991 && is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1992 float_M_suffix (table, n_cols, 0, v);
1994 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1996 tab_next_row (table);
2000 /* Column totals, grand total. */
2005 tab_hline (table, TAL_1, -1, n_cols, 0);
2006 for (c = 0; c <= n_cols; c++)
2008 double ct = c < n_cols ? col_tot[c] : W;
2011 for (i = j = 0; i < num_cells; i++)
2029 case CRS_CL_EXPECTED:
2030 case CRS_CL_RESIDUAL:
2031 case CRS_CL_SRESIDUAL:
2032 case CRS_CL_ASRESIDUAL:
2038 if (cmd.miss == CRS_REPORT && c < n_cols
2039 && is_num_user_missing (cols[c].f, x->v[COL_VAR]))
2040 float_M_suffix (table, c, j, v);
2042 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
2048 tab_offset (table, -1, tab_row (table) + j);
2051 tab_offset (table, 0, -1);
2054 static void calc_r (double *X, double *Y, double *, double *, double *);
2055 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
2057 /* Display chi-square statistics. */
2059 display_chisq (void)
2061 static const char *chisq_stats[N_CHISQ] =
2063 N_("Pearson Chi-Square"),
2064 N_("Likelihood Ratio"),
2065 N_("Fisher's Exact Test"),
2066 N_("Continuity Correction"),
2067 N_("Linear-by-Linear Association"),
2069 double chisq_v[N_CHISQ];
2070 double fisher1, fisher2;
2076 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2078 tab_offset (chisq, nvar - 2, -1);
2080 for (i = 0; i < N_CHISQ; i++)
2082 if ((i != 2 && chisq_v[i] == SYSMIS)
2083 || (i == 2 && fisher1 == SYSMIS))
2087 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2090 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2091 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2092 tab_float (chisq, 3, 0, TAB_RIGHT,
2093 chisq_sig (chisq_v[i], df[i]), 8, 3);
2098 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2099 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2101 tab_next_row (chisq);
2104 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2105 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2106 tab_next_row (chisq);
2108 tab_offset (chisq, 0, -1);
2111 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2112 double[N_SYMMETRIC]);
2114 /* Display symmetric measures. */
2116 display_symmetric (void)
2118 static const char *categories[] =
2120 N_("Nominal by Nominal"),
2121 N_("Ordinal by Ordinal"),
2122 N_("Interval by Interval"),
2123 N_("Measure of Agreement"),
2126 static const char *stats[N_SYMMETRIC] =
2130 N_("Contingency Coefficient"),
2131 N_("Kendall's tau-b"),
2132 N_("Kendall's tau-c"),
2134 N_("Spearman Correlation"),
2139 static const int stats_categories[N_SYMMETRIC] =
2141 0, 0, 0, 1, 1, 1, 1, 2, 3,
2145 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2148 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2151 tab_offset (sym, nvar - 2, -1);
2153 for (i = 0; i < N_SYMMETRIC; i++)
2155 if (sym_v[i] == SYSMIS)
2158 if (stats_categories[i] != last_cat)
2160 last_cat = stats_categories[i];
2161 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2164 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2165 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2166 if (sym_ase[i] != SYSMIS)
2167 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2168 if (sym_t[i] != SYSMIS)
2169 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2170 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2174 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2175 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2178 tab_offset (sym, 0, -1);
2181 static int calc_risk (double[], double[], double[], union value *);
2183 /* Display risk estimate. */
2188 double risk_v[3], lower[3], upper[3];
2192 if (!calc_risk (risk_v, upper, lower, c))
2195 tab_offset (risk, nvar - 2, -1);
2197 for (i = 0; i < 3; i++)
2199 if (risk_v[i] == SYSMIS)
2205 if (x->v[COL_VAR]->type == NUMERIC)
2206 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2207 x->v[COL_VAR]->name, c[0].f, c[1].f);
2209 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2210 x->v[COL_VAR]->name,
2211 x->v[COL_VAR]->width, c[0].s,
2212 x->v[COL_VAR]->width, c[1].s);
2216 if (x->v[ROW_VAR]->type == NUMERIC)
2217 sprintf (buf, _("For cohort %s = %g"),
2218 x->v[ROW_VAR]->name, rows[i - 1].f);
2220 sprintf (buf, _("For cohort %s = %.*s"),
2221 x->v[ROW_VAR]->name,
2222 x->v[ROW_VAR]->width, rows[i - 1].s);
2226 tab_text (risk, 0, 0, TAB_LEFT, buf);
2227 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2228 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2229 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2230 tab_next_row (risk);
2233 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2234 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2235 tab_next_row (risk);
2237 tab_offset (risk, 0, -1);
2240 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2241 double[N_DIRECTIONAL]);
2243 /* Display directional measures. */
2245 display_directional (void)
2247 static const char *categories[] =
2249 N_("Nominal by Nominal"),
2250 N_("Ordinal by Ordinal"),
2251 N_("Nominal by Interval"),
2254 static const char *stats[] =
2257 N_("Goodman and Kruskal tau"),
2258 N_("Uncertainty Coefficient"),
2263 static const char *types[] =
2270 static const int stats_categories[N_DIRECTIONAL] =
2272 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2275 static const int stats_stats[N_DIRECTIONAL] =
2277 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2280 static const int stats_types[N_DIRECTIONAL] =
2282 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2285 static const int *stats_lookup[] =
2292 static const char **stats_names[] =
2304 double direct_v[N_DIRECTIONAL];
2305 double direct_ase[N_DIRECTIONAL];
2306 double direct_t[N_DIRECTIONAL];
2310 if (!calc_directional (direct_v, direct_ase, direct_t))
2313 tab_offset (direct, nvar - 2, -1);
2315 for (i = 0; i < N_DIRECTIONAL; i++)
2317 if (direct_v[i] == SYSMIS)
2323 for (j = 0; j < 3; j++)
2324 if (last[j] != stats_lookup[j][i])
2327 tab_hline (direct, TAL_1, j, 6, 0);
2332 int k = last[j] = stats_lookup[j][i];
2337 string = x->v[0]->name;
2339 string = x->v[1]->name;
2341 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2342 gettext (stats_names[j][k]), string);
2347 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2348 if (direct_ase[i] != SYSMIS)
2349 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2350 if (direct_t[i] != SYSMIS)
2351 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2352 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2353 tab_next_row (direct);
2356 tab_offset (direct, 0, -1);
2359 /* Statistical calculations. */
2361 /* Returns the value of the gamma (factorial) function for an integer
2364 gamma_int (double x)
2369 for (i = 2; i < x; i++)
2374 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2376 static inline double
2377 Pr (int a, int b, int c, int d)
2379 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2380 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2381 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2382 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2383 / gamma_int (a + b + c + d + 1.));
2386 /* Swap the contents of A and B. */
2388 swap (int *a, int *b)
2395 /* Calculate significance for Fisher's exact test as specified in
2396 _SPSS Statistical Algorithms_, Appendix 5. */
2398 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2402 if (min (c, d) < min (a, b))
2403 swap (&a, &c), swap (&b, &d);
2404 if (min (b, d) < min (a, c))
2405 swap (&a, &b), swap (&c, &d);
2409 swap (&a, &b), swap (&c, &d);
2411 swap (&a, &c), swap (&b, &d);
2415 for (x = 0; x <= a; x++)
2416 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2418 *fisher2 = *fisher1;
2419 for (x = 1; x <= b; x++)
2420 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2423 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2424 columns with values COLS and N_ROWS rows with values ROWS. Values
2425 in the matrix sum to W. */
2427 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2428 double *fisher1, double *fisher2)
2432 chisq[0] = chisq[1] = 0.;
2433 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2434 *fisher1 = *fisher2 = SYSMIS;
2436 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2438 if (ns_rows <= 1 || ns_cols <= 1)
2440 chisq[0] = chisq[1] = SYSMIS;
2444 for (r = 0; r < n_rows; r++)
2445 for (c = 0; c < n_cols; c++)
2447 const double expected = row_tot[r] * col_tot[c] / W;
2448 const double freq = mat[n_cols * r + c];
2449 const double residual = freq - expected;
2452 chisq[0] += residual * residual / expected;
2454 chisq[1] += freq * log (expected / freq);
2465 /* Calculate Yates and Fisher exact test. */
2466 if (ns_cols == 2 && ns_rows == 2)
2468 double f11, f12, f21, f22;
2474 for (i = j = 0; i < n_cols; i++)
2475 if (col_tot[i] != 0.)
2484 f11 = mat[nz_cols[0]];
2485 f12 = mat[nz_cols[1]];
2486 f21 = mat[nz_cols[0] + n_cols];
2487 f22 = mat[nz_cols[1] + n_cols];
2492 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2495 chisq[3] = (W * x * x
2496 / (f11 + f12) / (f21 + f22)
2497 / (f11 + f21) / (f12 + f22));
2505 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2506 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2509 /* Calculate Mantel-Haenszel. */
2510 if (x->v[ROW_VAR]->type == NUMERIC && x->v[COL_VAR]->type == NUMERIC)
2512 double r, ase_0, ase_1;
2513 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2515 chisq[4] = (W - 1.) * r * r;
2520 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2521 ASE_1, and ase_0 into ASE_0. The row and column values must be
2522 passed in X and Y. */
2524 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2526 double SX, SY, S, T;
2528 double sum_XYf, sum_X2Y2f;
2529 double sum_Xr, sum_X2r;
2530 double sum_Yc, sum_Y2c;
2533 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2534 for (j = 0; j < n_cols; j++)
2536 double fij = mat[j + i * n_cols];
2537 double product = X[i] * Y[j];
2538 double temp = fij * product;
2540 sum_X2Y2f += temp * product;
2543 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2545 sum_Xr += X[i] * row_tot[i];
2546 sum_X2r += X[i] * X[i] * row_tot[i];
2550 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2552 sum_Yc += Y[i] * col_tot[i];
2553 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2557 S = sum_XYf - sum_Xr * sum_Yc / W;
2558 SX = sum_X2r - sum_Xr * sum_Xr / W;
2559 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2562 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2567 for (s = c = 0., i = 0; i < n_rows; i++)
2568 for (j = 0; j < n_cols; j++)
2570 double Xresid, Yresid;
2573 Xresid = X[i] - Xbar;
2574 Yresid = Y[j] - Ybar;
2575 temp = (T * Xresid * Yresid
2577 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2578 y = mat[j + i * n_cols] * temp * temp - c;
2583 *ase_1 = sqrt (s) / (T * T);
2587 static double somers_d_v[3];
2588 static double somers_d_ase[3];
2589 static double somers_d_t[3];
2591 /* Calculate symmetric statistics and their asymptotic standard
2592 errors. Returns 0 if none could be calculated. */
2594 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2595 double t[N_SYMMETRIC])
2597 int q = min (ns_rows, ns_cols);
2606 for (i = 0; i < N_SYMMETRIC; i++)
2607 v[i] = ase[i] = t[i] = SYSMIS;
2610 /* Phi, Cramer's V, contingency coefficient. */
2611 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2613 double Xp = 0.; /* Pearson chi-square. */
2618 for (r = 0; r < n_rows; r++)
2619 for (c = 0; c < n_cols; c++)
2621 const double expected = row_tot[r] * col_tot[c] / W;
2622 const double freq = mat[n_cols * r + c];
2623 const double residual = freq - expected;
2626 Xp += residual * residual / expected;
2630 if (cmd.a_statistics[CRS_ST_PHI])
2632 v[0] = sqrt (Xp / W);
2633 v[1] = sqrt (Xp / (W * (q - 1)));
2635 if (cmd.a_statistics[CRS_ST_CC])
2636 v[2] = sqrt (Xp / (Xp + W));
2639 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2640 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2645 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2652 for (r = 0; r < n_rows; r++)
2653 Dr -= row_tot[r] * row_tot[r];
2654 for (c = 0; c < n_cols; c++)
2655 Dc -= col_tot[c] * col_tot[c];
2661 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2662 for (c = 0; c < n_cols; c++)
2666 for (r = 0; r < n_rows; r++)
2667 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2677 for (i = 0; i < n_rows; i++)
2681 for (j = 1; j < n_cols; j++)
2682 Cij += col_tot[j] - cum[j + i * n_cols];
2685 for (j = 1; j < n_cols; j++)
2686 Dij += cum[j + (i - 1) * n_cols];
2690 double fij = mat[j + i * n_cols];
2696 assert (j < n_cols);
2698 Cij -= col_tot[j] - cum[j + i * n_cols];
2699 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2703 Cij += cum[j - 1 + (i - 1) * n_cols];
2704 Dij -= cum[j + (i - 1) * n_cols];
2710 if (cmd.a_statistics[CRS_ST_BTAU])
2711 v[3] = (P - Q) / sqrt (Dr * Dc);
2712 if (cmd.a_statistics[CRS_ST_CTAU])
2713 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2714 if (cmd.a_statistics[CRS_ST_GAMMA])
2715 v[5] = (P - Q) / (P + Q);
2717 /* ASE for tau-b, tau-c, gamma. Calculations could be
2718 eliminated here, at expense of memory. */
2723 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2724 for (i = 0; i < n_rows; i++)
2728 for (j = 1; j < n_cols; j++)
2729 Cij += col_tot[j] - cum[j + i * n_cols];
2732 for (j = 1; j < n_cols; j++)
2733 Dij += cum[j + (i - 1) * n_cols];
2737 double fij = mat[j + i * n_cols];
2739 if (cmd.a_statistics[CRS_ST_BTAU])
2741 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2742 + v[3] * (row_tot[i] * Dc
2743 + col_tot[j] * Dr));
2744 btau_cum += fij * temp * temp;
2748 const double temp = Cij - Dij;
2749 ctau_cum += fij * temp * temp;
2752 if (cmd.a_statistics[CRS_ST_GAMMA])
2754 const double temp = Q * Cij - P * Dij;
2755 gamma_cum += fij * temp * temp;
2758 if (cmd.a_statistics[CRS_ST_D])
2760 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2761 - (P - Q) * (W - row_tot[i]));
2762 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2763 - (Q - P) * (W - col_tot[j]));
2768 assert (j < n_cols);
2770 Cij -= col_tot[j] - cum[j + i * n_cols];
2771 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2775 Cij += cum[j - 1 + (i - 1) * n_cols];
2776 Dij -= cum[j + (i - 1) * n_cols];
2782 btau_var = ((btau_cum
2783 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2785 if (cmd.a_statistics[CRS_ST_BTAU])
2787 ase[3] = sqrt (btau_var);
2788 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2791 if (cmd.a_statistics[CRS_ST_CTAU])
2793 ase[4] = ((2 * q / ((q - 1) * W * W))
2794 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2795 t[4] = v[4] / ase[4];
2797 if (cmd.a_statistics[CRS_ST_GAMMA])
2799 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2800 t[5] = v[5] / (2. / (P + Q)
2801 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2803 if (cmd.a_statistics[CRS_ST_D])
2805 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2806 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2807 somers_d_t[0] = (somers_d_v[0]
2809 * sqrt (ctau_cum - sqr (P - Q) / W)));
2810 somers_d_v[1] = (P - Q) / Dc;
2811 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2812 somers_d_t[1] = (somers_d_v[1]
2814 * sqrt (ctau_cum - sqr (P - Q) / W)));
2815 somers_d_v[2] = (P - Q) / Dr;
2816 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2817 somers_d_t[2] = (somers_d_v[2]
2819 * sqrt (ctau_cum - sqr (P - Q) / W)));
2825 /* Spearman correlation, Pearson's r. */
2826 if (cmd.a_statistics[CRS_ST_CORR])
2828 double *R = local_alloc (sizeof *R * n_rows);
2829 double *C = local_alloc (sizeof *C * n_cols);
2832 double y, t, c = 0., s = 0.;
2837 R[i] = s + (row_tot[i] + 1.) / 2.;
2844 assert (i < n_rows);
2849 double y, t, c = 0., s = 0.;
2854 C[j] = s + (col_tot[j] + 1.) / 2;
2861 assert (j < n_cols);
2865 calc_r (R, C, &v[6], &t[6], &ase[6]);
2871 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2875 /* Cohen's kappa. */
2876 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2878 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2881 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2882 i < ns_rows; i++, j++)
2886 while (col_tot[j] == 0.)
2889 prod = row_tot[i] * col_tot[j];
2890 sum = row_tot[i] + col_tot[j];
2892 sum_fii += mat[j + i * n_cols];
2894 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2895 sum_riciri_ci += prod * sum;
2897 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2898 for (j = 0; j < ns_cols; j++)
2900 double sum = row_tot[i] + col_tot[j];
2901 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2904 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2906 ase[8] = sqrt ((W * W * sum_rici
2907 + sum_rici * sum_rici
2908 - W * sum_riciri_ci)
2909 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2911 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2912 / sqr (W * W - sum_rici))
2913 + ((2. * (W - sum_fii)
2914 * (2. * sum_fii * sum_rici
2915 - W * sum_fiiri_ci))
2916 / cube (W * W - sum_rici))
2917 + (sqr (W - sum_fii)
2918 * (W * sum_fijri_ci2 - 4.
2919 * sum_rici * sum_rici)
2920 / hypercube (W * W - sum_rici))));
2922 t[8] = v[8] / ase[8];
2929 /* Calculate risk estimate. */
2931 calc_risk (double *value, double *upper, double *lower, union value *c)
2933 double f11, f12, f21, f22;
2939 for (i = 0; i < 3; i++)
2940 value[i] = upper[i] = lower[i] = SYSMIS;
2943 if (ns_rows != 2 || ns_cols != 2)
2950 for (i = j = 0; i < n_cols; i++)
2951 if (col_tot[i] != 0.)
2960 f11 = mat[nz_cols[0]];
2961 f12 = mat[nz_cols[1]];
2962 f21 = mat[nz_cols[0] + n_cols];
2963 f22 = mat[nz_cols[1] + n_cols];
2965 c[0] = cols[nz_cols[0]];
2966 c[1] = cols[nz_cols[1]];
2969 value[0] = (f11 * f22) / (f12 * f21);
2970 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2971 lower[0] = value[0] * exp (-1.960 * v);
2972 upper[0] = value[0] * exp (1.960 * v);
2974 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2975 v = sqrt ((f12 / (f11 * (f11 + f12)))
2976 + (f22 / (f21 * (f21 + f22))));
2977 lower[1] = value[1] * exp (-1.960 * v);
2978 upper[1] = value[1] * exp (1.960 * v);
2980 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2981 v = sqrt ((f11 / (f12 * (f11 + f12)))
2982 + (f21 / (f22 * (f21 + f22))));
2983 lower[2] = value[2] * exp (-1.960 * v);
2984 upper[2] = value[2] * exp (1.960 * v);
2989 /* Calculate directional measures. */
2991 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2992 double t[N_DIRECTIONAL])
2997 for (i = 0; i < N_DIRECTIONAL; i++)
2998 v[i] = ase[i] = t[i] = SYSMIS;
3002 if (cmd.a_statistics[CRS_ST_LAMBDA])
3004 double *fim = xmalloc (sizeof *fim * n_rows);
3005 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
3006 double *fmj = xmalloc (sizeof *fmj * n_cols);
3007 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
3008 double sum_fim, sum_fmj;
3010 int rm_index, cm_index;
3013 /* Find maximum for each row and their sum. */
3014 for (sum_fim = 0., i = 0; i < n_rows; i++)
3016 double max = mat[i * n_cols];
3019 for (j = 1; j < n_cols; j++)
3020 if (mat[j + i * n_cols] > max)
3022 max = mat[j + i * n_cols];
3026 sum_fim += fim[i] = max;
3027 fim_index[i] = index;
3030 /* Find maximum for each column. */
3031 for (sum_fmj = 0., j = 0; j < n_cols; j++)
3033 double max = mat[j];
3036 for (i = 1; i < n_rows; i++)
3037 if (mat[j + i * n_cols] > max)
3039 max = mat[j + i * n_cols];
3043 sum_fmj += fmj[j] = max;
3044 fmj_index[j] = index;
3047 /* Find maximum row total. */
3050 for (i = 1; i < n_rows; i++)
3051 if (row_tot[i] > rm)
3057 /* Find maximum column total. */
3060 for (j = 1; j < n_cols; j++)
3061 if (col_tot[j] > cm)
3067 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
3068 v[1] = (sum_fmj - rm) / (W - rm);
3069 v[2] = (sum_fim - cm) / (W - cm);
3071 /* ASE1 for Y given X. */
3075 for (accum = 0., i = 0; i < n_rows; i++)
3076 for (j = 0; j < n_cols; j++)
3078 const int deltaj = j == cm_index;
3079 accum += (mat[j + i * n_cols]
3080 * sqr ((j == fim_index[i])
3085 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3088 /* ASE0 for Y given X. */
3092 for (accum = 0., i = 0; i < n_rows; i++)
3093 if (cm_index != fim_index[i])
3094 accum += (mat[i * n_cols + fim_index[i]]
3095 + mat[i * n_cols + cm_index]);
3096 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3099 /* ASE1 for X given Y. */
3103 for (accum = 0., i = 0; i < n_rows; i++)
3104 for (j = 0; j < n_cols; j++)
3106 const int deltaj = i == rm_index;
3107 accum += (mat[j + i * n_cols]
3108 * sqr ((i == fmj_index[j])
3113 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3116 /* ASE0 for X given Y. */
3120 for (accum = 0., j = 0; j < n_cols; j++)
3121 if (rm_index != fmj_index[j])
3122 accum += (mat[j + n_cols * fmj_index[j]]
3123 + mat[j + n_cols * rm_index]);
3124 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3127 /* Symmetric ASE0 and ASE1. */
3132 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3133 for (j = 0; j < n_cols; j++)
3135 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3136 int temp1 = (i == rm_index) + (j == cm_index);
3137 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3138 accum1 += (mat[j + i * n_cols]
3139 * sqr (temp0 + (v[0] - 1.) * temp1));
3141 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3142 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3143 / (2. * W - rm - cm));
3152 double sum_fij2_ri, sum_fij2_ci;
3153 double sum_ri2, sum_cj2;
3155 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3156 for (j = 0; j < n_cols; j++)
3158 double temp = sqr (mat[j + i * n_cols]);
3159 sum_fij2_ri += temp / row_tot[i];
3160 sum_fij2_ci += temp / col_tot[j];
3163 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3164 sum_ri2 += row_tot[i] * row_tot[i];
3166 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3167 sum_cj2 += col_tot[j] * col_tot[j];
3169 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3170 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3174 if (cmd.a_statistics[CRS_ST_UC])
3176 double UX, UY, UXY, P;
3177 double ase1_yx, ase1_xy, ase1_sym;
3180 for (UX = 0., i = 0; i < n_rows; i++)
3181 if (row_tot[i] > 0.)
3182 UX -= row_tot[i] / W * log (row_tot[i] / W);
3184 for (UY = 0., j = 0; j < n_cols; j++)
3185 if (col_tot[j] > 0.)
3186 UY -= col_tot[j] / W * log (col_tot[j] / W);
3188 for (UXY = P = 0., i = 0; i < n_rows; i++)
3189 for (j = 0; j < n_cols; j++)
3191 double entry = mat[j + i * n_cols];
3196 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3197 UXY -= entry / W * log (entry / W);
3200 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3201 for (j = 0; j < n_cols; j++)
3203 double entry = mat[j + i * n_cols];
3208 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3209 + (UX - UXY) * log (col_tot[j] / W));
3210 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3211 + (UY - UXY) * log (row_tot[i] / W));
3212 ase1_sym += entry * sqr ((UXY
3213 * log (row_tot[i] * col_tot[j] / (W * W)))
3214 - (UX + UY) * log (entry / W));
3217 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3218 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3219 t[5] = v[5] / ((2. / (W * (UX + UY)))
3220 * sqrt (P - sqr (UX + UY - UXY) / W));
3222 v[6] = (UX + UY - UXY) / UX;
3223 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3224 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3226 v[7] = (UX + UY - UXY) / UY;
3227 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3228 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3232 if (cmd.a_statistics[CRS_ST_D])
3237 calc_symmetric (NULL, NULL, NULL);
3238 for (i = 0; i < 3; i++)
3240 v[8 + i] = somers_d_v[i];
3241 ase[8 + i] = somers_d_ase[i];
3242 t[8 + i] = somers_d_t[i];
3247 if (cmd.a_statistics[CRS_ST_ETA])
3250 double sum_Xr, sum_X2r;
3254 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3256 sum_Xr += rows[i].f * row_tot[i];
3257 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3259 SX = sum_X2r - sum_Xr * sum_Xr / W;
3261 for (SXW = 0., j = 0; j < n_cols; j++)
3265 for (cum = 0., i = 0; i < n_rows; i++)
3267 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3268 cum += rows[i].f * mat[j + i * n_cols];
3271 SXW -= cum * cum / col_tot[j];
3273 v[11] = sqrt (1. - SXW / SX);
3277 double sum_Yc, sum_Y2c;
3281 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3283 sum_Yc += cols[i].f * col_tot[i];
3284 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3286 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3288 for (SYW = 0., i = 0; i < n_rows; i++)
3292 for (cum = 0., j = 0; j < n_cols; j++)
3294 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3295 cum += cols[j].f * mat[j + i * n_cols];
3298 SYW -= cum * cum / row_tot[i];
3300 v[12] = sqrt (1. - SYW / SY);