1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
32 /* AIX requires this to be the first thing in the file. */
35 #define alloca __builtin_alloca
43 #ifndef alloca /* predefined by HP cc +Olibcalls */
58 #include "dcdflib/cdflib.h"
71 /*#define DEBUGGING 1*/
72 #include "debug-print.h"
78 +missing=miss:!table/include/report;
79 +write[wr_]=none,cells,all;
80 +format=fmt:!labels/nolabels/novallabs,
83 tabl:!tables/notables,
86 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
88 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
94 /* Number of chi-square statistics. */
97 /* Number of symmetric statistics. */
100 /* Number of directional statistics. */
101 #define N_DIRECTIONAL 13
103 /* A single table entry for general mode. */
106 int table; /* Flattened table number. */
109 double freq; /* Frequency count. */
110 double *data; /* Crosstabulation table for integer mode. */
113 union value v[1]; /* Values. */
116 /* A crosstabulation. */
119 int nvar; /* Number of variables. */
120 double missing; /* Missing cases count. */
121 int ofs; /* Integer mode: Offset into sorted_tab[]. */
122 struct variable *v[2]; /* At least two variables; sorted by
123 larger indices first. */
126 /* Indexes into crosstab.v. */
133 /* General mode crosstabulation table. */
134 static struct hsh_table *gen_tab; /* Hash table. */
135 static int n_sorted_tab; /* Number of entries in sorted_tab. */
136 static struct table_entry **sorted_tab; /* Sorted table. */
138 /* VARIABLES dictionary. */
139 static struct dictionary *var_dict;
142 static struct crosstab **xtab;
145 /* Integer or general mode? */
154 static int num_cells; /* Number of cells requested. */
155 static int cells[8]; /* Cells requested. */
156 static int expected; /* Nonzero if expected value is needed. */
159 static int write; /* One of WR_* that specifies the WRITE style. */
161 /* Command parsing info. */
162 static struct cmd_crosstabs cmd;
165 static struct pool *pl_tc; /* For table cells. */
166 static struct pool *pl_col; /* For column data. */
168 static int internal_cmd_crosstabs (void);
169 static void free_var_dict (void);
170 static void precalc (void);
171 static int calc_general (struct ccase *);
172 static int calc_integer (struct ccase *);
173 static void postcalc (void);
174 static void submit (struct tab_table *);
177 static void debug_print (void);
178 static void print_table_entries (struct table_entry **tab);
181 /* Parse and execute CROSSTABS, then clean up. */
185 int result = internal_cmd_crosstabs ();
188 pool_destroy (pl_tc);
189 pool_destroy (pl_col);
194 /* Parses and executes the CROSSTABS procedure. */
196 internal_cmd_crosstabs (void)
201 pl_tc = pool_create ();
202 pl_col = pool_create ();
204 lex_match_id ("CROSSTABS");
205 if (!parse_crosstabs (&cmd))
209 /* Needs var_dict. */
213 mode = var_dict ? INTEGER : GENERAL;
220 cmd.a_cells[CRS_CL_COUNT] = 1;
228 for (i = 0; i < CRS_CL_count; i++)
233 cmd.a_cells[CRS_CL_COUNT] = 1;
234 cmd.a_cells[CRS_CL_ROW] = 1;
235 cmd.a_cells[CRS_CL_COLUMN] = 1;
236 cmd.a_cells[CRS_CL_TOTAL] = 1;
238 if (cmd.a_cells[CRS_CL_ALL])
240 for (i = 0; i < CRS_CL_count; i++)
242 cmd.a_cells[CRS_CL_ALL] = 0;
244 cmd.a_cells[CRS_CL_NONE] = 0;
245 for (num_cells = i = 0; i < CRS_CL_count; i++)
248 if (i >= CRS_CL_EXPECTED)
250 cmd.a_cells[num_cells++] = i;
255 if (cmd.sbc_statistics)
260 for (i = 0; i < CRS_ST_count; i++)
261 if (cmd.a_statistics[i])
264 cmd.a_statistics[CRS_ST_CHISQ] = 1;
265 if (cmd.a_statistics[CRS_ST_ALL])
266 for (i = 0; i < CRS_ST_count; i++)
267 cmd.a_statistics[i] = 1;
271 if (cmd.miss == CRS_REPORT && mode == GENERAL)
273 msg (SE, _("Missing mode REPORT not allowed in general mode. "
274 "Assuming MISSING=TABLE."));
275 cmd.miss = CRS_TABLE;
279 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
280 cmd.a_write[CRS_WR_ALL] = 0;
281 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
283 msg (SE, _("Write mode ALL not allowed in general mode. "
284 "Assuming WRITE=CELLS."));
285 cmd.a_write[CRS_WR_CELLS] = 1;
288 && (cmd.a_write[CRS_WR_NONE]
289 + cmd.a_write[CRS_WR_ALL]
290 + cmd.a_write[CRS_WR_CELLS] == 0))
291 cmd.a_write[CRS_WR_CELLS] = 1;
292 if (cmd.a_write[CRS_WR_CELLS])
293 write = CRS_WR_CELLS;
294 else if (cmd.a_write[CRS_WR_ALL])
299 update_weighting (&default_dict);
300 procedure (precalc, mode == GENERAL ? calc_general : calc_integer, postcalc);
305 /* Frees var_dict once it's no longer needed. */
315 if (var_dict->var_by_name)
317 avl_destroy (var_dict->var_by_name, NULL);
318 var_dict->var_by_name = NULL;
321 for (i = 0; i < var_dict->nvar; i++)
322 free (var_dict->var[i]);
323 free (var_dict->var);
324 var_dict->var = NULL;
327 free_dictionary (var_dict);
333 /* Parses the TABLES subcommand. */
335 crs_custom_tables (struct cmd_crosstabs *cmd unused)
337 struct dictionary *dict;
339 struct variable ***by = NULL;
344 /* Ensure that this is a TABLES subcommand. */
345 if (!lex_match_id ("TABLES")
346 && (token != T_ID || !is_varname (tokid))
351 dict = var_dict ? var_dict : &default_dict;
355 by = xrealloc (by, sizeof *by * (n_by + 1));
356 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
357 if (!parse_variables (dict, &by[n_by], &by_nvar[n_by],
358 PV_NO_DUPLICATE | PV_NO_SCRATCH))
363 if (!lex_match (T_BY))
367 lex_error (_("expecting BY"));
376 int *by_iter = xcalloc (sizeof *by_iter * n_by);
379 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
380 for (i = 0; i < nx; i++)
384 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
391 if (var_dict == NULL)
392 for (i = 0; i < n_by; i++)
393 x->v[i] = by[i][by_iter[i]];
395 for (i = 0; i < n_by; i++)
396 x->v[i] = default_dict.var[by[i][by_iter[i]]->foo];
402 for (i = n_by - 1; i >= 0; i--)
404 if (++by_iter[i] < by_nvar[i])
416 /* Despite the name, we come here whether we're successful or
422 for (i = 0; i < n_by; i++)
431 /* Parses the VARIABLES subcommand. */
433 crs_custom_variables (struct cmd_crosstabs *cmd unused)
435 struct variable **v = NULL;
440 msg (SE, _("VARIABLES must be specified before TABLES."));
453 if (!parse_variables (&default_dict, &v, &nv,
454 (PV_APPEND | PV_NUMERIC
455 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
460 lex_error ("expecting `('");
465 if (!lex_force_int ())
467 min = lex_integer ();
472 if (!lex_force_int ())
474 max = lex_integer ();
477 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
485 lex_error ("expecting `)'");
490 for (i = orig_nv; i < nv; i++)
492 v[i]->p.crs.min = min;
493 v[i]->p.crs.max = max + 1.;
494 v[i]->p.crs.count = max - min + 1;
504 var_dict = new_dictionary (0);
505 var_dict->var = xmalloc (sizeof *var_dict->var * nv);
507 for (i = 0; i < nv; i++)
509 struct variable *var = xmalloc (offsetof (struct variable, width));
510 strcpy (var->name, v[i]->name);
512 var->type = v[i]->type;
513 var->foo = v[i]->index;
514 var_dict->var[i] = var;
515 avl_force_insert (var_dict->var_by_name, var);
531 printf ("CROSSTABS\n");
537 printf ("\t/VARIABLES=");
538 for (i = 0; i < var_dict->nvar; i++)
540 struct variable *v = var_dict->var[i];
541 struct variable *iv = default_dict.var[v->foo];
543 printf ("%s ", v->name);
544 if (i < var_dict->nvar - 1)
546 struct variable *nv = var_dict->var[i + 1];
547 struct variable *niv = default_dict.var[nv->foo];
549 if (iv->p.crs.min == niv->p.crs.min
550 && iv->p.crs.max == niv->p.crs.max)
553 printf ("(%d,%d) ", iv->p.crs.min, iv->p.crs.max - 1);
561 printf ("\t/TABLES=");
562 for (i = 0; i < nxtab; i++)
564 struct crosstab *x = xtab[i];
569 for (j = 0; j < x->nvar; j++)
573 printf ("%s", x->v[j]->name);
579 #endif /* DEBUGGING */
581 /* Data file processing. */
583 static int compare_table_entry (const void *, const void *, void *);
584 static unsigned hash_table_entry (const void *, void *);
586 /* Set up the crosstabulation tables for processing. */
592 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
602 for (i = 0; i < nxtab; i++)
604 struct crosstab *x = xtab[i];
609 x->ofs = n_sorted_tab;
611 for (j = 2; j < x->nvar; j++)
612 count *= x->v[j - 2]->p.crs.count;
614 sorted_tab = xrealloc (sorted_tab,
615 sizeof *sorted_tab * (n_sorted_tab + count));
616 v = local_alloc (sizeof *v * x->nvar);
617 for (j = 2; j < x->nvar; j++)
618 v[j] = x->v[j]->p.crs.min;
619 for (j = 0; j < count; j++)
621 struct table_entry *te;
624 te = sorted_tab[n_sorted_tab++]
625 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
629 const int mat_size = (x->v[0]->p.crs.count
630 * x->v[1]->p.crs.count);
633 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
634 for (m = 0; m < mat_size; m++)
638 for (k = 2; k < x->nvar; k++)
640 for (k = 2; k < x->nvar; k++)
641 if (++v[k] >= x->v[k]->p.crs.max)
642 v[k] = x->v[k]->p.crs.min;
649 sorted_tab = xrealloc (sorted_tab,
650 sizeof *sorted_tab * (n_sorted_tab + 1));
651 sorted_tab[n_sorted_tab] = NULL;
655 /* Form crosstabulations for general mode. */
657 calc_general (struct ccase *c)
660 double w = (default_dict.weight_index != -1
661 ? c->data[default_dict.var[default_dict.weight_index]->fv].f
664 /* Flattened current table index. */
667 for (t = 0; t < nxtab; t++)
669 struct crosstab *x = xtab[t];
670 const size_t entry_size = (sizeof (struct table_entry)
671 + sizeof (union value) * (x->nvar - 1));
672 struct table_entry *te = local_alloc (entry_size);
674 /* Construct table entry for the current record and table. */
680 for (j = 0; j < x->nvar; j++)
682 if ((cmd.miss == CRS_TABLE
683 && is_missing (&c->data[x->v[j]->fv], x->v[j]))
684 || (cmd.miss == CRS_INCLUDE
685 && is_system_missing (&c->data[x->v[j]->fv], x->v[j])))
691 if (x->v[j]->type == NUMERIC)
692 te->v[j].f = c->data[x->v[j]->fv].f;
695 memcpy (te->v[j].s, c->data[x->v[j]->fv].s, x->v[j]->width);
697 /* Necessary in order to simplify comparisons. */
698 memset (&te->v[j].s[x->v[j]->width], 0,
699 sizeof (union value) - x->v[j]->width);
704 /* Add record to hash table. */
706 struct table_entry **tepp = (struct table_entry **) hsh_probe (gen_tab, te);
709 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
712 memcpy (tep, te, entry_size);
717 (*tepp)->u.freq += w;
728 calc_integer (struct ccase *c)
731 double w = (default_dict.weight_index != -1
732 ? c->data[default_dict.var[default_dict.weight_index]->fv].f
735 /* Flattened current table index. */
738 for (t = 0; t < nxtab; t++)
740 struct crosstab *x = xtab[t];
745 for (i = 0; i < x->nvar; i++)
747 struct variable *const v = x->v[i];
748 double value = c->data[v->fv].f;
750 /* Note that the first test also rules out SYSMIS. */
751 if ((value < v->p.crs.min || value >= v->p.crs.max)
752 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
760 ofs += fact * ((int) value - v->p.crs.min);
761 fact *= v->p.crs.count;
766 const int row = c->data[x->v[ROW_VAR]->fv].f - x->v[ROW_VAR]->p.crs.min;
767 const int col = c->data[x->v[COL_VAR]->fv].f - x->v[COL_VAR]->p.crs.min;
768 const int col_dim = x->v[COL_VAR]->p.crs.count;
770 sorted_tab[ofs]->u.data[col + row * col_dim] += w;
780 /* Print out all table entries in NULL-terminated TAB for use by a
781 debugger (a person, not a program). */
783 print_table_entries (struct table_entry **tab)
785 printf ("raw crosstabulation data:\n");
788 const struct crosstab *x = xtab[(*tab)->table];
791 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
792 for (i = 0; i < x->nvar; i++)
796 printf ("%s:", x->v[i]->name);
798 if (x->v[i]->type == NUMERIC)
799 printf ("%g", (*tab)->v[i].f);
801 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
809 /* Compare the table_entry's at PA and PB and return a strcmp()-type
812 compare_table_entry (const void *pa, const void *pb, void *foo unused)
814 const struct table_entry *a = pa;
815 const struct table_entry *b = pb;
818 const int difftable = a->table - b->table;
824 const struct crosstab *x = xtab[a->table];
827 for (i = x->nvar - 1; i >= 0; i--)
828 if (x->v[i]->type == NUMERIC)
830 const double diffnum = a->v[i].f - b->v[i].f;
833 else if (diffnum > 0)
838 assert (x->v[i]->type == ALPHA);
840 const int diffstr = strncmp (a->v[i].s, b->v[i].s, x->v[i]->width);
850 /* Calculate a hash value from table_entry PA. */
852 hash_table_entry (const void *pa, void *foo unused)
854 const struct table_entry *a = pa;
855 unsigned long hash = a->table;
858 /* Hash formula from _SPSS Statistical Algorithms_. */
859 for (i = 0; i < xtab[a->table]->nvar; i++)
861 hash = (hash << 3) | (hash >> (CHAR_BIT * SIZEOF_LONG - 3));
862 hash ^= a->v[i].hash[0];
863 #if SIZEOF_DOUBLE / SIZEOF_LONG > 1
864 hash ^= a->v[i].hash[1];
871 /* Post-data reading calculations. */
873 static struct table_entry **find_pivot_extent (struct table_entry **, int *cnt, int pivot);
874 static void enum_var_values (struct table_entry **beg, int cnt,
875 union value **values, int *nvalues,
877 static void output_pivot_table (struct table_entry **, struct table_entry **,
878 double **, double **, double **,
879 int *, int *, int *);
880 static void make_summary_table (void);
887 n_sorted_tab = hsh_count (gen_tab);
888 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
890 print_table_entries (sorted_tab);
894 make_summary_table ();
896 /* Identify all the individual crosstabulation tables, and deal with
899 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
900 int pc = n_sorted_tab; /* Pivot count. */
902 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
903 int maxrows = 0, maxcols = 0, maxcells = 0;
907 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
911 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
912 &maxrows, &maxcols, &maxcells);
921 hsh_destroy (gen_tab);
924 static void insert_summary (struct tab_table *, int tab_index, double valid);
926 /* Output a table summarizing the cases processed. */
928 make_summary_table (void)
930 struct tab_table *summary;
932 struct table_entry **pb = sorted_tab, **pe;
933 int pc = n_sorted_tab;
936 summary = tab_create (7, 3 + nxtab, 1);
937 tab_title (summary, 0, _("Summary."));
938 tab_headers (summary, 1, 0, 3, 0);
939 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
940 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
941 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
942 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
943 tab_hline (summary, TAL_1, 1, 6, 1);
944 tab_hline (summary, TAL_1, 1, 6, 2);
945 tab_vline (summary, TAL_1, 3, 1, 1);
946 tab_vline (summary, TAL_1, 5, 1, 1);
950 for (i = 0; i < 3; i++)
952 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
953 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
956 tab_offset (summary, 0, 3);
962 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
966 while (cur_tab < (*pb)->table)
967 insert_summary (summary, cur_tab++, 0.);
970 for (valid = 0.; pb < pe; pb++)
971 valid += (*pb)->u.freq;
974 const struct crosstab *const x = xtab[(*pb)->table];
975 const int n_cols = x->v[COL_VAR]->p.crs.count;
976 const int n_rows = x->v[ROW_VAR]->p.crs.count;
977 const int count = n_cols * n_rows;
979 for (valid = 0.; pb < pe; pb++)
981 const double *data = (*pb)->u.data;
984 for (i = 0; i < count; i++)
988 insert_summary (summary, cur_tab++, valid);
993 while (cur_tab < nxtab)
994 insert_summary (summary, cur_tab++, 0.);
999 /* Inserts a line into T describing the crosstabulation at index
1000 TAB_INDEX, which has VALID valid observations. */
1002 insert_summary (struct tab_table *t, int tab_index, double valid)
1004 struct crosstab *x = xtab[tab_index];
1006 tab_hline (t, TAL_1, 0, 6, 0);
1008 /* Crosstabulation name. */
1010 char *buf = local_alloc (128 * x->nvar);
1014 for (i = 0; i < x->nvar; i++)
1017 cp = stpcpy (cp, " * ");
1019 cp = stpcpy (cp, x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1021 tab_text (t, 0, 0, TAB_LEFT, buf);
1026 /* Counts and percentages. */
1036 for (i = 0; i < 3; i++)
1038 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
1039 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
1040 n[i] / n[2] * 100.);
1050 static struct tab_table *table; /* Crosstabulation table. */
1051 static struct tab_table *chisq; /* Chi-square table. */
1052 static struct tab_table *sym; /* Symmetric measures table. */
1053 static struct tab_table *risk; /* Risk estimate table. */
1054 static struct tab_table *direct; /* Directional measures table. */
1057 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
1059 /* Column values, number of columns. */
1060 static union value *cols;
1063 /* Row values, number of rows. */
1064 static union value *rows;
1067 /* Number of statistically interesting columns/rows (columns/rows with
1069 static int ns_cols, ns_rows;
1071 /* Crosstabulation. */
1072 static struct crosstab *x;
1074 /* Number of variables from the crosstabulation to consider. This is
1075 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1078 /* Matrix contents. */
1079 static double *mat; /* Matrix proper. */
1080 static double *row_tot; /* Row totals. */
1081 static double *col_tot; /* Column totals. */
1082 static double W; /* Grand total. */
1084 static void display_dimensions (struct tab_table *, int first_difference,
1085 struct table_entry *);
1086 static void display_crosstabulation (void);
1087 static void display_chisq (void);
1088 static void display_symmetric (void);
1089 static void display_risk (void);
1090 static void display_directional (void);
1091 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1092 static void table_value_missing (struct tab_table *table, int c, int r,
1093 unsigned char opt, const union value *v,
1094 const struct variable *var);
1095 static void delete_missing (void);
1097 /* Output pivot table beginning at PB and continuing until PE,
1098 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1099 hold *MAXROWS entries. */
1101 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1102 double **matp, double **row_totp, double **col_totp,
1103 int *maxrows, int *maxcols, int *maxcells)
1106 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1107 int tc = pe - pb; /* Table count. */
1109 /* Table entry for header comparison. */
1110 struct table_entry *cmp;
1112 x = xtab[(*pb)->table];
1113 enum_var_values (pb, pe - pb, &cols, &n_cols, COL_VAR);
1115 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1117 /* Crosstabulation table initialization. */
1120 table = tab_create (nvar + n_cols,
1121 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1122 tab_headers (table, nvar - 1, 0, 2, 0);
1124 /* First header line. */
1125 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1126 TAB_CENTER | TAT_TITLE, x->v[COL_VAR]->name);
1128 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1130 /* Second header line. */
1134 for (i = 2; i < nvar; i++)
1135 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1136 TAB_RIGHT | TAT_TITLE,
1137 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1138 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1139 x->v[ROW_VAR]->name);
1140 for (i = 0; i < n_cols; i++)
1141 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1143 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1146 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1147 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1151 char *title = local_alloc (x->nvar * 64 + 128);
1155 if (cmd.pivot == CRS_PIVOT)
1156 for (i = 0; i < nvar; i++)
1159 cp = stpcpy (cp, " by ");
1160 cp = stpcpy (cp, x->v[i]->name);
1164 cp = spprintf (cp, "%s by %s for", x->v[0]->name, x->v[1]->name);
1165 for (i = 2; i < nvar; i++)
1167 char buf[64], *bufp;
1172 cp = stpcpy (cp, x->v[i]->name);
1174 data_out (buf, &x->v[i]->print, &(*pb)->v[i]);
1175 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1177 cp = stpcpy (cp, bufp);
1181 cp = stpcpy (cp, " [");
1182 for (i = 0; i < num_cells; i++)
1190 static const struct tuple cell_names[] =
1192 {CRS_CL_COUNT, N_("count")},
1193 {CRS_CL_ROW, N_("row %")},
1194 {CRS_CL_COLUMN, N_("column %")},
1195 {CRS_CL_TOTAL, N_("total %")},
1196 {CRS_CL_EXPECTED, N_("expected")},
1197 {CRS_CL_RESIDUAL, N_("residual")},
1198 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1199 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1203 const struct tuple *t;
1205 for (t = cell_names; t->value != cells[i]; t++)
1206 assert (t->value != -1);
1208 cp = stpcpy (cp, ", ");
1209 cp = stpcpy (cp, gettext (t->name));
1213 tab_title (table, 0, title);
1217 tab_offset (table, 0, 2);
1222 /* Chi-square table initialization. */
1223 if (cmd.a_statistics[CRS_ST_CHISQ])
1225 chisq = tab_create (6 + (nvar - 2),
1226 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1227 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1229 tab_title (chisq, 0, "Chi-square tests.");
1231 tab_offset (chisq, nvar - 2, 0);
1232 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1233 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1234 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1235 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1236 _("Asymp. Sig. (2-sided)"));
1237 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1238 _("Exact. Sig. (2-sided)"));
1239 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1240 _("Exact. Sig. (1-sided)"));
1242 tab_offset (chisq, 0, 1);
1247 /* Symmetric measures. */
1248 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1249 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1250 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1251 || cmd.a_statistics[CRS_ST_KAPPA])
1253 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1254 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1255 tab_title (sym, 0, "Symmetric measures.");
1257 tab_offset (sym, nvar - 2, 0);
1258 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1259 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1260 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1261 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1262 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1263 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1264 tab_offset (sym, 0, 1);
1269 /* Risk estimate. */
1270 if (cmd.a_statistics[CRS_ST_RISK])
1272 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1273 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1274 tab_title (risk, 0, "Risk estimate.");
1276 tab_offset (risk, nvar - 2, 0);
1277 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1278 _(" 95%% Confidence Interval"));
1279 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1280 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1281 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1282 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1283 tab_hline (risk, TAL_1, 2, 3, 1);
1284 tab_vline (risk, TAL_1, 2, 0, 1);
1285 tab_offset (risk, 0, 2);
1290 /* Directional measures. */
1291 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1292 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1294 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1295 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1296 tab_title (direct, 0, "Directional measures.");
1298 tab_offset (direct, nvar - 2, 0);
1299 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1300 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1301 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1302 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1303 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1304 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1305 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1306 tab_offset (direct, 0, 1);
1313 /* Find pivot subtable if applicable. */
1314 te = find_pivot_extent (tb, &tc, 0);
1318 /* Find all the row variable values. */
1319 enum_var_values (tb, te - tb, &rows, &n_rows, ROW_VAR);
1321 /* Allocate memory space for the column and row totals. */
1322 if (n_rows > *maxrows)
1324 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1325 row_tot = *row_totp;
1328 if (n_cols > *maxcols)
1330 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1331 col_tot = *col_totp;
1335 /* Allocate table space for the matrix. */
1336 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1337 tab_realloc (table, -1,
1338 max (tab_nr (table) + (n_rows + 1) * num_cells,
1339 tab_nr (table) * (pe - pb) / (te - tb)));
1341 if (mode == GENERAL)
1343 /* Allocate memory space for the matrix. */
1344 if (n_cols * n_rows > *maxcells)
1346 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1347 *maxcells = n_cols * n_rows;
1352 /* Build the matrix and calculate column totals. */
1354 union value *cur_col = cols;
1355 union value *cur_row = rows;
1357 double *cp = col_tot;
1358 struct table_entry **p;
1361 for (p = &tb[0]; p < te; p++)
1363 for (; memcmp (cur_col, &(*p)->v[COL_VAR], sizeof *cur_col);
1367 for (; cur_row < &rows[n_rows]; cur_row++)
1373 mp = &mat[cur_col - cols];
1376 for (; memcmp (cur_row, &(*p)->v[ROW_VAR], sizeof *cur_row);
1383 *cp += *mp = (*p)->u.freq;
1388 /* Zero out the rest of the matrix. */
1389 for (; cur_row < &rows[n_rows]; cur_row++)
1395 if (cur_col < &cols[n_cols])
1397 const int rem_cols = n_cols - (cur_col - cols);
1400 for (c = 0; c < rem_cols; c++)
1402 mp = &mat[cur_col - cols];
1403 for (r = 0; r < n_rows; r++)
1405 for (c = 0; c < rem_cols; c++)
1407 mp += n_cols - rem_cols;
1415 double *tp = col_tot;
1417 assert (mode == INTEGER);
1418 mat = (*tb)->u.data;
1421 /* Calculate column totals. */
1422 for (c = 0; c < n_cols; c++)
1425 double *cp = &mat[c];
1427 for (r = 0; r < n_rows; r++)
1428 cum += cp[r * n_cols];
1436 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1437 ns_cols += *cp != 0.;
1440 /* Calculate row totals. */
1443 double *rp = row_tot;
1446 for (ns_rows = 0, r = n_rows; r--; )
1449 for (c = n_cols; c--; )
1457 /* Calculate grand total. */
1463 if (n_rows < n_cols)
1464 tp = row_tot, n = n_rows;
1466 tp = col_tot, n = n_cols;
1473 /* Print the matrix. */
1477 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1478 for (i = 2; i < nvar; i++)
1479 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1482 for (c = 0; c < n_cols; c++)
1483 printf ("%4g", cols[c].f);
1485 for (r = 0; r < n_rows; r++)
1487 printf ("%4g:", rows[r].f);
1488 for (c = 0; c < n_cols; c++)
1489 printf ("%4g", mat[c + r * n_cols]);
1490 printf ("%4g", row_tot[r]);
1494 for (c = 0; c < n_cols; c++)
1495 printf ("%4g", col_tot[c]);
1501 /* Find the first variable that differs from the last subtable,
1502 then display the values of the dimensioning variables for
1503 each table that needs it. */
1505 int first_difference = nvar - 1;
1508 for (; ; first_difference--)
1510 assert (first_difference >= 2);
1511 if (memcmp (&cmp->v[first_difference],
1512 &(*tb)->v[first_difference], sizeof *cmp->v))
1518 display_dimensions (table, first_difference, *tb);
1520 display_dimensions (chisq, first_difference, *tb);
1522 display_dimensions (sym, first_difference, *tb);
1524 display_dimensions (risk, first_difference, *tb);
1526 display_dimensions (direct, first_difference, *tb);
1530 display_crosstabulation ();
1531 if (cmd.miss == CRS_REPORT)
1536 display_symmetric ();
1540 display_directional ();
1551 tab_resize (chisq, 4 + (nvar - 2), -1);
1562 /* Delete missing rows and columns for statistical analysis when
1565 delete_missing (void)
1570 for (r = 0; r < n_rows; r++)
1571 if (is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1575 for (c = 0; c < n_cols; c++)
1576 mat[c + r * n_cols] = 0.;
1584 for (c = 0; c < n_cols; c++)
1585 if (is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1589 for (r = 0; r < n_rows; r++)
1590 mat[c + r * n_cols] = 0.;
1596 /* Prepare table T for submission, and submit it. */
1598 submit (struct tab_table *t)
1605 tab_resize (t, -1, 0);
1606 if (tab_nr (t) == tab_t (t))
1611 tab_offset (t, 0, 0);
1613 for (i = 2; i < nvar; i++)
1614 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1615 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1616 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1617 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1619 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1621 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1622 tab_dim (t, crosstabs_dim);
1626 /* Sets the widths of all the columns and heights of all the rows in
1627 table T for driver D. */
1629 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1633 /* Width of a numerical column. */
1634 int c = outp_string_width (d, "0.000000");
1635 if (cmd.miss == CRS_REPORT)
1636 c += outp_string_width (d, "M");
1638 /* Set width for header columns. */
1641 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1643 if (w < d->prop_em_width * 8)
1644 w = d->prop_em_width * 8;
1646 if (w > d->prop_em_width * 15)
1647 w = d->prop_em_width * 15;
1649 for (i = 0; i < t->l; i++)
1653 for (i = t->l; i < t->nc; i++)
1656 for (i = 0; i < t->nr; i++)
1657 t->h[i] = tab_natural_height (t, d, i);
1660 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1661 int *cnt, int pivot);
1662 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1663 int *cnt, int pivot);
1665 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1667 static struct table_entry **
1668 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1670 return (mode == GENERAL
1671 ? find_pivot_extent_general (tp, cnt, pivot)
1672 : find_pivot_extent_integer (tp, cnt, pivot));
1675 /* Find the extent of a region in TP that contains one table. If
1676 PIVOT != 0 that means a set of table entries with identical table
1677 number; otherwise they also have to have the same values for every
1678 dimension after the row and column dimensions. The table that is
1679 searched starts at TP and has length CNT. Returns the first entry
1680 after the last one in the table; sets *CNT to the number of
1681 remaining values. If there are no entries in TP at all, returns
1682 NULL. A yucky interface, admittedly, but it works. */
1683 static struct table_entry **
1684 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1686 struct table_entry *fp = *tp;
1691 x = xtab[(*tp)->table];
1699 if ((*tp)->table != fp->table)
1704 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1711 /* Integer mode correspondent to find_pivot_extent_general(). This
1712 could be optimized somewhat, but I just don't give a crap about
1713 CROSSTABS performance in integer mode, which is just a wart on
1714 CROSSTABS' ass as far as I'm concerned.
1716 That said, feel free to send optimization patches to me. */
1717 static struct table_entry **
1718 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1720 struct table_entry *fp = *tp;
1725 x = xtab[(*tp)->table];
1733 if ((*tp)->table != fp->table)
1738 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1745 /* Compare value * A and B, where WIDTH is the string width or 0 for
1746 numerics, and return a strcmp()-type result. */
1748 compare_value (const void *pa, const void *pb, void *pwidth)
1750 const union value *a = pa;
1751 const union value *b = pb;
1752 const int width = (int) pwidth;
1755 return strncmp (a->s, b->s, width);
1757 return a->f < b->f ? -1 : (a->f > b->f ? 1 : 0);
1760 /* Given a list of CNT table_entry's starting at BEG, creates a list
1761 of *NVALUES values *VALUES of variable with index VAR_INDEX. */
1763 enum_var_values (struct table_entry **beg, int cnt, union value **values, int *nvalues,
1766 if (mode == GENERAL)
1770 tree = avl_create (pl_col, compare_value,
1771 (void *) (xtab[(*beg)->table]->v[var_index]->width));
1776 for (i = 0; i < cnt; i++)
1777 avl_insert (tree, &beg[i]->v[var_index]);
1778 *values = xmalloc (sizeof **values * avl_count (tree));
1787 while (NULL != (v = avl_traverse (tree, &trav)))
1788 (*values)[i++] = *v;
1793 pool_destroy (pl_col);
1794 pl_col = pool_create ();
1798 struct crosstab_proc *crs = &xtab[(*beg)->table]->v[var_index]->p.crs;
1801 assert (mode == INTEGER);
1802 *values = xmalloc (sizeof **values * crs->count);
1803 for (i = 0; i < crs->count; i++)
1804 (*values)[i].f = i + crs->min;
1805 *nvalues = crs->count;
1809 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1810 from V, displayed with print format spec from variable VAR. When
1811 in REPORT missing-value mode, missing values have an M appended. */
1813 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1814 const union value *v, const struct variable *var)
1816 struct len_string s;
1818 char *label = get_val_lab (var, *v, 0);
1821 tab_text (table, c, r, TAB_LEFT, label);
1825 s.length = var->print.w;
1826 s.string = tab_alloc (table, s.length + 1);
1827 data_out (s.string, &var->print, v);
1828 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1829 s.string[s.length++] = 'M';
1830 while (s.length && *s.string == ' ')
1835 tab_raw (table, c, r, opt, &s);
1838 /* Draws a line across TABLE at the current row to indicate the most
1839 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1840 that changed, and puts the values that changed into the table. TB
1841 and X must be the corresponding table_entry and crosstab,
1844 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1846 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1848 for (; first_difference >= 2; first_difference--)
1849 table_value_missing (table, nvar - first_difference - 1, 0,
1850 TAB_RIGHT, &tb->v[first_difference],
1851 x->v[first_difference]);
1854 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1856 float_M_suffix (struct tab_table *table, int c, int r, double v)
1858 static const struct fmt_spec f = {FMT_F, 8, 0};
1859 struct len_string s;
1862 s.string = tab_alloc (table, 9);
1864 data_out (s.string, &f, (union value *) &v);
1865 while (*s.string == ' ')
1870 tab_raw (table, c, r, TAB_RIGHT, &s);
1873 /* Displays the crosstabulation table. */
1875 display_crosstabulation (void)
1880 for (r = 0; r < n_rows; r++)
1881 table_value_missing (table, nvar - 2, r * num_cells,
1882 TAB_RIGHT, &rows[r], x->v[ROW_VAR]);
1884 tab_text (table, nvar - 2, n_rows * num_cells,
1885 TAB_LEFT, _("Total"));
1887 /* Put in the actual cells. */
1892 tab_offset (table, nvar - 1, -1);
1893 for (r = 0; r < n_rows; r++)
1896 tab_hline (table, TAL_1, -1, n_cols, 0);
1897 for (c = 0; c < n_cols; c++)
1899 double expected_value;
1902 expected_value = row_tot[r] * col_tot[c] / W;
1903 for (i = 0; i < num_cells; i++)
1913 v = *mp / row_tot[r] * 100.;
1916 v = *mp / col_tot[c] * 100.;
1921 case CRS_CL_EXPECTED:
1924 case CRS_CL_RESIDUAL:
1925 v = *mp - expected_value;
1927 case CRS_CL_SRESIDUAL:
1928 v = (*mp - expected_value) / sqrt (expected_value);
1930 case CRS_CL_ASRESIDUAL:
1931 v = ((*mp - expected_value)
1932 / sqrt (expected_value
1933 * (1. - row_tot[r] / W)
1934 * (1. - col_tot[c] / W)));
1940 if (cmd.miss == CRS_REPORT
1941 && (is_num_user_missing (cols[c].f, x->v[COL_VAR])
1942 || is_num_user_missing (rows[r].f, x->v[ROW_VAR])))
1943 float_M_suffix (table, c, i, v);
1945 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1951 tab_offset (table, -1, tab_row (table) + num_cells);
1959 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1960 for (r = 0; r < n_rows; r++)
1961 for (i = 0; i < num_cells; i++)
1974 v = row_tot[r] / W * 100.;
1977 v = row_tot[r] / W * 100.;
1979 case CRS_CL_EXPECTED:
1980 case CRS_CL_RESIDUAL:
1981 case CRS_CL_SRESIDUAL:
1982 case CRS_CL_ASRESIDUAL:
1989 if (cmd.miss == CRS_REPORT
1990 && is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1991 float_M_suffix (table, n_cols, 0, v);
1993 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1995 tab_next_row (table);
1999 /* Column totals, grand total. */
2004 tab_hline (table, TAL_1, -1, n_cols, 0);
2005 for (c = 0; c <= n_cols; c++)
2007 double ct = c < n_cols ? col_tot[c] : W;
2010 for (i = j = 0; i < num_cells; i++)
2028 case CRS_CL_EXPECTED:
2029 case CRS_CL_RESIDUAL:
2030 case CRS_CL_SRESIDUAL:
2031 case CRS_CL_ASRESIDUAL:
2037 if (cmd.miss == CRS_REPORT && c < n_cols
2038 && is_num_user_missing (cols[c].f, x->v[COL_VAR]))
2039 float_M_suffix (table, c, j, v);
2041 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
2047 tab_offset (table, -1, tab_row (table) + j);
2050 tab_offset (table, 0, -1);
2053 static void calc_r (double *X, double *Y, double *, double *, double *);
2054 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
2056 /* Display chi-square statistics. */
2058 display_chisq (void)
2060 static const char *chisq_stats[N_CHISQ] =
2062 N_("Pearson Chi-Square"),
2063 N_("Likelihood Ratio"),
2064 N_("Fisher's Exact Test"),
2065 N_("Continuity Correction"),
2066 N_("Linear-by-Linear Association"),
2068 double chisq_v[N_CHISQ];
2069 double fisher1, fisher2;
2075 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2077 tab_offset (chisq, nvar - 2, -1);
2079 for (i = 0; i < N_CHISQ; i++)
2081 if ((i != 2 && chisq_v[i] == SYSMIS)
2082 || (i == 2 && fisher1 == SYSMIS))
2086 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2089 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2090 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2091 tab_float (chisq, 3, 0, TAB_RIGHT,
2092 chisq_sig (chisq_v[i], df[i]), 8, 3);
2097 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2098 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2100 tab_next_row (chisq);
2103 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2104 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2105 tab_next_row (chisq);
2107 tab_offset (chisq, 0, -1);
2110 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2111 double[N_SYMMETRIC]);
2113 /* Display symmetric measures. */
2115 display_symmetric (void)
2117 static const char *categories[] =
2119 N_("Nominal by Nominal"),
2120 N_("Ordinal by Ordinal"),
2121 N_("Interval by Interval"),
2122 N_("Measure of Agreement"),
2125 static const char *stats[N_SYMMETRIC] =
2129 N_("Contingency Coefficient"),
2130 N_("Kendall's tau-b"),
2131 N_("Kendall's tau-c"),
2133 N_("Spearman Correlation"),
2138 static const int stats_categories[N_SYMMETRIC] =
2140 0, 0, 0, 1, 1, 1, 1, 2, 3,
2144 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2147 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2150 tab_offset (sym, nvar - 2, -1);
2152 for (i = 0; i < N_SYMMETRIC; i++)
2154 if (sym_v[i] == SYSMIS)
2157 if (stats_categories[i] != last_cat)
2159 last_cat = stats_categories[i];
2160 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2163 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2164 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2165 if (sym_ase[i] != SYSMIS)
2166 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2167 if (sym_t[i] != SYSMIS)
2168 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2169 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2173 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2174 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2177 tab_offset (sym, 0, -1);
2180 static int calc_risk (double[], double[], double[], union value *);
2182 /* Display risk estimate. */
2187 double risk_v[3], lower[3], upper[3];
2191 if (!calc_risk (risk_v, upper, lower, c))
2194 tab_offset (risk, nvar - 2, -1);
2196 for (i = 0; i < 3; i++)
2198 if (risk_v[i] == SYSMIS)
2204 if (x->v[COL_VAR]->type == NUMERIC)
2205 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2206 x->v[COL_VAR]->name, c[0].f, c[1].f);
2208 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2209 x->v[COL_VAR]->name,
2210 x->v[COL_VAR]->width, c[0].s,
2211 x->v[COL_VAR]->width, c[1].s);
2215 if (x->v[ROW_VAR]->type == NUMERIC)
2216 sprintf (buf, _("For cohort %s = %g"),
2217 x->v[ROW_VAR]->name, rows[i - 1].f);
2219 sprintf (buf, _("For cohort %s = %.*s"),
2220 x->v[ROW_VAR]->name,
2221 x->v[ROW_VAR]->width, rows[i - 1].s);
2225 tab_text (risk, 0, 0, TAB_LEFT, buf);
2226 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2227 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2228 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2229 tab_next_row (risk);
2232 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2233 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2234 tab_next_row (risk);
2236 tab_offset (risk, 0, -1);
2239 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2240 double[N_DIRECTIONAL]);
2242 /* Display directional measures. */
2244 display_directional (void)
2246 static const char *categories[] =
2248 N_("Nominal by Nominal"),
2249 N_("Ordinal by Ordinal"),
2250 N_("Nominal by Interval"),
2253 static const char *stats[] =
2256 N_("Goodman and Kruskal tau"),
2257 N_("Uncertainty Coefficient"),
2262 static const char *types[] =
2269 static const int stats_categories[N_DIRECTIONAL] =
2271 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2274 static const int stats_stats[N_DIRECTIONAL] =
2276 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2279 static const int stats_types[N_DIRECTIONAL] =
2281 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2284 static const int *stats_lookup[] =
2291 static const char **stats_names[] =
2303 double direct_v[N_DIRECTIONAL];
2304 double direct_ase[N_DIRECTIONAL];
2305 double direct_t[N_DIRECTIONAL];
2309 if (!calc_directional (direct_v, direct_ase, direct_t))
2312 tab_offset (direct, nvar - 2, -1);
2314 for (i = 0; i < N_DIRECTIONAL; i++)
2316 if (direct_v[i] == SYSMIS)
2322 for (j = 0; j < 3; j++)
2323 if (last[j] != stats_lookup[j][i])
2326 tab_hline (direct, TAL_1, j, 6, 0);
2331 int k = last[j] = stats_lookup[j][i];
2336 string = x->v[0]->name;
2338 string = x->v[1]->name;
2340 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2341 gettext (stats_names[j][k]), string);
2346 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2347 if (direct_ase[i] != SYSMIS)
2348 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2349 if (direct_t[i] != SYSMIS)
2350 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2351 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2352 tab_next_row (direct);
2355 tab_offset (direct, 0, -1);
2358 /* Statistical calculations. */
2360 /* Returns the value of the gamma (factorial) function for an integer
2363 gamma_int (double x)
2368 for (i = 2; i < x; i++)
2373 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2375 static inline double
2376 Pr (int a, int b, int c, int d)
2378 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2379 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2380 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2381 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2382 / gamma_int (a + b + c + d + 1.));
2385 /* Swap the contents of A and B. */
2387 swap (int *a, int *b)
2394 /* Calculate significance for Fisher's exact test as specified in
2395 _SPSS Statistical Algorithms_, Appendix 5. */
2397 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2401 if (min (c, d) < min (a, b))
2402 swap (&a, &c), swap (&b, &d);
2403 if (min (b, d) < min (a, c))
2404 swap (&a, &b), swap (&c, &d);
2408 swap (&a, &b), swap (&c, &d);
2410 swap (&a, &c), swap (&b, &d);
2414 for (x = 0; x <= a; x++)
2415 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2417 *fisher2 = *fisher1;
2418 for (x = 1; x <= b; x++)
2419 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2422 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2423 columns with values COLS and N_ROWS rows with values ROWS. Values
2424 in the matrix sum to W. */
2426 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2427 double *fisher1, double *fisher2)
2431 chisq[0] = chisq[1] = 0.;
2432 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2433 *fisher1 = *fisher2 = SYSMIS;
2435 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2437 if (ns_rows <= 1 || ns_cols <= 1)
2439 chisq[0] = chisq[1] = SYSMIS;
2443 for (r = 0; r < n_rows; r++)
2444 for (c = 0; c < n_cols; c++)
2446 const double expected = row_tot[r] * col_tot[c] / W;
2447 const double freq = mat[n_cols * r + c];
2448 const double residual = freq - expected;
2451 chisq[0] += residual * residual / expected;
2453 chisq[1] += freq * log (expected / freq);
2464 /* Calculate Yates and Fisher exact test. */
2465 if (ns_cols == 2 && ns_rows == 2)
2467 double f11, f12, f21, f22;
2473 for (i = j = 0; i < n_cols; i++)
2474 if (col_tot[i] != 0.)
2483 f11 = mat[nz_cols[0]];
2484 f12 = mat[nz_cols[1]];
2485 f21 = mat[nz_cols[0] + n_cols];
2486 f22 = mat[nz_cols[1] + n_cols];
2491 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2494 chisq[3] = (W * x * x
2495 / (f11 + f12) / (f21 + f22)
2496 / (f11 + f21) / (f12 + f22));
2504 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2505 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2508 /* Calculate Mantel-Haenszel. */
2509 if (x->v[ROW_VAR]->type == NUMERIC && x->v[COL_VAR]->type == NUMERIC)
2511 double r, ase_0, ase_1;
2512 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2514 chisq[4] = (W - 1.) * r * r;
2519 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2520 ASE_1, and ase_0 into ASE_0. The row and column values must be
2521 passed in X and Y. */
2523 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2525 double SX, SY, S, T;
2527 double sum_XYf, sum_X2Y2f;
2528 double sum_Xr, sum_X2r;
2529 double sum_Yc, sum_Y2c;
2532 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2533 for (j = 0; j < n_cols; j++)
2535 double fij = mat[j + i * n_cols];
2536 double product = X[i] * Y[j];
2537 double temp = fij * product;
2539 sum_X2Y2f += temp * product;
2542 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2544 sum_Xr += X[i] * row_tot[i];
2545 sum_X2r += X[i] * X[i] * row_tot[i];
2549 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2551 sum_Yc += Y[i] * col_tot[i];
2552 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2556 S = sum_XYf - sum_Xr * sum_Yc / W;
2557 SX = sum_X2r - sum_Xr * sum_Xr / W;
2558 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2561 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2566 for (s = c = 0., i = 0; i < n_rows; i++)
2567 for (j = 0; j < n_cols; j++)
2569 double Xresid, Yresid;
2572 Xresid = X[i] - Xbar;
2573 Yresid = Y[j] - Ybar;
2574 temp = (T * Xresid * Yresid
2576 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2577 y = mat[j + i * n_cols] * temp * temp - c;
2582 *ase_1 = sqrt (s) / (T * T);
2586 static double somers_d_v[3];
2587 static double somers_d_ase[3];
2588 static double somers_d_t[3];
2590 /* Calculate symmetric statistics and their asymptotic standard
2591 errors. Returns 0 if none could be calculated. */
2593 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2594 double t[N_SYMMETRIC])
2596 int q = min (ns_rows, ns_cols);
2605 for (i = 0; i < N_SYMMETRIC; i++)
2606 v[i] = ase[i] = t[i] = SYSMIS;
2609 /* Phi, Cramer's V, contingency coefficient. */
2610 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2612 double Xp = 0.; /* Pearson chi-square. */
2617 for (r = 0; r < n_rows; r++)
2618 for (c = 0; c < n_cols; c++)
2620 const double expected = row_tot[r] * col_tot[c] / W;
2621 const double freq = mat[n_cols * r + c];
2622 const double residual = freq - expected;
2625 Xp += residual * residual / expected;
2629 if (cmd.a_statistics[CRS_ST_PHI])
2631 v[0] = sqrt (Xp / W);
2632 v[1] = sqrt (Xp / (W * (q - 1)));
2634 if (cmd.a_statistics[CRS_ST_CC])
2635 v[2] = sqrt (Xp / (Xp + W));
2638 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2639 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2644 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2651 for (r = 0; r < n_rows; r++)
2652 Dr -= row_tot[r] * row_tot[r];
2653 for (c = 0; c < n_cols; c++)
2654 Dc -= col_tot[c] * col_tot[c];
2660 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2661 for (c = 0; c < n_cols; c++)
2665 for (r = 0; r < n_rows; r++)
2666 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2676 for (i = 0; i < n_rows; i++)
2680 for (j = 1; j < n_cols; j++)
2681 Cij += col_tot[j] - cum[j + i * n_cols];
2684 for (j = 1; j < n_cols; j++)
2685 Dij += cum[j + (i - 1) * n_cols];
2689 double fij = mat[j + i * n_cols];
2695 assert (j < n_cols);
2697 Cij -= col_tot[j] - cum[j + i * n_cols];
2698 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2702 Cij += cum[j - 1 + (i - 1) * n_cols];
2703 Dij -= cum[j + (i - 1) * n_cols];
2709 if (cmd.a_statistics[CRS_ST_BTAU])
2710 v[3] = (P - Q) / sqrt (Dr * Dc);
2711 if (cmd.a_statistics[CRS_ST_CTAU])
2712 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2713 if (cmd.a_statistics[CRS_ST_GAMMA])
2714 v[5] = (P - Q) / (P + Q);
2716 /* ASE for tau-b, tau-c, gamma. Calculations could be
2717 eliminated here, at expense of memory. */
2722 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2723 for (i = 0; i < n_rows; i++)
2727 for (j = 1; j < n_cols; j++)
2728 Cij += col_tot[j] - cum[j + i * n_cols];
2731 for (j = 1; j < n_cols; j++)
2732 Dij += cum[j + (i - 1) * n_cols];
2736 double fij = mat[j + i * n_cols];
2738 if (cmd.a_statistics[CRS_ST_BTAU])
2740 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2741 + v[3] * (row_tot[i] * Dc
2742 + col_tot[j] * Dr));
2743 btau_cum += fij * temp * temp;
2747 const double temp = Cij - Dij;
2748 ctau_cum += fij * temp * temp;
2751 if (cmd.a_statistics[CRS_ST_GAMMA])
2753 const double temp = Q * Cij - P * Dij;
2754 gamma_cum += fij * temp * temp;
2757 if (cmd.a_statistics[CRS_ST_D])
2759 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2760 - (P - Q) * (W - row_tot[i]));
2761 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2762 - (Q - P) * (W - col_tot[j]));
2767 assert (j < n_cols);
2769 Cij -= col_tot[j] - cum[j + i * n_cols];
2770 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2774 Cij += cum[j - 1 + (i - 1) * n_cols];
2775 Dij -= cum[j + (i - 1) * n_cols];
2781 btau_var = ((btau_cum
2782 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2784 if (cmd.a_statistics[CRS_ST_BTAU])
2786 ase[3] = sqrt (btau_var);
2787 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2790 if (cmd.a_statistics[CRS_ST_CTAU])
2792 ase[4] = ((2 * q / ((q - 1) * W * W))
2793 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2794 t[4] = v[4] / ase[4];
2796 if (cmd.a_statistics[CRS_ST_GAMMA])
2798 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2799 t[5] = v[5] / (2. / (P + Q)
2800 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2802 if (cmd.a_statistics[CRS_ST_D])
2804 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2805 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2806 somers_d_t[0] = (somers_d_v[0]
2808 * sqrt (ctau_cum - sqr (P - Q) / W)));
2809 somers_d_v[1] = (P - Q) / Dc;
2810 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2811 somers_d_t[1] = (somers_d_v[1]
2813 * sqrt (ctau_cum - sqr (P - Q) / W)));
2814 somers_d_v[2] = (P - Q) / Dr;
2815 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2816 somers_d_t[2] = (somers_d_v[2]
2818 * sqrt (ctau_cum - sqr (P - Q) / W)));
2824 /* Spearman correlation, Pearson's r. */
2825 if (cmd.a_statistics[CRS_ST_CORR])
2827 double *R = local_alloc (sizeof *R * n_rows);
2828 double *C = local_alloc (sizeof *C * n_cols);
2831 double y, t, c = 0., s = 0.;
2836 R[i] = s + (row_tot[i] + 1.) / 2.;
2843 assert (i < n_rows);
2848 double y, t, c = 0., s = 0.;
2853 C[j] = s + (col_tot[j] + 1.) / 2;
2860 assert (j < n_cols);
2864 calc_r (R, C, &v[6], &t[6], &ase[6]);
2870 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2874 /* Cohen's kappa. */
2875 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2877 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2880 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2881 i < ns_rows; i++, j++)
2885 while (col_tot[j] == 0.)
2888 prod = row_tot[i] * col_tot[j];
2889 sum = row_tot[i] + col_tot[j];
2891 sum_fii += mat[j + i * n_cols];
2893 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2894 sum_riciri_ci += prod * sum;
2896 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2897 for (j = 0; j < ns_cols; j++)
2899 double sum = row_tot[i] + col_tot[j];
2900 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2903 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2905 ase[8] = sqrt ((W * W * sum_rici
2906 + sum_rici * sum_rici
2907 - W * sum_riciri_ci)
2908 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2910 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2911 / sqr (W * W - sum_rici))
2912 + ((2. * (W - sum_fii)
2913 * (2. * sum_fii * sum_rici
2914 - W * sum_fiiri_ci))
2915 / cube (W * W - sum_rici))
2916 + (sqr (W - sum_fii)
2917 * (W * sum_fijri_ci2 - 4.
2918 * sum_rici * sum_rici)
2919 / hypercube (W * W - sum_rici))));
2921 t[8] = v[8] / ase[8];
2928 /* Calculate risk estimate. */
2930 calc_risk (double *value, double *upper, double *lower, union value *c)
2932 double f11, f12, f21, f22;
2938 for (i = 0; i < 3; i++)
2939 value[i] = upper[i] = lower[i] = SYSMIS;
2942 if (ns_rows != 2 || ns_cols != 2)
2949 for (i = j = 0; i < n_cols; i++)
2950 if (col_tot[i] != 0.)
2959 f11 = mat[nz_cols[0]];
2960 f12 = mat[nz_cols[1]];
2961 f21 = mat[nz_cols[0] + n_cols];
2962 f22 = mat[nz_cols[1] + n_cols];
2964 c[0] = cols[nz_cols[0]];
2965 c[1] = cols[nz_cols[1]];
2968 value[0] = (f11 * f22) / (f12 * f21);
2969 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2970 lower[0] = value[0] * exp (-1.960 * v);
2971 upper[0] = value[0] * exp (1.960 * v);
2973 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2974 v = sqrt ((f12 / (f11 * (f11 + f12)))
2975 + (f22 / (f21 * (f21 + f22))));
2976 lower[1] = value[1] * exp (-1.960 * v);
2977 upper[1] = value[1] * exp (1.960 * v);
2979 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2980 v = sqrt ((f11 / (f12 * (f11 + f12)))
2981 + (f21 / (f22 * (f21 + f22))));
2982 lower[2] = value[2] * exp (-1.960 * v);
2983 upper[2] = value[2] * exp (1.960 * v);
2988 /* Calculate directional measures. */
2990 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2991 double t[N_DIRECTIONAL])
2996 for (i = 0; i < N_DIRECTIONAL; i++)
2997 v[i] = ase[i] = t[i] = SYSMIS;
3001 if (cmd.a_statistics[CRS_ST_LAMBDA])
3003 double *fim = xmalloc (sizeof *fim * n_rows);
3004 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
3005 double *fmj = xmalloc (sizeof *fmj * n_cols);
3006 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
3007 double sum_fim, sum_fmj;
3009 int rm_index, cm_index;
3012 /* Find maximum for each row and their sum. */
3013 for (sum_fim = 0., i = 0; i < n_rows; i++)
3015 double max = mat[i * n_cols];
3018 for (j = 1; j < n_cols; j++)
3019 if (mat[j + i * n_cols] > max)
3021 max = mat[j + i * n_cols];
3025 sum_fim += fim[i] = max;
3026 fim_index[i] = index;
3029 /* Find maximum for each column. */
3030 for (sum_fmj = 0., j = 0; j < n_cols; j++)
3032 double max = mat[j];
3035 for (i = 1; i < n_rows; i++)
3036 if (mat[j + i * n_cols] > max)
3038 max = mat[j + i * n_cols];
3042 sum_fmj += fmj[j] = max;
3043 fmj_index[j] = index;
3046 /* Find maximum row total. */
3049 for (i = 1; i < n_rows; i++)
3050 if (row_tot[i] > rm)
3056 /* Find maximum column total. */
3059 for (j = 1; j < n_cols; j++)
3060 if (col_tot[j] > cm)
3066 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
3067 v[1] = (sum_fmj - rm) / (W - rm);
3068 v[2] = (sum_fim - cm) / (W - cm);
3070 /* ASE1 for Y given X. */
3074 for (accum = 0., i = 0; i < n_rows; i++)
3075 for (j = 0; j < n_cols; j++)
3077 const int deltaj = j == cm_index;
3078 accum += (mat[j + i * n_cols]
3079 * sqr ((j == fim_index[i])
3084 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3087 /* ASE0 for Y given X. */
3091 for (accum = 0., i = 0; i < n_rows; i++)
3092 if (cm_index != fim_index[i])
3093 accum += (mat[i * n_cols + fim_index[i]]
3094 + mat[i * n_cols + cm_index]);
3095 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3098 /* ASE1 for X given Y. */
3102 for (accum = 0., i = 0; i < n_rows; i++)
3103 for (j = 0; j < n_cols; j++)
3105 const int deltaj = i == rm_index;
3106 accum += (mat[j + i * n_cols]
3107 * sqr ((i == fmj_index[j])
3112 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3115 /* ASE0 for X given Y. */
3119 for (accum = 0., j = 0; j < n_cols; j++)
3120 if (rm_index != fmj_index[j])
3121 accum += (mat[j + n_cols * fmj_index[j]]
3122 + mat[j + n_cols * rm_index]);
3123 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3126 /* Symmetric ASE0 and ASE1. */
3131 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3132 for (j = 0; j < n_cols; j++)
3134 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3135 int temp1 = (i == rm_index) + (j == cm_index);
3136 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3137 accum1 += (mat[j + i * n_cols]
3138 * sqr (temp0 + (v[0] - 1.) * temp1));
3140 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3141 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3142 / (2. * W - rm - cm));
3151 double sum_fij2_ri, sum_fij2_ci;
3152 double sum_ri2, sum_cj2;
3154 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3155 for (j = 0; j < n_cols; j++)
3157 double temp = sqr (mat[j + i * n_cols]);
3158 sum_fij2_ri += temp / row_tot[i];
3159 sum_fij2_ci += temp / col_tot[j];
3162 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3163 sum_ri2 += row_tot[i] * row_tot[i];
3165 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3166 sum_cj2 += col_tot[j] * col_tot[j];
3168 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3169 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3173 if (cmd.a_statistics[CRS_ST_UC])
3175 double UX, UY, UXY, P;
3176 double ase1_yx, ase1_xy, ase1_sym;
3179 for (UX = 0., i = 0; i < n_rows; i++)
3180 if (row_tot[i] > 0.)
3181 UX -= row_tot[i] / W * log (row_tot[i] / W);
3183 for (UY = 0., j = 0; j < n_cols; j++)
3184 if (col_tot[j] > 0.)
3185 UY -= col_tot[j] / W * log (col_tot[j] / W);
3187 for (UXY = P = 0., i = 0; i < n_rows; i++)
3188 for (j = 0; j < n_cols; j++)
3190 double entry = mat[j + i * n_cols];
3195 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3196 UXY -= entry / W * log (entry / W);
3199 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3200 for (j = 0; j < n_cols; j++)
3202 double entry = mat[j + i * n_cols];
3207 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3208 + (UX - UXY) * log (col_tot[j] / W));
3209 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3210 + (UY - UXY) * log (row_tot[i] / W));
3211 ase1_sym += entry * sqr ((UXY
3212 * log (row_tot[i] * col_tot[j] / (W * W)))
3213 - (UX + UY) * log (entry / W));
3216 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3217 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3218 t[5] = v[5] / ((2. / (W * (UX + UY)))
3219 * sqrt (P - sqr (UX + UY - UXY) / W));
3221 v[6] = (UX + UY - UXY) / UX;
3222 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3223 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3225 v[7] = (UX + UY - UXY) / UY;
3226 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3227 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3231 if (cmd.a_statistics[CRS_ST_D])
3236 calc_symmetric (NULL, NULL, NULL);
3237 for (i = 0; i < 3; i++)
3239 v[8 + i] = somers_d_v[i];
3240 ase[8 + i] = somers_d_ase[i];
3241 t[8 + i] = somers_d_t[i];
3246 if (cmd.a_statistics[CRS_ST_ETA])
3249 double sum_Xr, sum_X2r;
3253 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3255 sum_Xr += rows[i].f * row_tot[i];
3256 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3258 SX = sum_X2r - sum_Xr * sum_Xr / W;
3260 for (SXW = 0., j = 0; j < n_cols; j++)
3264 for (cum = 0., i = 0; i < n_rows; i++)
3266 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3267 cum += rows[i].f * mat[j + i * n_cols];
3270 SXW -= cum * cum / col_tot[j];
3272 v[11] = sqrt (1. - SXW / SX);
3276 double sum_Yc, sum_Y2c;
3280 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3282 sum_Yc += cols[i].f * col_tot[i];
3283 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3285 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3287 for (SYW = 0., i = 0; i < n_rows; i++)
3291 for (cum = 0., j = 0; j < n_cols; j++)
3293 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3294 cum += cols[j].f * mat[j + i * n_cols];
3297 SYW -= cum * cum / row_tot[i];
3299 v[12] = sqrt (1. - SYW / SY);