1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
32 /* AIX requires this to be the first thing in the file. */
35 #define alloca __builtin_alloca
43 #ifndef alloca /* predefined by HP cc +Olibcalls */
54 #include "algorithm.h"
58 #include "dcdflib/cdflib.h"
67 #include "value-labels.h"
71 #include "debug-print.h"
77 +missing=miss:!table/include/report;
78 +write[wr_]=none,cells,all;
79 +format=fmt:!labels/nolabels/novallabs,
82 tabl:!tables/notables,
85 +cells[cl_]=count,none,row,column,total,expected,residual,sresidual,
87 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
93 /* Number of chi-square statistics. */
96 /* Number of symmetric statistics. */
99 /* Number of directional statistics. */
100 #define N_DIRECTIONAL 13
102 /* A single table entry for general mode. */
105 int table; /* Flattened table number. */
108 double freq; /* Frequency count. */
109 double *data; /* Crosstabulation table for integer mode. */
112 union value v[1]; /* Values. */
115 /* A crosstabulation. */
118 int nvar; /* Number of variables. */
119 double missing; /* Missing cases count. */
120 int ofs; /* Integer mode: Offset into sorted_tab[]. */
121 struct variable *v[2]; /* At least two variables; sorted by
122 larger indices first. */
125 /* Indexes into crosstab.v. */
132 /* General mode crosstabulation table. */
133 static struct hsh_table *gen_tab; /* Hash table. */
134 static int n_sorted_tab; /* Number of entries in sorted_tab. */
135 static struct table_entry **sorted_tab; /* Sorted table. */
137 /* VARIABLES dictionary. */
138 static struct dictionary *var_dict;
141 static struct crosstab **xtab;
144 /* Integer or general mode? */
153 static int num_cells; /* Number of cells requested. */
154 static int cells[8]; /* Cells requested. */
155 static int expected; /* Nonzero if expected value is needed. */
158 static int write; /* One of WR_* that specifies the WRITE style. */
160 /* Command parsing info. */
161 static struct cmd_crosstabs cmd;
164 static struct pool *pl_tc; /* For table cells. */
165 static struct pool *pl_col; /* For column data. */
167 static int internal_cmd_crosstabs (void);
168 static void free_var_dict (void);
169 static void precalc (void);
170 static int calc_general (struct ccase *);
171 static int calc_integer (struct ccase *);
172 static void postcalc (void);
173 static void submit (struct tab_table *);
176 static void debug_print (void);
177 static void print_table_entries (struct table_entry **tab);
180 /* Parse and execute CROSSTABS, then clean up. */
184 int result = internal_cmd_crosstabs ();
187 pool_destroy (pl_tc);
188 pool_destroy (pl_col);
193 /* Parses and executes the CROSSTABS procedure. */
195 internal_cmd_crosstabs (void)
200 pl_tc = pool_create ();
201 pl_col = pool_create ();
203 lex_match_id ("CROSSTABS");
204 if (!parse_crosstabs (&cmd))
208 /* Needs var_dict. */
212 mode = var_dict ? INTEGER : GENERAL;
219 cmd.a_cells[CRS_CL_COUNT] = 1;
227 for (i = 0; i < CRS_CL_count; i++)
232 cmd.a_cells[CRS_CL_COUNT] = 1;
233 cmd.a_cells[CRS_CL_ROW] = 1;
234 cmd.a_cells[CRS_CL_COLUMN] = 1;
235 cmd.a_cells[CRS_CL_TOTAL] = 1;
237 if (cmd.a_cells[CRS_CL_ALL])
239 for (i = 0; i < CRS_CL_count; i++)
241 cmd.a_cells[CRS_CL_ALL] = 0;
243 cmd.a_cells[CRS_CL_NONE] = 0;
244 for (num_cells = i = 0; i < CRS_CL_count; i++)
247 if (i >= CRS_CL_EXPECTED)
249 cmd.a_cells[num_cells++] = i;
254 if (cmd.sbc_statistics)
259 for (i = 0; i < CRS_ST_count; i++)
260 if (cmd.a_statistics[i])
263 cmd.a_statistics[CRS_ST_CHISQ] = 1;
264 if (cmd.a_statistics[CRS_ST_ALL])
265 for (i = 0; i < CRS_ST_count; i++)
266 cmd.a_statistics[i] = 1;
270 if (cmd.miss == CRS_REPORT && mode == GENERAL)
272 msg (SE, _("Missing mode REPORT not allowed in general mode. "
273 "Assuming MISSING=TABLE."));
274 cmd.miss = CRS_TABLE;
278 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
279 cmd.a_write[CRS_WR_ALL] = 0;
280 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
282 msg (SE, _("Write mode ALL not allowed in general mode. "
283 "Assuming WRITE=CELLS."));
284 cmd.a_write[CRS_WR_CELLS] = 1;
287 && (cmd.a_write[CRS_WR_NONE]
288 + cmd.a_write[CRS_WR_ALL]
289 + cmd.a_write[CRS_WR_CELLS] == 0))
290 cmd.a_write[CRS_WR_CELLS] = 1;
291 if (cmd.a_write[CRS_WR_CELLS])
292 write = CRS_WR_CELLS;
293 else if (cmd.a_write[CRS_WR_ALL])
298 update_weighting (&default_dict);
299 procedure (precalc, mode == GENERAL ? calc_general : calc_integer, postcalc);
304 /* Frees var_dict once it's no longer needed. */
314 if (var_dict->name_tab)
316 hsh_destroy (var_dict->name_tab);
317 var_dict->name_tab = NULL;
320 for (i = 0; i < var_dict->nvar; i++)
321 free (var_dict->var[i]);
322 free (var_dict->var);
323 var_dict->var = NULL;
326 free_dictionary (var_dict);
332 /* Parses the TABLES subcommand. */
334 crs_custom_tables (struct cmd_crosstabs *cmd unused)
336 struct dictionary *dict;
338 struct variable ***by = NULL;
343 /* Ensure that this is a TABLES subcommand. */
344 if (!lex_match_id ("TABLES")
345 && (token != T_ID || !is_varname (tokid))
350 dict = var_dict ? var_dict : &default_dict;
354 by = xrealloc (by, sizeof *by * (n_by + 1));
355 by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1));
356 if (!parse_variables (dict, &by[n_by], &by_nvar[n_by],
357 PV_NO_DUPLICATE | PV_NO_SCRATCH))
362 if (!lex_match (T_BY))
366 lex_error (_("expecting BY"));
375 int *by_iter = xcalloc (sizeof *by_iter * n_by);
378 xtab = xrealloc (xtab, sizeof *xtab * (nxtab + nx));
379 for (i = 0; i < nx; i++)
383 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
390 if (var_dict == NULL)
391 for (i = 0; i < n_by; i++)
392 x->v[i] = by[i][by_iter[i]];
394 for (i = 0; i < n_by; i++)
395 x->v[i] = default_dict.var[by[i][by_iter[i]]->foo];
401 for (i = n_by - 1; i >= 0; i--)
403 if (++by_iter[i] < by_nvar[i])
415 /* Despite the name, we come here whether we're successful or
421 for (i = 0; i < n_by; i++)
430 /* Parses the VARIABLES subcommand. */
432 crs_custom_variables (struct cmd_crosstabs *cmd unused)
434 struct variable **v = NULL;
439 msg (SE, _("VARIABLES must be specified before TABLES."));
452 if (!parse_variables (&default_dict, &v, &nv,
453 (PV_APPEND | PV_NUMERIC
454 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
459 lex_error ("expecting `('");
464 if (!lex_force_int ())
466 min = lex_integer ();
471 if (!lex_force_int ())
473 max = lex_integer ();
476 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
484 lex_error ("expecting `)'");
489 for (i = orig_nv; i < nv; i++)
491 v[i]->p.crs.min = min;
492 v[i]->p.crs.max = max + 1.;
493 v[i]->p.crs.count = max - min + 1;
503 var_dict = new_dictionary (0);
504 var_dict->var = xmalloc (sizeof *var_dict->var * nv);
506 for (i = 0; i < nv; i++)
508 struct variable *var = xmalloc (offsetof (struct variable, width));
509 strcpy (var->name, v[i]->name);
511 var->type = v[i]->type;
512 var->foo = v[i]->index;
513 var_dict->var[i] = var;
514 hsh_force_insert (var_dict->name_tab, var);
530 printf ("CROSSTABS\n");
536 printf ("\t/VARIABLES=");
537 for (i = 0; i < var_dict->nvar; i++)
539 struct variable *v = var_dict->var[i];
540 struct variable *iv = default_dict.var[v->foo];
542 printf ("%s ", v->name);
543 if (i < var_dict->nvar - 1)
545 struct variable *nv = var_dict->var[i + 1];
546 struct variable *niv = default_dict.var[nv->foo];
548 if (iv->p.crs.min == niv->p.crs.min
549 && iv->p.crs.max == niv->p.crs.max)
552 printf ("(%d,%d) ", iv->p.crs.min, iv->p.crs.max - 1);
560 printf ("\t/TABLES=");
561 for (i = 0; i < nxtab; i++)
563 struct crosstab *x = xtab[i];
568 for (j = 0; j < x->nvar; j++)
572 printf ("%s", x->v[j]->name);
578 #endif /* DEBUGGING */
580 /* Data file processing. */
582 static int compare_table_entry (const void *, const void *, void *);
583 static unsigned hash_table_entry (const void *, void *);
585 /* Set up the crosstabulation tables for processing. */
591 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
601 for (i = 0; i < nxtab; i++)
603 struct crosstab *x = xtab[i];
608 x->ofs = n_sorted_tab;
610 for (j = 2; j < x->nvar; j++)
611 count *= x->v[j - 2]->p.crs.count;
613 sorted_tab = xrealloc (sorted_tab,
614 sizeof *sorted_tab * (n_sorted_tab + count));
615 v = local_alloc (sizeof *v * x->nvar);
616 for (j = 2; j < x->nvar; j++)
617 v[j] = x->v[j]->p.crs.min;
618 for (j = 0; j < count; j++)
620 struct table_entry *te;
623 te = sorted_tab[n_sorted_tab++]
624 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
628 const int mat_size = (x->v[0]->p.crs.count
629 * x->v[1]->p.crs.count);
632 te->u.data = xmalloc (sizeof *te->u.data * mat_size);
633 for (m = 0; m < mat_size; m++)
637 for (k = 2; k < x->nvar; k++)
639 for (k = 2; k < x->nvar; k++)
640 if (++v[k] >= x->v[k]->p.crs.max)
641 v[k] = x->v[k]->p.crs.min;
648 sorted_tab = xrealloc (sorted_tab,
649 sizeof *sorted_tab * (n_sorted_tab + 1));
650 sorted_tab[n_sorted_tab] = NULL;
654 /* Form crosstabulations for general mode. */
656 calc_general (struct ccase *c)
659 double w = (default_dict.weight_index != -1
660 ? c->data[default_dict.var[default_dict.weight_index]->fv].f
663 /* Flattened current table index. */
666 for (t = 0; t < nxtab; t++)
668 struct crosstab *x = xtab[t];
669 const size_t entry_size = (sizeof (struct table_entry)
670 + sizeof (union value) * (x->nvar - 1));
671 struct table_entry *te = local_alloc (entry_size);
673 /* Construct table entry for the current record and table. */
679 for (j = 0; j < x->nvar; j++)
681 if ((cmd.miss == CRS_TABLE
682 && is_missing (&c->data[x->v[j]->fv], x->v[j]))
683 || (cmd.miss == CRS_INCLUDE
684 && is_system_missing (&c->data[x->v[j]->fv], x->v[j])))
690 if (x->v[j]->type == NUMERIC)
691 te->v[j].f = c->data[x->v[j]->fv].f;
694 memcpy (te->v[j].s, c->data[x->v[j]->fv].s, x->v[j]->width);
696 /* Necessary in order to simplify comparisons. */
697 memset (&te->v[j].s[x->v[j]->width], 0,
698 sizeof (union value) - x->v[j]->width);
703 /* Add record to hash table. */
705 struct table_entry **tepp
706 = (struct table_entry **) hsh_probe (gen_tab, te);
709 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
712 memcpy (tep, te, entry_size);
717 (*tepp)->u.freq += w;
728 calc_integer (struct ccase *c)
731 double w = (default_dict.weight_index != -1
732 ? c->data[default_dict.var[default_dict.weight_index]->fv].f
735 /* Flattened current table index. */
738 for (t = 0; t < nxtab; t++)
740 struct crosstab *x = xtab[t];
745 for (i = 0; i < x->nvar; i++)
747 struct variable *const v = x->v[i];
748 double value = c->data[v->fv].f;
750 /* Note that the first test also rules out SYSMIS. */
751 if ((value < v->p.crs.min || value >= v->p.crs.max)
752 || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
760 ofs += fact * ((int) value - v->p.crs.min);
761 fact *= v->p.crs.count;
766 const int row = c->data[x->v[ROW_VAR]->fv].f - x->v[ROW_VAR]->p.crs.min;
767 const int col = c->data[x->v[COL_VAR]->fv].f - x->v[COL_VAR]->p.crs.min;
768 const int col_dim = x->v[COL_VAR]->p.crs.count;
770 sorted_tab[ofs]->u.data[col + row * col_dim] += w;
780 /* Print out all table entries in NULL-terminated TAB for use by a
781 debugger (a person, not a program). */
783 print_table_entries (struct table_entry **tab)
785 printf ("raw crosstabulation data:\n");
788 const struct crosstab *x = xtab[(*tab)->table];
791 printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table);
792 for (i = 0; i < x->nvar; i++)
796 printf ("%s:", x->v[i]->name);
798 if (x->v[i]->type == NUMERIC)
799 printf ("%g", (*tab)->v[i].f);
801 printf ("%.*s", x->v[i]->width, (*tab)->v[i].s);
809 /* Compare the table_entry's at A and B and return a strcmp()-type
812 compare_table_entry (const void *a_, const void *b_, void *foo unused)
814 const struct table_entry *a = a_;
815 const struct table_entry *b = b_;
817 if (a->table > b->table)
819 else if (a->table < b->table)
823 const struct crosstab *x = xtab[a->table];
826 for (i = x->nvar - 1; i >= 0; i--)
827 if (x->v[i]->type == NUMERIC)
829 const double diffnum = a->v[i].f - b->v[i].f;
832 else if (diffnum > 0)
837 assert (x->v[i]->type == ALPHA);
839 const int diffstr = strncmp (a->v[i].s, b->v[i].s, x->v[i]->width);
849 /* Calculate a hash value from table_entry PA. */
851 hash_table_entry (const void *pa, void *foo unused)
853 const struct table_entry *a = pa;
854 unsigned long hash = a->table;
857 /* Hash formula from _SPSS Statistical Algorithms_. */
858 for (i = 0; i < xtab[a->table]->nvar; i++)
860 hash = (hash << 3) | (hash >> (CHAR_BIT * SIZEOF_LONG - 3));
861 hash ^= a->v[i].hash[0];
862 #if SIZEOF_DOUBLE / SIZEOF_LONG > 1
863 hash ^= a->v[i].hash[1];
870 /* Post-data reading calculations. */
872 static struct table_entry **find_pivot_extent (struct table_entry **,
873 int *cnt, int pivot);
874 static void enum_var_values (struct table_entry **entries, int entry_cnt,
876 union value **values, int *value_cnt);
877 static void output_pivot_table (struct table_entry **, struct table_entry **,
878 double **, double **, double **,
879 int *, int *, int *);
880 static void make_summary_table (void);
887 n_sorted_tab = hsh_count (gen_tab);
888 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
890 print_table_entries (sorted_tab);
894 make_summary_table ();
896 /* Identify all the individual crosstabulation tables, and deal with
899 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
900 int pc = n_sorted_tab; /* Pivot count. */
902 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
903 int maxrows = 0, maxcols = 0, maxcells = 0;
907 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
911 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
912 &maxrows, &maxcols, &maxcells);
921 hsh_destroy (gen_tab);
924 static void insert_summary (struct tab_table *, int tab_index, double valid);
926 /* Output a table summarizing the cases processed. */
928 make_summary_table (void)
930 struct tab_table *summary;
932 struct table_entry **pb = sorted_tab, **pe;
933 int pc = n_sorted_tab;
936 summary = tab_create (7, 3 + nxtab, 1);
937 tab_title (summary, 0, _("Summary."));
938 tab_headers (summary, 1, 0, 3, 0);
939 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
940 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
941 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
942 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
943 tab_hline (summary, TAL_1, 1, 6, 1);
944 tab_hline (summary, TAL_1, 1, 6, 2);
945 tab_vline (summary, TAL_1, 3, 1, 1);
946 tab_vline (summary, TAL_1, 5, 1, 1);
950 for (i = 0; i < 3; i++)
952 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
953 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
956 tab_offset (summary, 0, 3);
962 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
966 while (cur_tab < (*pb)->table)
967 insert_summary (summary, cur_tab++, 0.);
970 for (valid = 0.; pb < pe; pb++)
971 valid += (*pb)->u.freq;
974 const struct crosstab *const x = xtab[(*pb)->table];
975 const int n_cols = x->v[COL_VAR]->p.crs.count;
976 const int n_rows = x->v[ROW_VAR]->p.crs.count;
977 const int count = n_cols * n_rows;
979 for (valid = 0.; pb < pe; pb++)
981 const double *data = (*pb)->u.data;
984 for (i = 0; i < count; i++)
988 insert_summary (summary, cur_tab++, valid);
993 while (cur_tab < nxtab)
994 insert_summary (summary, cur_tab++, 0.);
999 /* Inserts a line into T describing the crosstabulation at index
1000 TAB_INDEX, which has VALID valid observations. */
1002 insert_summary (struct tab_table *t, int tab_index, double valid)
1004 struct crosstab *x = xtab[tab_index];
1006 tab_hline (t, TAL_1, 0, 6, 0);
1008 /* Crosstabulation name. */
1010 char *buf = local_alloc (128 * x->nvar);
1014 for (i = 0; i < x->nvar; i++)
1017 cp = stpcpy (cp, " * ");
1019 cp = stpcpy (cp, x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1021 tab_text (t, 0, 0, TAB_LEFT, buf);
1026 /* Counts and percentages. */
1036 for (i = 0; i < 3; i++)
1038 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
1039 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
1040 n[i] / n[2] * 100.);
1050 static struct tab_table *table; /* Crosstabulation table. */
1051 static struct tab_table *chisq; /* Chi-square table. */
1052 static struct tab_table *sym; /* Symmetric measures table. */
1053 static struct tab_table *risk; /* Risk estimate table. */
1054 static struct tab_table *direct; /* Directional measures table. */
1057 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
1059 /* Column values, number of columns. */
1060 static union value *cols;
1063 /* Row values, number of rows. */
1064 static union value *rows;
1067 /* Number of statistically interesting columns/rows (columns/rows with
1069 static int ns_cols, ns_rows;
1071 /* Crosstabulation. */
1072 static struct crosstab *x;
1074 /* Number of variables from the crosstabulation to consider. This is
1075 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
1078 /* Matrix contents. */
1079 static double *mat; /* Matrix proper. */
1080 static double *row_tot; /* Row totals. */
1081 static double *col_tot; /* Column totals. */
1082 static double W; /* Grand total. */
1084 static void display_dimensions (struct tab_table *, int first_difference,
1085 struct table_entry *);
1086 static void display_crosstabulation (void);
1087 static void display_chisq (void);
1088 static void display_symmetric (void);
1089 static void display_risk (void);
1090 static void display_directional (void);
1091 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1092 static void table_value_missing (struct tab_table *table, int c, int r,
1093 unsigned char opt, const union value *v,
1094 const struct variable *var);
1095 static void delete_missing (void);
1097 /* Output pivot table beginning at PB and continuing until PE,
1098 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1099 hold *MAXROWS entries. */
1101 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1102 double **matp, double **row_totp, double **col_totp,
1103 int *maxrows, int *maxcols, int *maxcells)
1106 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1107 int tc = pe - pb; /* Table count. */
1109 /* Table entry for header comparison. */
1110 struct table_entry *cmp;
1112 x = xtab[(*pb)->table];
1113 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1115 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1117 /* Crosstabulation table initialization. */
1120 table = tab_create (nvar + n_cols,
1121 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1122 tab_headers (table, nvar - 1, 0, 2, 0);
1124 /* First header line. */
1125 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1126 TAB_CENTER | TAT_TITLE, x->v[COL_VAR]->name);
1128 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1130 /* Second header line. */
1134 for (i = 2; i < nvar; i++)
1135 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1136 TAB_RIGHT | TAT_TITLE,
1137 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1138 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1139 x->v[ROW_VAR]->name);
1140 for (i = 0; i < n_cols; i++)
1141 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1143 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1146 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1147 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1151 char *title = local_alloc (x->nvar * 64 + 128);
1155 if (cmd.pivot == CRS_PIVOT)
1156 for (i = 0; i < nvar; i++)
1159 cp = stpcpy (cp, " by ");
1160 cp = stpcpy (cp, x->v[i]->name);
1164 cp = spprintf (cp, "%s by %s for", x->v[0]->name, x->v[1]->name);
1165 for (i = 2; i < nvar; i++)
1167 char buf[64], *bufp;
1172 cp = stpcpy (cp, x->v[i]->name);
1174 data_out (buf, &x->v[i]->print, &(*pb)->v[i]);
1175 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1177 cp = stpcpy (cp, bufp);
1181 cp = stpcpy (cp, " [");
1182 for (i = 0; i < num_cells; i++)
1190 static const struct tuple cell_names[] =
1192 {CRS_CL_COUNT, N_("count")},
1193 {CRS_CL_ROW, N_("row %")},
1194 {CRS_CL_COLUMN, N_("column %")},
1195 {CRS_CL_TOTAL, N_("total %")},
1196 {CRS_CL_EXPECTED, N_("expected")},
1197 {CRS_CL_RESIDUAL, N_("residual")},
1198 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1199 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1203 const struct tuple *t;
1205 for (t = cell_names; t->value != cells[i]; t++)
1206 assert (t->value != -1);
1208 cp = stpcpy (cp, ", ");
1209 cp = stpcpy (cp, gettext (t->name));
1213 tab_title (table, 0, title);
1217 tab_offset (table, 0, 2);
1222 /* Chi-square table initialization. */
1223 if (cmd.a_statistics[CRS_ST_CHISQ])
1225 chisq = tab_create (6 + (nvar - 2),
1226 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1227 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1229 tab_title (chisq, 0, "Chi-square tests.");
1231 tab_offset (chisq, nvar - 2, 0);
1232 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1233 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1234 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1235 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1236 _("Asymp. Sig. (2-sided)"));
1237 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1238 _("Exact. Sig. (2-sided)"));
1239 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1240 _("Exact. Sig. (1-sided)"));
1242 tab_offset (chisq, 0, 1);
1247 /* Symmetric measures. */
1248 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1249 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1250 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1251 || cmd.a_statistics[CRS_ST_KAPPA])
1253 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1254 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1255 tab_title (sym, 0, "Symmetric measures.");
1257 tab_offset (sym, nvar - 2, 0);
1258 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1259 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1260 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1261 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1262 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1263 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1264 tab_offset (sym, 0, 1);
1269 /* Risk estimate. */
1270 if (cmd.a_statistics[CRS_ST_RISK])
1272 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1273 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1274 tab_title (risk, 0, "Risk estimate.");
1276 tab_offset (risk, nvar - 2, 0);
1277 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1278 _(" 95%% Confidence Interval"));
1279 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1280 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1281 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1282 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1283 tab_hline (risk, TAL_1, 2, 3, 1);
1284 tab_vline (risk, TAL_1, 2, 0, 1);
1285 tab_offset (risk, 0, 2);
1290 /* Directional measures. */
1291 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1292 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1294 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1295 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1296 tab_title (direct, 0, "Directional measures.");
1298 tab_offset (direct, nvar - 2, 0);
1299 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1300 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1301 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1302 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1303 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1304 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1305 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1306 tab_offset (direct, 0, 1);
1313 /* Find pivot subtable if applicable. */
1314 te = find_pivot_extent (tb, &tc, 0);
1318 /* Find all the row variable values. */
1319 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1321 /* Allocate memory space for the column and row totals. */
1322 if (n_rows > *maxrows)
1324 *row_totp = xrealloc (*row_totp, sizeof **row_totp * n_rows);
1325 row_tot = *row_totp;
1328 if (n_cols > *maxcols)
1330 *col_totp = xrealloc (*col_totp, sizeof **col_totp * n_cols);
1331 col_tot = *col_totp;
1335 /* Allocate table space for the matrix. */
1336 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1337 tab_realloc (table, -1,
1338 max (tab_nr (table) + (n_rows + 1) * num_cells,
1339 tab_nr (table) * (pe - pb) / (te - tb)));
1341 if (mode == GENERAL)
1343 /* Allocate memory space for the matrix. */
1344 if (n_cols * n_rows > *maxcells)
1346 *matp = xrealloc (*matp, sizeof **matp * n_cols * n_rows);
1347 *maxcells = n_cols * n_rows;
1352 /* Build the matrix and calculate column totals. */
1354 union value *cur_col = cols;
1355 union value *cur_row = rows;
1357 double *cp = col_tot;
1358 struct table_entry **p;
1361 for (p = &tb[0]; p < te; p++)
1363 for (; memcmp (cur_col, &(*p)->v[COL_VAR], sizeof *cur_col);
1367 for (; cur_row < &rows[n_rows]; cur_row++)
1373 mp = &mat[cur_col - cols];
1376 for (; memcmp (cur_row, &(*p)->v[ROW_VAR], sizeof *cur_row);
1383 *cp += *mp = (*p)->u.freq;
1388 /* Zero out the rest of the matrix. */
1389 for (; cur_row < &rows[n_rows]; cur_row++)
1395 if (cur_col < &cols[n_cols])
1397 const int rem_cols = n_cols - (cur_col - cols);
1400 for (c = 0; c < rem_cols; c++)
1402 mp = &mat[cur_col - cols];
1403 for (r = 0; r < n_rows; r++)
1405 for (c = 0; c < rem_cols; c++)
1407 mp += n_cols - rem_cols;
1415 double *tp = col_tot;
1417 assert (mode == INTEGER);
1418 mat = (*tb)->u.data;
1421 /* Calculate column totals. */
1422 for (c = 0; c < n_cols; c++)
1425 double *cp = &mat[c];
1427 for (r = 0; r < n_rows; r++)
1428 cum += cp[r * n_cols];
1436 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1437 ns_cols += *cp != 0.;
1440 /* Calculate row totals. */
1443 double *rp = row_tot;
1446 for (ns_rows = 0, r = n_rows; r--; )
1449 for (c = n_cols; c--; )
1457 /* Calculate grand total. */
1463 if (n_rows < n_cols)
1464 tp = row_tot, n = n_rows;
1466 tp = col_tot, n = n_cols;
1473 /* Print the matrix. */
1477 printf ("%s by %s for", x->v[0]->name, x->v[1]->name);
1478 for (i = 2; i < nvar; i++)
1479 printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f);
1482 for (c = 0; c < n_cols; c++)
1483 printf ("%4g", cols[c].f);
1485 for (r = 0; r < n_rows; r++)
1487 printf ("%4g:", rows[r].f);
1488 for (c = 0; c < n_cols; c++)
1489 printf ("%4g", mat[c + r * n_cols]);
1490 printf ("%4g", row_tot[r]);
1494 for (c = 0; c < n_cols; c++)
1495 printf ("%4g", col_tot[c]);
1501 /* Find the first variable that differs from the last subtable,
1502 then display the values of the dimensioning variables for
1503 each table that needs it. */
1505 int first_difference = nvar - 1;
1508 for (; ; first_difference--)
1510 assert (first_difference >= 2);
1511 if (memcmp (&cmp->v[first_difference],
1512 &(*tb)->v[first_difference], sizeof *cmp->v))
1518 display_dimensions (table, first_difference, *tb);
1520 display_dimensions (chisq, first_difference, *tb);
1522 display_dimensions (sym, first_difference, *tb);
1524 display_dimensions (risk, first_difference, *tb);
1526 display_dimensions (direct, first_difference, *tb);
1530 display_crosstabulation ();
1531 if (cmd.miss == CRS_REPORT)
1536 display_symmetric ();
1540 display_directional ();
1551 tab_resize (chisq, 4 + (nvar - 2), -1);
1562 /* Delete missing rows and columns for statistical analysis when
1565 delete_missing (void)
1570 for (r = 0; r < n_rows; r++)
1571 if (is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1575 for (c = 0; c < n_cols; c++)
1576 mat[c + r * n_cols] = 0.;
1584 for (c = 0; c < n_cols; c++)
1585 if (is_num_user_missing (cols[c].f, x->v[COL_VAR]))
1589 for (r = 0; r < n_rows; r++)
1590 mat[c + r * n_cols] = 0.;
1596 /* Prepare table T for submission, and submit it. */
1598 submit (struct tab_table *t)
1605 tab_resize (t, -1, 0);
1606 if (tab_nr (t) == tab_t (t))
1611 tab_offset (t, 0, 0);
1613 for (i = 2; i < nvar; i++)
1614 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1615 x->v[i]->label ? x->v[i]->label : x->v[i]->name);
1616 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1617 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1619 tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1,
1621 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1622 tab_dim (t, crosstabs_dim);
1626 /* Sets the widths of all the columns and heights of all the rows in
1627 table T for driver D. */
1629 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1633 /* Width of a numerical column. */
1634 int c = outp_string_width (d, "0.000000");
1635 if (cmd.miss == CRS_REPORT)
1636 c += outp_string_width (d, "M");
1638 /* Set width for header columns. */
1641 int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l;
1643 if (w < d->prop_em_width * 8)
1644 w = d->prop_em_width * 8;
1646 if (w > d->prop_em_width * 15)
1647 w = d->prop_em_width * 15;
1649 for (i = 0; i < t->l; i++)
1653 for (i = t->l; i < t->nc; i++)
1656 for (i = 0; i < t->nr; i++)
1657 t->h[i] = tab_natural_height (t, d, i);
1660 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1661 int *cnt, int pivot);
1662 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1663 int *cnt, int pivot);
1665 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1667 static struct table_entry **
1668 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1670 return (mode == GENERAL
1671 ? find_pivot_extent_general (tp, cnt, pivot)
1672 : find_pivot_extent_integer (tp, cnt, pivot));
1675 /* Find the extent of a region in TP that contains one table. If
1676 PIVOT != 0 that means a set of table entries with identical table
1677 number; otherwise they also have to have the same values for every
1678 dimension after the row and column dimensions. The table that is
1679 searched starts at TP and has length CNT. Returns the first entry
1680 after the last one in the table; sets *CNT to the number of
1681 remaining values. If there are no entries in TP at all, returns
1682 NULL. A yucky interface, admittedly, but it works. */
1683 static struct table_entry **
1684 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1686 struct table_entry *fp = *tp;
1691 x = xtab[(*tp)->table];
1699 if ((*tp)->table != fp->table)
1704 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1711 /* Integer mode correspondent to find_pivot_extent_general(). This
1712 could be optimized somewhat, but I just don't give a crap about
1713 CROSSTABS performance in integer mode, which is just a
1714 CROSSTABS wart as far as I'm concerned.
1716 That said, feel free to send optimization patches to me. */
1717 static struct table_entry **
1718 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1720 struct table_entry *fp = *tp;
1725 x = xtab[(*tp)->table];
1733 if ((*tp)->table != fp->table)
1738 if (memcmp (&(*tp)->v[2], &fp->v[2], sizeof (union value) * (x->nvar - 2)))
1745 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1746 result. WIDTH_ points to an int which is either 0 for a
1747 numeric value or a string width for a string value. */
1749 compare_value (const void *a_, const void *b_, void *width_)
1751 const union value *a = a_;
1752 const union value *b = b_;
1753 const int *pwidth = width_;
1754 const int width = *pwidth;
1757 return (a->f < b->f) ? -1 : (a->f > b->f);
1759 return strncmp (a->s, b->s, width);
1762 /* Given an array of ENTRY_CNT table_entry structures starting at
1763 ENTRIES, creates a sorted list of the values that the variable
1764 with index VAR_INDEX takes on. The values are returned as a
1765 malloc()'darray stored in *VALUES, with the number of values
1766 stored in *VALUE_CNT.
1769 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1770 union value **values, int *value_cnt)
1772 if (mode == GENERAL)
1774 int width = xtab[(*entries)->table]->v[var_idx]->width;
1777 *values = xmalloc (sizeof **values * entry_cnt);
1778 for (i = 0; i < entry_cnt; i++)
1779 (*values)[i] = entries[i]->v[var_idx];
1780 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1781 compare_value, &width);
1785 struct crosstab_proc *crs = &xtab[(*entries)->table]->v[var_idx]->p.crs;
1788 assert (mode == INTEGER);
1789 *values = xmalloc (sizeof **values * crs->count);
1790 for (i = 0; i < crs->count; i++)
1791 (*values)[i].f = i + crs->min;
1792 *value_cnt = crs->count;
1796 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1797 from V, displayed with print format spec from variable VAR. When
1798 in REPORT missing-value mode, missing values have an M appended. */
1800 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1801 const union value *v, const struct variable *var)
1803 struct len_string s;
1805 const char *label = val_labs_find (var->val_labs, *v);
1808 tab_text (table, c, r, TAB_LEFT, label);
1812 s.length = var->print.w;
1813 s.string = tab_alloc (table, s.length + 1);
1814 data_out (s.string, &var->print, v);
1815 if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
1816 s.string[s.length++] = 'M';
1817 while (s.length && *s.string == ' ')
1822 tab_raw (table, c, r, opt, &s);
1825 /* Draws a line across TABLE at the current row to indicate the most
1826 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1827 that changed, and puts the values that changed into the table. TB
1828 and X must be the corresponding table_entry and crosstab,
1831 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1833 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1835 for (; first_difference >= 2; first_difference--)
1836 table_value_missing (table, nvar - first_difference - 1, 0,
1837 TAB_RIGHT, &tb->v[first_difference],
1838 x->v[first_difference]);
1841 /* Put value V into cell (C,R) of TABLE, suffixed with letter M. */
1843 float_M_suffix (struct tab_table *table, int c, int r, double v)
1845 static const struct fmt_spec f = {FMT_F, 8, 0};
1846 struct len_string s;
1849 s.string = tab_alloc (table, 9);
1851 data_out (s.string, &f, (union value *) &v);
1852 while (*s.string == ' ')
1857 tab_raw (table, c, r, TAB_RIGHT, &s);
1860 /* Displays the crosstabulation table. */
1862 display_crosstabulation (void)
1867 for (r = 0; r < n_rows; r++)
1868 table_value_missing (table, nvar - 2, r * num_cells,
1869 TAB_RIGHT, &rows[r], x->v[ROW_VAR]);
1871 tab_text (table, nvar - 2, n_rows * num_cells,
1872 TAB_LEFT, _("Total"));
1874 /* Put in the actual cells. */
1879 tab_offset (table, nvar - 1, -1);
1880 for (r = 0; r < n_rows; r++)
1883 tab_hline (table, TAL_1, -1, n_cols, 0);
1884 for (c = 0; c < n_cols; c++)
1886 double expected_value;
1889 expected_value = row_tot[r] * col_tot[c] / W;
1890 for (i = 0; i < num_cells; i++)
1900 v = *mp / row_tot[r] * 100.;
1903 v = *mp / col_tot[c] * 100.;
1908 case CRS_CL_EXPECTED:
1911 case CRS_CL_RESIDUAL:
1912 v = *mp - expected_value;
1914 case CRS_CL_SRESIDUAL:
1915 v = (*mp - expected_value) / sqrt (expected_value);
1917 case CRS_CL_ASRESIDUAL:
1918 v = ((*mp - expected_value)
1919 / sqrt (expected_value
1920 * (1. - row_tot[r] / W)
1921 * (1. - col_tot[c] / W)));
1927 if (cmd.miss == CRS_REPORT
1928 && (is_num_user_missing (cols[c].f, x->v[COL_VAR])
1929 || is_num_user_missing (rows[r].f, x->v[ROW_VAR])))
1930 float_M_suffix (table, c, i, v);
1932 tab_float (table, c, i, TAB_RIGHT, v, 8, 0);
1938 tab_offset (table, -1, tab_row (table) + num_cells);
1946 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1947 for (r = 0; r < n_rows; r++)
1948 for (i = 0; i < num_cells; i++)
1961 v = row_tot[r] / W * 100.;
1964 v = row_tot[r] / W * 100.;
1966 case CRS_CL_EXPECTED:
1967 case CRS_CL_RESIDUAL:
1968 case CRS_CL_SRESIDUAL:
1969 case CRS_CL_ASRESIDUAL:
1976 if (cmd.miss == CRS_REPORT
1977 && is_num_user_missing (rows[r].f, x->v[ROW_VAR]))
1978 float_M_suffix (table, n_cols, 0, v);
1980 tab_float (table, n_cols, 0, TAB_RIGHT, v, 8, 0);
1982 tab_next_row (table);
1986 /* Column totals, grand total. */
1991 tab_hline (table, TAL_1, -1, n_cols, 0);
1992 for (c = 0; c <= n_cols; c++)
1994 double ct = c < n_cols ? col_tot[c] : W;
1997 for (i = j = 0; i < num_cells; i++)
2015 case CRS_CL_EXPECTED:
2016 case CRS_CL_RESIDUAL:
2017 case CRS_CL_SRESIDUAL:
2018 case CRS_CL_ASRESIDUAL:
2024 if (cmd.miss == CRS_REPORT && c < n_cols
2025 && is_num_user_missing (cols[c].f, x->v[COL_VAR]))
2026 float_M_suffix (table, c, j, v);
2028 tab_float (table, c, j, TAB_RIGHT, v, 8, 0);
2034 tab_offset (table, -1, tab_row (table) + j);
2037 tab_offset (table, 0, -1);
2040 static void calc_r (double *X, double *Y, double *, double *, double *);
2041 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
2043 /* Display chi-square statistics. */
2045 display_chisq (void)
2047 static const char *chisq_stats[N_CHISQ] =
2049 N_("Pearson Chi-Square"),
2050 N_("Likelihood Ratio"),
2051 N_("Fisher's Exact Test"),
2052 N_("Continuity Correction"),
2053 N_("Linear-by-Linear Association"),
2055 double chisq_v[N_CHISQ];
2056 double fisher1, fisher2;
2062 calc_chisq (chisq_v, df, &fisher1, &fisher2);
2064 tab_offset (chisq, nvar - 2, -1);
2066 for (i = 0; i < N_CHISQ; i++)
2068 if ((i != 2 && chisq_v[i] == SYSMIS)
2069 || (i == 2 && fisher1 == SYSMIS))
2073 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2076 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2077 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2078 tab_float (chisq, 3, 0, TAB_RIGHT,
2079 chisq_sig (chisq_v[i], df[i]), 8, 3);
2084 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2085 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2087 tab_next_row (chisq);
2090 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2091 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2092 tab_next_row (chisq);
2094 tab_offset (chisq, 0, -1);
2097 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2098 double[N_SYMMETRIC]);
2100 /* Display symmetric measures. */
2102 display_symmetric (void)
2104 static const char *categories[] =
2106 N_("Nominal by Nominal"),
2107 N_("Ordinal by Ordinal"),
2108 N_("Interval by Interval"),
2109 N_("Measure of Agreement"),
2112 static const char *stats[N_SYMMETRIC] =
2116 N_("Contingency Coefficient"),
2117 N_("Kendall's tau-b"),
2118 N_("Kendall's tau-c"),
2120 N_("Spearman Correlation"),
2125 static const int stats_categories[N_SYMMETRIC] =
2127 0, 0, 0, 1, 1, 1, 1, 2, 3,
2131 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2134 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2137 tab_offset (sym, nvar - 2, -1);
2139 for (i = 0; i < N_SYMMETRIC; i++)
2141 if (sym_v[i] == SYSMIS)
2144 if (stats_categories[i] != last_cat)
2146 last_cat = stats_categories[i];
2147 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2150 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2151 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2152 if (sym_ase[i] != SYSMIS)
2153 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2154 if (sym_t[i] != SYSMIS)
2155 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2156 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2160 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2161 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2164 tab_offset (sym, 0, -1);
2167 static int calc_risk (double[], double[], double[], union value *);
2169 /* Display risk estimate. */
2174 double risk_v[3], lower[3], upper[3];
2178 if (!calc_risk (risk_v, upper, lower, c))
2181 tab_offset (risk, nvar - 2, -1);
2183 for (i = 0; i < 3; i++)
2185 if (risk_v[i] == SYSMIS)
2191 if (x->v[COL_VAR]->type == NUMERIC)
2192 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2193 x->v[COL_VAR]->name, c[0].f, c[1].f);
2195 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2196 x->v[COL_VAR]->name,
2197 x->v[COL_VAR]->width, c[0].s,
2198 x->v[COL_VAR]->width, c[1].s);
2202 if (x->v[ROW_VAR]->type == NUMERIC)
2203 sprintf (buf, _("For cohort %s = %g"),
2204 x->v[ROW_VAR]->name, rows[i - 1].f);
2206 sprintf (buf, _("For cohort %s = %.*s"),
2207 x->v[ROW_VAR]->name,
2208 x->v[ROW_VAR]->width, rows[i - 1].s);
2212 tab_text (risk, 0, 0, TAB_LEFT, buf);
2213 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2214 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2215 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2216 tab_next_row (risk);
2219 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2220 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2221 tab_next_row (risk);
2223 tab_offset (risk, 0, -1);
2226 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2227 double[N_DIRECTIONAL]);
2229 /* Display directional measures. */
2231 display_directional (void)
2233 static const char *categories[] =
2235 N_("Nominal by Nominal"),
2236 N_("Ordinal by Ordinal"),
2237 N_("Nominal by Interval"),
2240 static const char *stats[] =
2243 N_("Goodman and Kruskal tau"),
2244 N_("Uncertainty Coefficient"),
2249 static const char *types[] =
2256 static const int stats_categories[N_DIRECTIONAL] =
2258 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2261 static const int stats_stats[N_DIRECTIONAL] =
2263 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2266 static const int stats_types[N_DIRECTIONAL] =
2268 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2271 static const int *stats_lookup[] =
2278 static const char **stats_names[] =
2290 double direct_v[N_DIRECTIONAL];
2291 double direct_ase[N_DIRECTIONAL];
2292 double direct_t[N_DIRECTIONAL];
2296 if (!calc_directional (direct_v, direct_ase, direct_t))
2299 tab_offset (direct, nvar - 2, -1);
2301 for (i = 0; i < N_DIRECTIONAL; i++)
2303 if (direct_v[i] == SYSMIS)
2309 for (j = 0; j < 3; j++)
2310 if (last[j] != stats_lookup[j][i])
2313 tab_hline (direct, TAL_1, j, 6, 0);
2318 int k = last[j] = stats_lookup[j][i];
2323 string = x->v[0]->name;
2325 string = x->v[1]->name;
2327 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2328 gettext (stats_names[j][k]), string);
2333 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2334 if (direct_ase[i] != SYSMIS)
2335 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2336 if (direct_t[i] != SYSMIS)
2337 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2338 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2339 tab_next_row (direct);
2342 tab_offset (direct, 0, -1);
2345 /* Statistical calculations. */
2347 /* Returns the value of the gamma (factorial) function for an integer
2350 gamma_int (double x)
2355 for (i = 2; i < x; i++)
2360 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2362 static inline double
2363 Pr (int a, int b, int c, int d)
2365 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2366 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2367 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2368 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2369 / gamma_int (a + b + c + d + 1.));
2372 /* Swap the contents of A and B. */
2374 swap (int *a, int *b)
2381 /* Calculate significance for Fisher's exact test as specified in
2382 _SPSS Statistical Algorithms_, Appendix 5. */
2384 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2388 if (min (c, d) < min (a, b))
2389 swap (&a, &c), swap (&b, &d);
2390 if (min (b, d) < min (a, c))
2391 swap (&a, &b), swap (&c, &d);
2395 swap (&a, &b), swap (&c, &d);
2397 swap (&a, &c), swap (&b, &d);
2401 for (x = 0; x <= a; x++)
2402 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2404 *fisher2 = *fisher1;
2405 for (x = 1; x <= b; x++)
2406 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2409 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2410 columns with values COLS and N_ROWS rows with values ROWS. Values
2411 in the matrix sum to W. */
2413 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2414 double *fisher1, double *fisher2)
2418 chisq[0] = chisq[1] = 0.;
2419 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2420 *fisher1 = *fisher2 = SYSMIS;
2422 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2424 if (ns_rows <= 1 || ns_cols <= 1)
2426 chisq[0] = chisq[1] = SYSMIS;
2430 for (r = 0; r < n_rows; r++)
2431 for (c = 0; c < n_cols; c++)
2433 const double expected = row_tot[r] * col_tot[c] / W;
2434 const double freq = mat[n_cols * r + c];
2435 const double residual = freq - expected;
2438 chisq[0] += residual * residual / expected;
2440 chisq[1] += freq * log (expected / freq);
2451 /* Calculate Yates and Fisher exact test. */
2452 if (ns_cols == 2 && ns_rows == 2)
2454 double f11, f12, f21, f22;
2460 for (i = j = 0; i < n_cols; i++)
2461 if (col_tot[i] != 0.)
2470 f11 = mat[nz_cols[0]];
2471 f12 = mat[nz_cols[1]];
2472 f21 = mat[nz_cols[0] + n_cols];
2473 f22 = mat[nz_cols[1] + n_cols];
2478 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2481 chisq[3] = (W * x * x
2482 / (f11 + f12) / (f21 + f22)
2483 / (f11 + f21) / (f12 + f22));
2491 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2492 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2495 /* Calculate Mantel-Haenszel. */
2496 if (x->v[ROW_VAR]->type == NUMERIC && x->v[COL_VAR]->type == NUMERIC)
2498 double r, ase_0, ase_1;
2499 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2501 chisq[4] = (W - 1.) * r * r;
2506 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2507 ASE_1, and ase_0 into ASE_0. The row and column values must be
2508 passed in X and Y. */
2510 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2512 double SX, SY, S, T;
2514 double sum_XYf, sum_X2Y2f;
2515 double sum_Xr, sum_X2r;
2516 double sum_Yc, sum_Y2c;
2519 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2520 for (j = 0; j < n_cols; j++)
2522 double fij = mat[j + i * n_cols];
2523 double product = X[i] * Y[j];
2524 double temp = fij * product;
2526 sum_X2Y2f += temp * product;
2529 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2531 sum_Xr += X[i] * row_tot[i];
2532 sum_X2r += X[i] * X[i] * row_tot[i];
2536 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2538 sum_Yc += Y[i] * col_tot[i];
2539 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2543 S = sum_XYf - sum_Xr * sum_Yc / W;
2544 SX = sum_X2r - sum_Xr * sum_Xr / W;
2545 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2548 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2553 for (s = c = 0., i = 0; i < n_rows; i++)
2554 for (j = 0; j < n_cols; j++)
2556 double Xresid, Yresid;
2559 Xresid = X[i] - Xbar;
2560 Yresid = Y[j] - Ybar;
2561 temp = (T * Xresid * Yresid
2563 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2564 y = mat[j + i * n_cols] * temp * temp - c;
2569 *ase_1 = sqrt (s) / (T * T);
2573 static double somers_d_v[3];
2574 static double somers_d_ase[3];
2575 static double somers_d_t[3];
2577 /* Calculate symmetric statistics and their asymptotic standard
2578 errors. Returns 0 if none could be calculated. */
2580 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2581 double t[N_SYMMETRIC])
2583 int q = min (ns_rows, ns_cols);
2592 for (i = 0; i < N_SYMMETRIC; i++)
2593 v[i] = ase[i] = t[i] = SYSMIS;
2596 /* Phi, Cramer's V, contingency coefficient. */
2597 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2599 double Xp = 0.; /* Pearson chi-square. */
2604 for (r = 0; r < n_rows; r++)
2605 for (c = 0; c < n_cols; c++)
2607 const double expected = row_tot[r] * col_tot[c] / W;
2608 const double freq = mat[n_cols * r + c];
2609 const double residual = freq - expected;
2612 Xp += residual * residual / expected;
2616 if (cmd.a_statistics[CRS_ST_PHI])
2618 v[0] = sqrt (Xp / W);
2619 v[1] = sqrt (Xp / (W * (q - 1)));
2621 if (cmd.a_statistics[CRS_ST_CC])
2622 v[2] = sqrt (Xp / (Xp + W));
2625 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2626 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2631 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2638 for (r = 0; r < n_rows; r++)
2639 Dr -= row_tot[r] * row_tot[r];
2640 for (c = 0; c < n_cols; c++)
2641 Dc -= col_tot[c] * col_tot[c];
2647 cum = xmalloc (sizeof *cum * n_cols * n_rows);
2648 for (c = 0; c < n_cols; c++)
2652 for (r = 0; r < n_rows; r++)
2653 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2663 for (i = 0; i < n_rows; i++)
2667 for (j = 1; j < n_cols; j++)
2668 Cij += col_tot[j] - cum[j + i * n_cols];
2671 for (j = 1; j < n_cols; j++)
2672 Dij += cum[j + (i - 1) * n_cols];
2676 double fij = mat[j + i * n_cols];
2682 assert (j < n_cols);
2684 Cij -= col_tot[j] - cum[j + i * n_cols];
2685 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2689 Cij += cum[j - 1 + (i - 1) * n_cols];
2690 Dij -= cum[j + (i - 1) * n_cols];
2696 if (cmd.a_statistics[CRS_ST_BTAU])
2697 v[3] = (P - Q) / sqrt (Dr * Dc);
2698 if (cmd.a_statistics[CRS_ST_CTAU])
2699 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2700 if (cmd.a_statistics[CRS_ST_GAMMA])
2701 v[5] = (P - Q) / (P + Q);
2703 /* ASE for tau-b, tau-c, gamma. Calculations could be
2704 eliminated here, at expense of memory. */
2709 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2710 for (i = 0; i < n_rows; i++)
2714 for (j = 1; j < n_cols; j++)
2715 Cij += col_tot[j] - cum[j + i * n_cols];
2718 for (j = 1; j < n_cols; j++)
2719 Dij += cum[j + (i - 1) * n_cols];
2723 double fij = mat[j + i * n_cols];
2725 if (cmd.a_statistics[CRS_ST_BTAU])
2727 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2728 + v[3] * (row_tot[i] * Dc
2729 + col_tot[j] * Dr));
2730 btau_cum += fij * temp * temp;
2734 const double temp = Cij - Dij;
2735 ctau_cum += fij * temp * temp;
2738 if (cmd.a_statistics[CRS_ST_GAMMA])
2740 const double temp = Q * Cij - P * Dij;
2741 gamma_cum += fij * temp * temp;
2744 if (cmd.a_statistics[CRS_ST_D])
2746 d_yx_cum += fij * sqr (Dr * (Cij - Dij)
2747 - (P - Q) * (W - row_tot[i]));
2748 d_xy_cum += fij * sqr (Dc * (Dij - Cij)
2749 - (Q - P) * (W - col_tot[j]));
2754 assert (j < n_cols);
2756 Cij -= col_tot[j] - cum[j + i * n_cols];
2757 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2761 Cij += cum[j - 1 + (i - 1) * n_cols];
2762 Dij -= cum[j + (i - 1) * n_cols];
2768 btau_var = ((btau_cum
2769 - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2771 if (cmd.a_statistics[CRS_ST_BTAU])
2773 ase[3] = sqrt (btau_var);
2774 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2777 if (cmd.a_statistics[CRS_ST_CTAU])
2779 ase[4] = ((2 * q / ((q - 1) * W * W))
2780 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2781 t[4] = v[4] / ase[4];
2783 if (cmd.a_statistics[CRS_ST_GAMMA])
2785 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2786 t[5] = v[5] / (2. / (P + Q)
2787 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2789 if (cmd.a_statistics[CRS_ST_D])
2791 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2792 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2793 somers_d_t[0] = (somers_d_v[0]
2795 * sqrt (ctau_cum - sqr (P - Q) / W)));
2796 somers_d_v[1] = (P - Q) / Dc;
2797 somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
2798 somers_d_t[1] = (somers_d_v[1]
2800 * sqrt (ctau_cum - sqr (P - Q) / W)));
2801 somers_d_v[2] = (P - Q) / Dr;
2802 somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
2803 somers_d_t[2] = (somers_d_v[2]
2805 * sqrt (ctau_cum - sqr (P - Q) / W)));
2811 /* Spearman correlation, Pearson's r. */
2812 if (cmd.a_statistics[CRS_ST_CORR])
2814 double *R = local_alloc (sizeof *R * n_rows);
2815 double *C = local_alloc (sizeof *C * n_cols);
2818 double y, t, c = 0., s = 0.;
2823 R[i] = s + (row_tot[i] + 1.) / 2.;
2830 assert (i < n_rows);
2835 double y, t, c = 0., s = 0.;
2840 C[j] = s + (col_tot[j] + 1.) / 2;
2847 assert (j < n_cols);
2851 calc_r (R, C, &v[6], &t[6], &ase[6]);
2857 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2861 /* Cohen's kappa. */
2862 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2864 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2867 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2868 i < ns_rows; i++, j++)
2872 while (col_tot[j] == 0.)
2875 prod = row_tot[i] * col_tot[j];
2876 sum = row_tot[i] + col_tot[j];
2878 sum_fii += mat[j + i * n_cols];
2880 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2881 sum_riciri_ci += prod * sum;
2883 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2884 for (j = 0; j < ns_cols; j++)
2886 double sum = row_tot[i] + col_tot[j];
2887 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2890 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2892 ase[8] = sqrt ((W * W * sum_rici
2893 + sum_rici * sum_rici
2894 - W * sum_riciri_ci)
2895 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2897 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2898 / sqr (W * W - sum_rici))
2899 + ((2. * (W - sum_fii)
2900 * (2. * sum_fii * sum_rici
2901 - W * sum_fiiri_ci))
2902 / cube (W * W - sum_rici))
2903 + (sqr (W - sum_fii)
2904 * (W * sum_fijri_ci2 - 4.
2905 * sum_rici * sum_rici)
2906 / hypercube (W * W - sum_rici))));
2908 t[8] = v[8] / ase[8];
2915 /* Calculate risk estimate. */
2917 calc_risk (double *value, double *upper, double *lower, union value *c)
2919 double f11, f12, f21, f22;
2925 for (i = 0; i < 3; i++)
2926 value[i] = upper[i] = lower[i] = SYSMIS;
2929 if (ns_rows != 2 || ns_cols != 2)
2936 for (i = j = 0; i < n_cols; i++)
2937 if (col_tot[i] != 0.)
2946 f11 = mat[nz_cols[0]];
2947 f12 = mat[nz_cols[1]];
2948 f21 = mat[nz_cols[0] + n_cols];
2949 f22 = mat[nz_cols[1] + n_cols];
2951 c[0] = cols[nz_cols[0]];
2952 c[1] = cols[nz_cols[1]];
2955 value[0] = (f11 * f22) / (f12 * f21);
2956 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2957 lower[0] = value[0] * exp (-1.960 * v);
2958 upper[0] = value[0] * exp (1.960 * v);
2960 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2961 v = sqrt ((f12 / (f11 * (f11 + f12)))
2962 + (f22 / (f21 * (f21 + f22))));
2963 lower[1] = value[1] * exp (-1.960 * v);
2964 upper[1] = value[1] * exp (1.960 * v);
2966 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2967 v = sqrt ((f11 / (f12 * (f11 + f12)))
2968 + (f21 / (f22 * (f21 + f22))));
2969 lower[2] = value[2] * exp (-1.960 * v);
2970 upper[2] = value[2] * exp (1.960 * v);
2975 /* Calculate directional measures. */
2977 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2978 double t[N_DIRECTIONAL])
2983 for (i = 0; i < N_DIRECTIONAL; i++)
2984 v[i] = ase[i] = t[i] = SYSMIS;
2988 if (cmd.a_statistics[CRS_ST_LAMBDA])
2990 double *fim = xmalloc (sizeof *fim * n_rows);
2991 int *fim_index = xmalloc (sizeof *fim_index * n_rows);
2992 double *fmj = xmalloc (sizeof *fmj * n_cols);
2993 int *fmj_index = xmalloc (sizeof *fmj_index * n_cols);
2994 double sum_fim, sum_fmj;
2996 int rm_index, cm_index;
2999 /* Find maximum for each row and their sum. */
3000 for (sum_fim = 0., i = 0; i < n_rows; i++)
3002 double max = mat[i * n_cols];
3005 for (j = 1; j < n_cols; j++)
3006 if (mat[j + i * n_cols] > max)
3008 max = mat[j + i * n_cols];
3012 sum_fim += fim[i] = max;
3013 fim_index[i] = index;
3016 /* Find maximum for each column. */
3017 for (sum_fmj = 0., j = 0; j < n_cols; j++)
3019 double max = mat[j];
3022 for (i = 1; i < n_rows; i++)
3023 if (mat[j + i * n_cols] > max)
3025 max = mat[j + i * n_cols];
3029 sum_fmj += fmj[j] = max;
3030 fmj_index[j] = index;
3033 /* Find maximum row total. */
3036 for (i = 1; i < n_rows; i++)
3037 if (row_tot[i] > rm)
3043 /* Find maximum column total. */
3046 for (j = 1; j < n_cols; j++)
3047 if (col_tot[j] > cm)
3053 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
3054 v[1] = (sum_fmj - rm) / (W - rm);
3055 v[2] = (sum_fim - cm) / (W - cm);
3057 /* ASE1 for Y given X. */
3061 for (accum = 0., i = 0; i < n_rows; i++)
3062 for (j = 0; j < n_cols; j++)
3064 const int deltaj = j == cm_index;
3065 accum += (mat[j + i * n_cols]
3066 * sqr ((j == fim_index[i])
3071 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3074 /* ASE0 for Y given X. */
3078 for (accum = 0., i = 0; i < n_rows; i++)
3079 if (cm_index != fim_index[i])
3080 accum += (mat[i * n_cols + fim_index[i]]
3081 + mat[i * n_cols + cm_index]);
3082 t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
3085 /* ASE1 for X given Y. */
3089 for (accum = 0., i = 0; i < n_rows; i++)
3090 for (j = 0; j < n_cols; j++)
3092 const int deltaj = i == rm_index;
3093 accum += (mat[j + i * n_cols]
3094 * sqr ((i == fmj_index[j])
3099 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3102 /* ASE0 for X given Y. */
3106 for (accum = 0., j = 0; j < n_cols; j++)
3107 if (rm_index != fmj_index[j])
3108 accum += (mat[j + n_cols * fmj_index[j]]
3109 + mat[j + n_cols * rm_index]);
3110 t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
3113 /* Symmetric ASE0 and ASE1. */
3118 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3119 for (j = 0; j < n_cols; j++)
3121 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3122 int temp1 = (i == rm_index) + (j == cm_index);
3123 accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
3124 accum1 += (mat[j + i * n_cols]
3125 * sqr (temp0 + (v[0] - 1.) * temp1));
3127 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3128 t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
3129 / (2. * W - rm - cm));
3138 double sum_fij2_ri, sum_fij2_ci;
3139 double sum_ri2, sum_cj2;
3141 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3142 for (j = 0; j < n_cols; j++)
3144 double temp = sqr (mat[j + i * n_cols]);
3145 sum_fij2_ri += temp / row_tot[i];
3146 sum_fij2_ci += temp / col_tot[j];
3149 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3150 sum_ri2 += row_tot[i] * row_tot[i];
3152 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3153 sum_cj2 += col_tot[j] * col_tot[j];
3155 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3156 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3160 if (cmd.a_statistics[CRS_ST_UC])
3162 double UX, UY, UXY, P;
3163 double ase1_yx, ase1_xy, ase1_sym;
3166 for (UX = 0., i = 0; i < n_rows; i++)
3167 if (row_tot[i] > 0.)
3168 UX -= row_tot[i] / W * log (row_tot[i] / W);
3170 for (UY = 0., j = 0; j < n_cols; j++)
3171 if (col_tot[j] > 0.)
3172 UY -= col_tot[j] / W * log (col_tot[j] / W);
3174 for (UXY = P = 0., i = 0; i < n_rows; i++)
3175 for (j = 0; j < n_cols; j++)
3177 double entry = mat[j + i * n_cols];
3182 P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
3183 UXY -= entry / W * log (entry / W);
3186 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3187 for (j = 0; j < n_cols; j++)
3189 double entry = mat[j + i * n_cols];
3194 ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
3195 + (UX - UXY) * log (col_tot[j] / W));
3196 ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
3197 + (UY - UXY) * log (row_tot[i] / W));
3198 ase1_sym += entry * sqr ((UXY
3199 * log (row_tot[i] * col_tot[j] / (W * W)))
3200 - (UX + UY) * log (entry / W));
3203 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3204 ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
3205 t[5] = v[5] / ((2. / (W * (UX + UY)))
3206 * sqrt (P - sqr (UX + UY - UXY) / W));
3208 v[6] = (UX + UY - UXY) / UX;
3209 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3210 t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
3212 v[7] = (UX + UY - UXY) / UY;
3213 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3214 t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
3218 if (cmd.a_statistics[CRS_ST_D])
3223 calc_symmetric (NULL, NULL, NULL);
3224 for (i = 0; i < 3; i++)
3226 v[8 + i] = somers_d_v[i];
3227 ase[8 + i] = somers_d_ase[i];
3228 t[8 + i] = somers_d_t[i];
3233 if (cmd.a_statistics[CRS_ST_ETA])
3236 double sum_Xr, sum_X2r;
3240 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3242 sum_Xr += rows[i].f * row_tot[i];
3243 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3245 SX = sum_X2r - sum_Xr * sum_Xr / W;
3247 for (SXW = 0., j = 0; j < n_cols; j++)
3251 for (cum = 0., i = 0; i < n_rows; i++)
3253 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3254 cum += rows[i].f * mat[j + i * n_cols];
3257 SXW -= cum * cum / col_tot[j];
3259 v[11] = sqrt (1. - SXW / SX);
3263 double sum_Yc, sum_Y2c;
3267 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3269 sum_Yc += cols[i].f * col_tot[i];
3270 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3272 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3274 for (SYW = 0., i = 0; i < n_rows; i++)
3278 for (cum = 0., j = 0; j < n_cols; j++)
3280 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3281 cum += cols[j].f * mat[j + i * n_cols];
3284 SYW -= cum * cum / row_tot[i];
3286 v[12] = sqrt (1. - SYW / SY);