1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
33 #include <libpspp/message.h>
37 #include <gsl/gsl_cdf.h>
38 #include <libpspp/array.h>
39 #include <libpspp/alloc.h>
40 #include <data/case.h>
41 #include <data/dictionary.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/pool.h>
44 #include <language/command.h>
45 #include <libpspp/compiler.h>
46 #include <language/lexer/lexer.h>
47 #include <libpspp/message.h>
48 #include <libpspp/magic.h>
49 #include <libpspp/misc.h>
50 #include <output/output.h>
51 #include <libpspp/str.h>
52 #include <output/table.h>
53 #include <data/value-labels.h>
54 #include <data/variable.h>
55 #include <procedure.h>
58 #define _(msgid) gettext (msgid)
59 #define N_(msgid) msgid
67 +missing=miss:!table/include/report;
68 +write[wr_]=none,cells,all;
69 +format=fmt:!labels/nolabels/novallabs,
72 tabl:!tables/notables,
75 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
77 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
83 /* Number of chi-square statistics. */
86 /* Number of symmetric statistics. */
89 /* Number of directional statistics. */
90 #define N_DIRECTIONAL 13
92 /* A single table entry for general mode. */
95 int table; /* Flattened table number. */
98 double freq; /* Frequency count. */
99 double *data; /* Crosstabulation table for integer mode. */
102 union value values[1]; /* Values. */
105 /* A crosstabulation. */
108 int nvar; /* Number of variables. */
109 double missing; /* Missing cases count. */
110 int ofs; /* Integer mode: Offset into sorted_tab[]. */
111 struct variable *vars[2]; /* At least two variables; sorted by
112 larger indices first. */
115 /* Integer mode variable info. */
118 int min; /* Minimum value. */
119 int max; /* Maximum value + 1. */
120 int count; /* max - min. */
123 static inline struct var_range *
124 get_var_range (struct variable *v)
127 assert (v->aux != NULL);
131 /* Indexes into crosstab.v. */
138 /* General mode crosstabulation table. */
139 static struct hsh_table *gen_tab; /* Hash table. */
140 static int n_sorted_tab; /* Number of entries in sorted_tab. */
141 static struct table_entry **sorted_tab; /* Sorted table. */
143 /* Variables specifies on VARIABLES. */
144 static struct variable **variables;
145 static size_t variables_cnt;
148 static struct crosstab **xtab;
151 /* Integer or general mode? */
160 static int num_cells; /* Number of cells requested. */
161 static int cells[8]; /* Cells requested. */
164 static int write; /* One of WR_* that specifies the WRITE style. */
166 /* Command parsing info. */
167 static struct cmd_crosstabs cmd;
170 static struct pool *pl_tc; /* For table cells. */
171 static struct pool *pl_col; /* For column data. */
173 static int internal_cmd_crosstabs (void);
174 static void precalc (void *);
175 static bool calc_general (struct ccase *, void *);
176 static bool calc_integer (struct ccase *, void *);
177 static void postcalc (void *);
178 static void submit (struct tab_table *);
180 static void format_short (char *s, const struct fmt_spec *fp,
181 const union value *v);
183 /* Parse and execute CROSSTABS, then clean up. */
187 int result = internal_cmd_crosstabs ();
190 pool_destroy (pl_tc);
191 pool_destroy (pl_col);
196 /* Parses and executes the CROSSTABS procedure. */
198 internal_cmd_crosstabs (void)
207 pl_tc = pool_create ();
208 pl_col = pool_create ();
210 if (!parse_crosstabs (&cmd))
213 mode = variables ? INTEGER : GENERAL;
218 cmd.a_cells[CRS_CL_COUNT] = 1;
224 for (i = 0; i < CRS_CL_count; i++)
229 cmd.a_cells[CRS_CL_COUNT] = 1;
230 cmd.a_cells[CRS_CL_ROW] = 1;
231 cmd.a_cells[CRS_CL_COLUMN] = 1;
232 cmd.a_cells[CRS_CL_TOTAL] = 1;
234 if (cmd.a_cells[CRS_CL_ALL])
236 for (i = 0; i < CRS_CL_count; i++)
238 cmd.a_cells[CRS_CL_ALL] = 0;
240 cmd.a_cells[CRS_CL_NONE] = 0;
242 for (num_cells = i = 0; i < CRS_CL_count; i++)
244 cells[num_cells++] = i;
247 if (cmd.sbc_statistics)
252 for (i = 0; i < CRS_ST_count; i++)
253 if (cmd.a_statistics[i])
256 cmd.a_statistics[CRS_ST_CHISQ] = 1;
257 if (cmd.a_statistics[CRS_ST_ALL])
258 for (i = 0; i < CRS_ST_count; i++)
259 cmd.a_statistics[i] = 1;
263 if (cmd.miss == CRS_REPORT && mode == GENERAL)
265 msg (SE, _("Missing mode REPORT not allowed in general mode. "
266 "Assuming MISSING=TABLE."));
267 cmd.miss = CRS_TABLE;
271 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
272 cmd.a_write[CRS_WR_ALL] = 0;
273 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
275 msg (SE, _("Write mode ALL not allowed in general mode. "
276 "Assuming WRITE=CELLS."));
277 cmd.a_write[CRS_WR_CELLS] = 1;
280 && (cmd.a_write[CRS_WR_NONE]
281 + cmd.a_write[CRS_WR_ALL]
282 + cmd.a_write[CRS_WR_CELLS] == 0))
283 cmd.a_write[CRS_WR_CELLS] = 1;
284 if (cmd.a_write[CRS_WR_CELLS])
285 write = CRS_WR_CELLS;
286 else if (cmd.a_write[CRS_WR_ALL])
291 ok = procedure_with_splits (precalc,
292 mode == GENERAL ? calc_general : calc_integer,
295 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
298 /* Parses the TABLES subcommand. */
300 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
302 struct var_set *var_set;
304 struct variable ***by = NULL;
305 size_t *by_nvar = NULL;
309 /* Ensure that this is a TABLES subcommand. */
310 if (!lex_match_id ("TABLES")
311 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
316 if (variables != NULL)
317 var_set = var_set_create_from_array (variables, variables_cnt);
319 var_set = var_set_create_from_dict (default_dict);
320 assert (var_set != NULL);
324 by = xnrealloc (by, n_by + 1, sizeof *by);
325 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
326 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
327 PV_NO_DUPLICATE | PV_NO_SCRATCH))
329 if (xalloc_oversized (nx, by_nvar[n_by]))
331 msg (SE, _("Too many crosstabulation variables or dimensions."));
337 if (!lex_match (T_BY))
341 lex_error (_("expecting BY"));
350 int *by_iter = xcalloc (n_by, sizeof *by_iter);
353 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
354 for (i = 0; i < nx; i++)
358 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
365 for (i = 0; i < n_by; i++)
366 x->vars[i] = by[i][by_iter[i]];
372 for (i = n_by - 1; i >= 0; i--)
374 if (++by_iter[i] < by_nvar[i])
387 /* All return paths lead here. */
391 for (i = 0; i < n_by; i++)
397 var_set_destroy (var_set);
402 /* Parses the VARIABLES subcommand. */
404 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
408 msg (SE, _("VARIABLES must be specified before TABLES."));
416 size_t orig_nv = variables_cnt;
421 if (!parse_variables (default_dict, &variables, &variables_cnt,
422 (PV_APPEND | PV_NUMERIC
423 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
428 lex_error ("expecting `('");
433 if (!lex_force_int ())
435 min = lex_integer ();
440 if (!lex_force_int ())
442 max = lex_integer ();
445 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
453 lex_error ("expecting `)'");
458 for (i = orig_nv; i < variables_cnt; i++)
460 struct var_range *vr = xmalloc (sizeof *vr);
463 vr->count = max - min + 1;
464 var_attach_aux (variables[i], vr, var_dtor_free);
479 /* Data file processing. */
481 static int compare_table_entry (const void *, const void *, void *);
482 static unsigned hash_table_entry (const void *, void *);
484 /* Set up the crosstabulation tables for processing. */
486 precalc (void *aux UNUSED)
490 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
500 for (i = 0; i < nxtab; i++)
502 struct crosstab *x = xtab[i];
507 x->ofs = n_sorted_tab;
509 for (j = 2; j < x->nvar; j++)
510 count *= get_var_range (x->vars[j - 2])->count;
512 sorted_tab = xnrealloc (sorted_tab,
513 n_sorted_tab + count, sizeof *sorted_tab);
514 v = local_alloc (sizeof *v * x->nvar);
515 for (j = 2; j < x->nvar; j++)
516 v[j] = get_var_range (x->vars[j])->min;
517 for (j = 0; j < count; j++)
519 struct table_entry *te;
522 te = sorted_tab[n_sorted_tab++]
523 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
527 int row_cnt = get_var_range (x->vars[0])->count;
528 int col_cnt = get_var_range (x->vars[1])->count;
529 const int mat_size = row_cnt * col_cnt;
532 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
533 for (m = 0; m < mat_size; m++)
537 for (k = 2; k < x->nvar; k++)
538 te->values[k].f = v[k];
539 for (k = 2; k < x->nvar; k++)
541 struct var_range *vr = get_var_range (x->vars[k]);
542 if (++v[k] >= vr->max)
551 sorted_tab = xnrealloc (sorted_tab,
552 n_sorted_tab + 1, sizeof *sorted_tab);
553 sorted_tab[n_sorted_tab] = NULL;
557 /* Form crosstabulations for general mode. */
559 calc_general (struct ccase *c, void *aux UNUSED)
564 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
566 /* Flattened current table index. */
569 for (t = 0; t < nxtab; t++)
571 struct crosstab *x = xtab[t];
572 const size_t entry_size = (sizeof (struct table_entry)
573 + sizeof (union value) * (x->nvar - 1));
574 struct table_entry *te = local_alloc (entry_size);
576 /* Construct table entry for the current record and table. */
582 for (j = 0; j < x->nvar; j++)
584 const union value *v = case_data (c, x->vars[j]->fv);
585 const struct missing_values *mv = &x->vars[j]->miss;
586 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
587 || (cmd.miss == CRS_INCLUDE
588 && mv_is_value_system_missing (mv, v)))
590 x->missing += weight;
594 if (x->vars[j]->type == NUMERIC)
595 te->values[j].f = case_num (c, x->vars[j]->fv);
598 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
601 /* Necessary in order to simplify comparisons. */
602 memset (&te->values[j].s[x->vars[j]->width], 0,
603 sizeof (union value) - x->vars[j]->width);
608 /* Add record to hash table. */
610 struct table_entry **tepp
611 = (struct table_entry **) hsh_probe (gen_tab, te);
614 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
617 memcpy (tep, te, entry_size);
622 (*tepp)->u.freq += weight;
633 calc_integer (struct ccase *c, void *aux UNUSED)
638 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
640 /* Flattened current table index. */
643 for (t = 0; t < nxtab; t++)
645 struct crosstab *x = xtab[t];
650 for (i = 0; i < x->nvar; i++)
652 struct variable *const v = x->vars[i];
653 struct var_range *vr = get_var_range (v);
654 double value = case_num (c, v->fv);
656 /* Note that the first test also rules out SYSMIS. */
657 if ((value < vr->min || value >= vr->max)
658 || (cmd.miss == CRS_TABLE
659 && mv_is_num_user_missing (&v->miss, value)))
661 x->missing += weight;
667 ofs += fact * ((int) value - vr->min);
673 struct variable *row_var = x->vars[ROW_VAR];
674 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
676 struct variable *col_var = x->vars[COL_VAR];
677 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
679 const int col_dim = get_var_range (col_var)->count;
681 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
690 /* Compare the table_entry's at A and B and return a strcmp()-type
693 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
695 const struct table_entry *a = a_;
696 const struct table_entry *b = b_;
698 if (a->table > b->table)
700 else if (a->table < b->table)
704 const struct crosstab *x = xtab[a->table];
707 for (i = x->nvar - 1; i >= 0; i--)
708 if (x->vars[i]->type == NUMERIC)
710 const double diffnum = a->values[i].f - b->values[i].f;
713 else if (diffnum > 0)
718 assert (x->vars[i]->type == ALPHA);
720 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
731 /* Calculate a hash value from table_entry A. */
733 hash_table_entry (const void *a_, void *foo UNUSED)
735 const struct table_entry *a = a_;
740 for (i = 0; i < xtab[a->table]->nvar; i++)
741 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
746 /* Post-data reading calculations. */
748 static struct table_entry **find_pivot_extent (struct table_entry **,
749 int *cnt, int pivot);
750 static void enum_var_values (struct table_entry **entries, int entry_cnt,
752 union value **values, int *value_cnt);
753 static void output_pivot_table (struct table_entry **, struct table_entry **,
754 double **, double **, double **,
755 int *, int *, int *);
756 static void make_summary_table (void);
759 postcalc (void *aux UNUSED)
763 n_sorted_tab = hsh_count (gen_tab);
764 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
767 make_summary_table ();
769 /* Identify all the individual crosstabulation tables, and deal with
772 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
773 int pc = n_sorted_tab; /* Pivot count. */
775 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
776 int maxrows = 0, maxcols = 0, maxcells = 0;
780 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
784 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
785 &maxrows, &maxcols, &maxcells);
794 hsh_destroy (gen_tab);
797 static void insert_summary (struct tab_table *, int tab_index, double valid);
799 /* Output a table summarizing the cases processed. */
801 make_summary_table (void)
803 struct tab_table *summary;
805 struct table_entry **pb = sorted_tab, **pe;
806 int pc = n_sorted_tab;
809 summary = tab_create (7, 3 + nxtab, 1);
810 tab_title (summary, _("Summary."));
811 tab_headers (summary, 1, 0, 3, 0);
812 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
813 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
814 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
815 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
816 tab_hline (summary, TAL_1, 1, 6, 1);
817 tab_hline (summary, TAL_1, 1, 6, 2);
818 tab_vline (summary, TAL_1, 3, 1, 1);
819 tab_vline (summary, TAL_1, 5, 1, 1);
823 for (i = 0; i < 3; i++)
825 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
826 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
829 tab_offset (summary, 0, 3);
835 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
839 while (cur_tab < (*pb)->table)
840 insert_summary (summary, cur_tab++, 0.);
843 for (valid = 0.; pb < pe; pb++)
844 valid += (*pb)->u.freq;
847 const struct crosstab *const x = xtab[(*pb)->table];
848 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
849 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
850 const int count = n_cols * n_rows;
852 for (valid = 0.; pb < pe; pb++)
854 const double *data = (*pb)->u.data;
857 for (i = 0; i < count; i++)
861 insert_summary (summary, cur_tab++, valid);
866 while (cur_tab < nxtab)
867 insert_summary (summary, cur_tab++, 0.);
872 /* Inserts a line into T describing the crosstabulation at index
873 TAB_INDEX, which has VALID valid observations. */
875 insert_summary (struct tab_table *t, int tab_index, double valid)
877 struct crosstab *x = xtab[tab_index];
879 tab_hline (t, TAL_1, 0, 6, 0);
881 /* Crosstabulation name. */
883 char *buf = local_alloc (128 * x->nvar);
887 for (i = 0; i < x->nvar; i++)
890 cp = stpcpy (cp, " * ");
893 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
895 tab_text (t, 0, 0, TAB_LEFT, buf);
900 /* Counts and percentages. */
910 for (i = 0; i < 3; i++)
912 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
913 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
924 static struct tab_table *table; /* Crosstabulation table. */
925 static struct tab_table *chisq; /* Chi-square table. */
926 static struct tab_table *sym; /* Symmetric measures table. */
927 static struct tab_table *risk; /* Risk estimate table. */
928 static struct tab_table *direct; /* Directional measures table. */
931 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
933 /* Column values, number of columns. */
934 static union value *cols;
937 /* Row values, number of rows. */
938 static union value *rows;
941 /* Number of statistically interesting columns/rows (columns/rows with
943 static int ns_cols, ns_rows;
945 /* Crosstabulation. */
946 static struct crosstab *x;
948 /* Number of variables from the crosstabulation to consider. This is
949 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
952 /* Matrix contents. */
953 static double *mat; /* Matrix proper. */
954 static double *row_tot; /* Row totals. */
955 static double *col_tot; /* Column totals. */
956 static double W; /* Grand total. */
958 static void display_dimensions (struct tab_table *, int first_difference,
959 struct table_entry *);
960 static void display_crosstabulation (void);
961 static void display_chisq (void);
962 static void display_symmetric (void);
963 static void display_risk (void);
964 static void display_directional (void);
965 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
966 static void table_value_missing (struct tab_table *table, int c, int r,
967 unsigned char opt, const union value *v,
968 const struct variable *var);
969 static void delete_missing (void);
971 /* Output pivot table beginning at PB and continuing until PE,
972 exclusive. For efficiency, *MATP is a pointer to a matrix that can
973 hold *MAXROWS entries. */
975 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
976 double **matp, double **row_totp, double **col_totp,
977 int *maxrows, int *maxcols, int *maxcells)
980 struct table_entry **tb = pb, **te; /* Table begin, table end. */
981 int tc = pe - pb; /* Table count. */
983 /* Table entry for header comparison. */
984 struct table_entry *cmp = NULL;
986 x = xtab[(*pb)->table];
987 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
989 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
991 /* Crosstabulation table initialization. */
994 table = tab_create (nvar + n_cols,
995 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
996 tab_headers (table, nvar - 1, 0, 2, 0);
998 /* First header line. */
999 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1000 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1002 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1004 /* Second header line. */
1008 for (i = 2; i < nvar; i++)
1009 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1010 TAB_RIGHT | TAT_TITLE,
1012 ? x->vars[i]->label : x->vars[i]->name));
1013 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1014 x->vars[ROW_VAR]->name);
1015 for (i = 0; i < n_cols; i++)
1016 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1018 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1021 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1022 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1026 char *title = local_alloc (x->nvar * 64 + 128);
1030 if (cmd.pivot == CRS_PIVOT)
1031 for (i = 0; i < nvar; i++)
1034 cp = stpcpy (cp, " by ");
1035 cp = stpcpy (cp, x->vars[i]->name);
1039 cp = spprintf (cp, "%s by %s for",
1040 x->vars[0]->name, x->vars[1]->name);
1041 for (i = 2; i < nvar; i++)
1043 char buf[64], *bufp;
1048 cp = stpcpy (cp, x->vars[i]->name);
1050 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1051 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1053 cp = stpcpy (cp, bufp);
1057 cp = stpcpy (cp, " [");
1058 for (i = 0; i < num_cells; i++)
1066 static const struct tuple cell_names[] =
1068 {CRS_CL_COUNT, N_("count")},
1069 {CRS_CL_ROW, N_("row %")},
1070 {CRS_CL_COLUMN, N_("column %")},
1071 {CRS_CL_TOTAL, N_("total %")},
1072 {CRS_CL_EXPECTED, N_("expected")},
1073 {CRS_CL_RESIDUAL, N_("residual")},
1074 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1075 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1079 const struct tuple *t;
1081 for (t = cell_names; t->value != cells[i]; t++)
1082 assert (t->value != -1);
1084 cp = stpcpy (cp, ", ");
1085 cp = stpcpy (cp, gettext (t->name));
1089 tab_title (table, "%s", title);
1093 tab_offset (table, 0, 2);
1098 /* Chi-square table initialization. */
1099 if (cmd.a_statistics[CRS_ST_CHISQ])
1101 chisq = tab_create (6 + (nvar - 2),
1102 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1103 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1105 tab_title (chisq, _("Chi-square tests."));
1107 tab_offset (chisq, nvar - 2, 0);
1108 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1109 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1110 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1111 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1112 _("Asymp. Sig. (2-sided)"));
1113 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1114 _("Exact. Sig. (2-sided)"));
1115 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1116 _("Exact. Sig. (1-sided)"));
1118 tab_offset (chisq, 0, 1);
1123 /* Symmetric measures. */
1124 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1125 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1126 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1127 || cmd.a_statistics[CRS_ST_KAPPA])
1129 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1130 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1131 tab_title (sym, _("Symmetric measures."));
1133 tab_offset (sym, nvar - 2, 0);
1134 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1135 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1136 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1137 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1138 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1139 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1140 tab_offset (sym, 0, 1);
1145 /* Risk estimate. */
1146 if (cmd.a_statistics[CRS_ST_RISK])
1148 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1149 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1150 tab_title (risk, _("Risk estimate."));
1152 tab_offset (risk, nvar - 2, 0);
1153 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1154 _("95%% Confidence Interval"));
1155 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1156 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1157 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1158 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1159 tab_hline (risk, TAL_1, 2, 3, 1);
1160 tab_vline (risk, TAL_1, 2, 0, 1);
1161 tab_offset (risk, 0, 2);
1166 /* Directional measures. */
1167 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1168 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1170 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1171 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1172 tab_title (direct, _("Directional measures."));
1174 tab_offset (direct, nvar - 2, 0);
1175 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1176 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1177 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1178 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1179 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1180 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1181 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1182 tab_offset (direct, 0, 1);
1189 /* Find pivot subtable if applicable. */
1190 te = find_pivot_extent (tb, &tc, 0);
1194 /* Find all the row variable values. */
1195 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1197 /* Allocate memory space for the column and row totals. */
1198 if (n_rows > *maxrows)
1200 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1201 row_tot = *row_totp;
1204 if (n_cols > *maxcols)
1206 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1207 col_tot = *col_totp;
1211 /* Allocate table space for the matrix. */
1212 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1213 tab_realloc (table, -1,
1214 max (tab_nr (table) + (n_rows + 1) * num_cells,
1215 tab_nr (table) * (pe - pb) / (te - tb)));
1217 if (mode == GENERAL)
1219 /* Allocate memory space for the matrix. */
1220 if (n_cols * n_rows > *maxcells)
1222 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1223 *maxcells = n_cols * n_rows;
1228 /* Build the matrix and calculate column totals. */
1230 union value *cur_col = cols;
1231 union value *cur_row = rows;
1233 double *cp = col_tot;
1234 struct table_entry **p;
1237 for (p = &tb[0]; p < te; p++)
1239 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1243 for (; cur_row < &rows[n_rows]; cur_row++)
1249 mp = &mat[cur_col - cols];
1252 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1259 *cp += *mp = (*p)->u.freq;
1264 /* Zero out the rest of the matrix. */
1265 for (; cur_row < &rows[n_rows]; cur_row++)
1271 if (cur_col < &cols[n_cols])
1273 const int rem_cols = n_cols - (cur_col - cols);
1276 for (c = 0; c < rem_cols; c++)
1278 mp = &mat[cur_col - cols];
1279 for (r = 0; r < n_rows; r++)
1281 for (c = 0; c < rem_cols; c++)
1283 mp += n_cols - rem_cols;
1291 double *tp = col_tot;
1293 assert (mode == INTEGER);
1294 mat = (*tb)->u.data;
1297 /* Calculate column totals. */
1298 for (c = 0; c < n_cols; c++)
1301 double *cp = &mat[c];
1303 for (r = 0; r < n_rows; r++)
1304 cum += cp[r * n_cols];
1312 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1313 ns_cols += *cp != 0.;
1316 /* Calculate row totals. */
1319 double *rp = row_tot;
1322 for (ns_rows = 0, r = n_rows; r--; )
1325 for (c = n_cols; c--; )
1333 /* Calculate grand total. */
1339 if (n_rows < n_cols)
1340 tp = row_tot, n = n_rows;
1342 tp = col_tot, n = n_cols;
1348 /* Find the first variable that differs from the last subtable,
1349 then display the values of the dimensioning variables for
1350 each table that needs it. */
1352 int first_difference = nvar - 1;
1355 for (; ; first_difference--)
1357 assert (first_difference >= 2);
1358 if (memcmp (&cmp->values[first_difference],
1359 &(*tb)->values[first_difference],
1360 sizeof *cmp->values))
1366 display_dimensions (table, first_difference, *tb);
1368 display_dimensions (chisq, first_difference, *tb);
1370 display_dimensions (sym, first_difference, *tb);
1372 display_dimensions (risk, first_difference, *tb);
1374 display_dimensions (direct, first_difference, *tb);
1378 display_crosstabulation ();
1379 if (cmd.miss == CRS_REPORT)
1384 display_symmetric ();
1388 display_directional ();
1399 tab_resize (chisq, 4 + (nvar - 2), -1);
1410 /* Delete missing rows and columns for statistical analysis when
1413 delete_missing (void)
1418 for (r = 0; r < n_rows; r++)
1419 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1423 for (c = 0; c < n_cols; c++)
1424 mat[c + r * n_cols] = 0.;
1432 for (c = 0; c < n_cols; c++)
1433 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1437 for (r = 0; r < n_rows; r++)
1438 mat[c + r * n_cols] = 0.;
1444 /* Prepare table T for submission, and submit it. */
1446 submit (struct tab_table *t)
1453 tab_resize (t, -1, 0);
1454 if (tab_nr (t) == tab_t (t))
1459 tab_offset (t, 0, 0);
1461 for (i = 2; i < nvar; i++)
1462 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1463 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1464 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1465 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1467 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1469 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1470 tab_dim (t, crosstabs_dim);
1474 /* Sets the widths of all the columns and heights of all the rows in
1475 table T for driver D. */
1477 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1481 /* Width of a numerical column. */
1482 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1483 if (cmd.miss == CRS_REPORT)
1484 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1486 /* Set width for header columns. */
1492 w = d->width - c * (t->nc - t->l);
1493 for (i = 0; i <= t->nc; i++)
1497 if (w < d->prop_em_width * 8)
1498 w = d->prop_em_width * 8;
1500 if (w > d->prop_em_width * 15)
1501 w = d->prop_em_width * 15;
1503 for (i = 0; i < t->l; i++)
1507 for (i = t->l; i < t->nc; i++)
1510 for (i = 0; i < t->nr; i++)
1511 t->h[i] = tab_natural_height (t, d, i);
1514 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1515 int *cnt, int pivot);
1516 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1517 int *cnt, int pivot);
1519 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1521 static struct table_entry **
1522 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1524 return (mode == GENERAL
1525 ? find_pivot_extent_general (tp, cnt, pivot)
1526 : find_pivot_extent_integer (tp, cnt, pivot));
1529 /* Find the extent of a region in TP that contains one table. If
1530 PIVOT != 0 that means a set of table entries with identical table
1531 number; otherwise they also have to have the same values for every
1532 dimension after the row and column dimensions. The table that is
1533 searched starts at TP and has length CNT. Returns the first entry
1534 after the last one in the table; sets *CNT to the number of
1535 remaining values. If there are no entries in TP at all, returns
1536 NULL. A yucky interface, admittedly, but it works. */
1537 static struct table_entry **
1538 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1540 struct table_entry *fp = *tp;
1545 x = xtab[(*tp)->table];
1553 if ((*tp)->table != fp->table)
1558 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1565 /* Integer mode correspondent to find_pivot_extent_general(). This
1566 could be optimized somewhat, but I just don't give a crap about
1567 CROSSTABS performance in integer mode, which is just a
1568 CROSSTABS wart as far as I'm concerned.
1570 That said, feel free to send optimization patches to me. */
1571 static struct table_entry **
1572 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1574 struct table_entry *fp = *tp;
1579 x = xtab[(*tp)->table];
1587 if ((*tp)->table != fp->table)
1592 if (memcmp (&(*tp)->values[2], &fp->values[2],
1593 sizeof (union value) * (x->nvar - 2)))
1600 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1601 result. WIDTH_ points to an int which is either 0 for a
1602 numeric value or a string width for a string value. */
1604 compare_value (const void *a_, const void *b_, void *width_)
1606 const union value *a = a_;
1607 const union value *b = b_;
1608 const int *pwidth = width_;
1609 const int width = *pwidth;
1612 return (a->f < b->f) ? -1 : (a->f > b->f);
1614 return strncmp (a->s, b->s, width);
1617 /* Given an array of ENTRY_CNT table_entry structures starting at
1618 ENTRIES, creates a sorted list of the values that the variable
1619 with index VAR_IDX takes on. The values are returned as a
1620 malloc()'darray stored in *VALUES, with the number of values
1621 stored in *VALUE_CNT.
1624 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1625 union value **values, int *value_cnt)
1627 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1629 if (mode == GENERAL)
1631 int width = v->width;
1634 *values = xnmalloc (entry_cnt, sizeof **values);
1635 for (i = 0; i < entry_cnt; i++)
1636 (*values)[i] = entries[i]->values[var_idx];
1637 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1638 compare_value, &width);
1642 struct var_range *vr = get_var_range (v);
1645 assert (mode == INTEGER);
1646 *values = xnmalloc (vr->count, sizeof **values);
1647 for (i = 0; i < vr->count; i++)
1648 (*values)[i].f = i + vr->min;
1649 *value_cnt = vr->count;
1653 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1654 from V, displayed with print format spec from variable VAR. When
1655 in REPORT missing-value mode, missing values have an M appended. */
1657 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1658 const union value *v, const struct variable *var)
1660 struct fixed_string s;
1662 const char *label = val_labs_find (var->val_labs, *v);
1665 tab_text (table, c, r, TAB_LEFT, label);
1669 s.string = tab_alloc (table, var->print.w);
1670 format_short (s.string, &var->print, v);
1671 s.length = strlen (s.string);
1672 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1673 s.string[s.length++] = 'M';
1674 while (s.length && *s.string == ' ')
1679 tab_raw (table, c, r, opt, &s);
1682 /* Draws a line across TABLE at the current row to indicate the most
1683 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1684 that changed, and puts the values that changed into the table. TB
1685 and X must be the corresponding table_entry and crosstab,
1688 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1690 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1692 for (; first_difference >= 2; first_difference--)
1693 table_value_missing (table, nvar - first_difference - 1, 0,
1694 TAB_RIGHT, &tb->values[first_difference],
1695 x->vars[first_difference]);
1698 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1699 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1700 additionally suffixed with a letter `M'. */
1702 format_cell_entry (struct tab_table *table, int c, int r, double value,
1703 char suffix, int mark_missing)
1705 const struct fmt_spec f = {FMT_F, 10, 1};
1707 struct fixed_string s;
1710 s.string = tab_alloc (table, 16);
1712 data_out (s.string, &f, &v);
1713 while (*s.string == ' ')
1719 s.string[s.length++] = suffix;
1721 s.string[s.length++] = 'M';
1723 tab_raw (table, c, r, TAB_RIGHT, &s);
1726 /* Displays the crosstabulation table. */
1728 display_crosstabulation (void)
1733 for (r = 0; r < n_rows; r++)
1734 table_value_missing (table, nvar - 2, r * num_cells,
1735 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1737 tab_text (table, nvar - 2, n_rows * num_cells,
1738 TAB_LEFT, _("Total"));
1740 /* Put in the actual cells. */
1745 tab_offset (table, nvar - 1, -1);
1746 for (r = 0; r < n_rows; r++)
1749 tab_hline (table, TAL_1, -1, n_cols, 0);
1750 for (c = 0; c < n_cols; c++)
1752 int mark_missing = 0;
1753 double expected_value = row_tot[r] * col_tot[c] / W;
1754 if (cmd.miss == CRS_REPORT
1755 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1756 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1759 for (i = 0; i < num_cells; i++)
1770 v = *mp / row_tot[r] * 100.;
1774 v = *mp / col_tot[c] * 100.;
1781 case CRS_CL_EXPECTED:
1784 case CRS_CL_RESIDUAL:
1785 v = *mp - expected_value;
1787 case CRS_CL_SRESIDUAL:
1788 v = (*mp - expected_value) / sqrt (expected_value);
1790 case CRS_CL_ASRESIDUAL:
1791 v = ((*mp - expected_value)
1792 / sqrt (expected_value
1793 * (1. - row_tot[r] / W)
1794 * (1. - col_tot[c] / W)));
1801 format_cell_entry (table, c, i, v, suffix, mark_missing);
1807 tab_offset (table, -1, tab_row (table) + num_cells);
1815 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1816 for (r = 0; r < n_rows; r++)
1819 int mark_missing = 0;
1821 if (cmd.miss == CRS_REPORT
1822 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1825 for (i = 0; i < num_cells; i++)
1839 v = row_tot[r] / W * 100.;
1843 v = row_tot[r] / W * 100.;
1846 case CRS_CL_EXPECTED:
1847 case CRS_CL_RESIDUAL:
1848 case CRS_CL_SRESIDUAL:
1849 case CRS_CL_ASRESIDUAL:
1857 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1858 tab_next_row (table);
1863 /* Column totals, grand total. */
1869 tab_hline (table, TAL_1, -1, n_cols, 0);
1870 for (c = 0; c <= n_cols; c++)
1872 double ct = c < n_cols ? col_tot[c] : W;
1873 int mark_missing = 0;
1877 if (cmd.miss == CRS_REPORT && c < n_cols
1878 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1881 for (i = 0; i < num_cells; i++)
1903 case CRS_CL_EXPECTED:
1904 case CRS_CL_RESIDUAL:
1905 case CRS_CL_SRESIDUAL:
1906 case CRS_CL_ASRESIDUAL:
1913 format_cell_entry (table, c, i, v, suffix, mark_missing);
1918 tab_offset (table, -1, tab_row (table) + last_row);
1921 tab_offset (table, 0, -1);
1924 static void calc_r (double *X, double *Y, double *, double *, double *);
1925 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1927 /* Display chi-square statistics. */
1929 display_chisq (void)
1931 static const char *chisq_stats[N_CHISQ] =
1933 N_("Pearson Chi-Square"),
1934 N_("Likelihood Ratio"),
1935 N_("Fisher's Exact Test"),
1936 N_("Continuity Correction"),
1937 N_("Linear-by-Linear Association"),
1939 double chisq_v[N_CHISQ];
1940 double fisher1, fisher2;
1946 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1948 tab_offset (chisq, nvar - 2, -1);
1950 for (i = 0; i < N_CHISQ; i++)
1952 if ((i != 2 && chisq_v[i] == SYSMIS)
1953 || (i == 2 && fisher1 == SYSMIS))
1957 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1960 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1961 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1962 tab_float (chisq, 3, 0, TAB_RIGHT,
1963 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1968 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1969 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1971 tab_next_row (chisq);
1974 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1975 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1976 tab_next_row (chisq);
1978 tab_offset (chisq, 0, -1);
1981 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1982 double[N_SYMMETRIC]);
1984 /* Display symmetric measures. */
1986 display_symmetric (void)
1988 static const char *categories[] =
1990 N_("Nominal by Nominal"),
1991 N_("Ordinal by Ordinal"),
1992 N_("Interval by Interval"),
1993 N_("Measure of Agreement"),
1996 static const char *stats[N_SYMMETRIC] =
2000 N_("Contingency Coefficient"),
2001 N_("Kendall's tau-b"),
2002 N_("Kendall's tau-c"),
2004 N_("Spearman Correlation"),
2009 static const int stats_categories[N_SYMMETRIC] =
2011 0, 0, 0, 1, 1, 1, 1, 2, 3,
2015 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2018 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2021 tab_offset (sym, nvar - 2, -1);
2023 for (i = 0; i < N_SYMMETRIC; i++)
2025 if (sym_v[i] == SYSMIS)
2028 if (stats_categories[i] != last_cat)
2030 last_cat = stats_categories[i];
2031 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2034 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2035 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2036 if (sym_ase[i] != SYSMIS)
2037 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2038 if (sym_t[i] != SYSMIS)
2039 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2040 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2044 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2045 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2048 tab_offset (sym, 0, -1);
2051 static int calc_risk (double[], double[], double[], union value *);
2053 /* Display risk estimate. */
2058 double risk_v[3], lower[3], upper[3];
2062 if (!calc_risk (risk_v, upper, lower, c))
2065 tab_offset (risk, nvar - 2, -1);
2067 for (i = 0; i < 3; i++)
2069 if (risk_v[i] == SYSMIS)
2075 if (x->vars[COL_VAR]->type == NUMERIC)
2076 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2077 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2079 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2080 x->vars[COL_VAR]->name,
2081 x->vars[COL_VAR]->width, c[0].s,
2082 x->vars[COL_VAR]->width, c[1].s);
2086 if (x->vars[ROW_VAR]->type == NUMERIC)
2087 sprintf (buf, _("For cohort %s = %g"),
2088 x->vars[ROW_VAR]->name, rows[i - 1].f);
2090 sprintf (buf, _("For cohort %s = %.*s"),
2091 x->vars[ROW_VAR]->name,
2092 x->vars[ROW_VAR]->width, rows[i - 1].s);
2096 tab_text (risk, 0, 0, TAB_LEFT, buf);
2097 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2098 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2099 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2100 tab_next_row (risk);
2103 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2104 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2105 tab_next_row (risk);
2107 tab_offset (risk, 0, -1);
2110 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2111 double[N_DIRECTIONAL]);
2113 /* Display directional measures. */
2115 display_directional (void)
2117 static const char *categories[] =
2119 N_("Nominal by Nominal"),
2120 N_("Ordinal by Ordinal"),
2121 N_("Nominal by Interval"),
2124 static const char *stats[] =
2127 N_("Goodman and Kruskal tau"),
2128 N_("Uncertainty Coefficient"),
2133 static const char *types[] =
2140 static const int stats_categories[N_DIRECTIONAL] =
2142 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2145 static const int stats_stats[N_DIRECTIONAL] =
2147 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2150 static const int stats_types[N_DIRECTIONAL] =
2152 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2155 static const int *stats_lookup[] =
2162 static const char **stats_names[] =
2174 double direct_v[N_DIRECTIONAL];
2175 double direct_ase[N_DIRECTIONAL];
2176 double direct_t[N_DIRECTIONAL];
2180 if (!calc_directional (direct_v, direct_ase, direct_t))
2183 tab_offset (direct, nvar - 2, -1);
2185 for (i = 0; i < N_DIRECTIONAL; i++)
2187 if (direct_v[i] == SYSMIS)
2193 for (j = 0; j < 3; j++)
2194 if (last[j] != stats_lookup[j][i])
2197 tab_hline (direct, TAL_1, j, 6, 0);
2202 int k = last[j] = stats_lookup[j][i];
2207 string = x->vars[0]->name;
2209 string = x->vars[1]->name;
2211 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2212 gettext (stats_names[j][k]), string);
2217 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2218 if (direct_ase[i] != SYSMIS)
2219 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2220 if (direct_t[i] != SYSMIS)
2221 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2222 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2223 tab_next_row (direct);
2226 tab_offset (direct, 0, -1);
2229 /* Statistical calculations. */
2231 /* Returns the value of the gamma (factorial) function for an integer
2234 gamma_int (double x)
2239 for (i = 2; i < x; i++)
2244 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2246 static inline double
2247 Pr (int a, int b, int c, int d)
2249 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2250 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2251 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2252 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2253 / gamma_int (a + b + c + d + 1.));
2256 /* Swap the contents of A and B. */
2258 swap (int *a, int *b)
2265 /* Calculate significance for Fisher's exact test as specified in
2266 _SPSS Statistical Algorithms_, Appendix 5. */
2268 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2272 if (min (c, d) < min (a, b))
2273 swap (&a, &c), swap (&b, &d);
2274 if (min (b, d) < min (a, c))
2275 swap (&a, &b), swap (&c, &d);
2279 swap (&a, &b), swap (&c, &d);
2281 swap (&a, &c), swap (&b, &d);
2285 for (x = 0; x <= a; x++)
2286 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2288 *fisher2 = *fisher1;
2289 for (x = 1; x <= b; x++)
2290 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2293 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2294 columns with values COLS and N_ROWS rows with values ROWS. Values
2295 in the matrix sum to W. */
2297 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2298 double *fisher1, double *fisher2)
2302 chisq[0] = chisq[1] = 0.;
2303 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2304 *fisher1 = *fisher2 = SYSMIS;
2306 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2308 if (ns_rows <= 1 || ns_cols <= 1)
2310 chisq[0] = chisq[1] = SYSMIS;
2314 for (r = 0; r < n_rows; r++)
2315 for (c = 0; c < n_cols; c++)
2317 const double expected = row_tot[r] * col_tot[c] / W;
2318 const double freq = mat[n_cols * r + c];
2319 const double residual = freq - expected;
2321 chisq[0] += residual * residual / expected;
2323 chisq[1] += freq * log (expected / freq);
2334 /* Calculate Yates and Fisher exact test. */
2335 if (ns_cols == 2 && ns_rows == 2)
2337 double f11, f12, f21, f22;
2343 for (i = j = 0; i < n_cols; i++)
2344 if (col_tot[i] != 0.)
2353 f11 = mat[nz_cols[0]];
2354 f12 = mat[nz_cols[1]];
2355 f21 = mat[nz_cols[0] + n_cols];
2356 f22 = mat[nz_cols[1] + n_cols];
2361 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2364 chisq[3] = (W * x * x
2365 / (f11 + f12) / (f21 + f22)
2366 / (f11 + f21) / (f12 + f22));
2374 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2375 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2378 /* Calculate Mantel-Haenszel. */
2379 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2381 double r, ase_0, ase_1;
2382 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2384 chisq[4] = (W - 1.) * r * r;
2389 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2390 ASE_1, and ase_0 into ASE_0. The row and column values must be
2391 passed in X and Y. */
2393 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2395 double SX, SY, S, T;
2397 double sum_XYf, sum_X2Y2f;
2398 double sum_Xr, sum_X2r;
2399 double sum_Yc, sum_Y2c;
2402 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2403 for (j = 0; j < n_cols; j++)
2405 double fij = mat[j + i * n_cols];
2406 double product = X[i] * Y[j];
2407 double temp = fij * product;
2409 sum_X2Y2f += temp * product;
2412 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2414 sum_Xr += X[i] * row_tot[i];
2415 sum_X2r += X[i] * X[i] * row_tot[i];
2419 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2421 sum_Yc += Y[i] * col_tot[i];
2422 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2426 S = sum_XYf - sum_Xr * sum_Yc / W;
2427 SX = sum_X2r - sum_Xr * sum_Xr / W;
2428 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2431 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2436 for (s = c = 0., i = 0; i < n_rows; i++)
2437 for (j = 0; j < n_cols; j++)
2439 double Xresid, Yresid;
2442 Xresid = X[i] - Xbar;
2443 Yresid = Y[j] - Ybar;
2444 temp = (T * Xresid * Yresid
2446 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2447 y = mat[j + i * n_cols] * temp * temp - c;
2452 *ase_1 = sqrt (s) / (T * T);
2456 static double somers_d_v[3];
2457 static double somers_d_ase[3];
2458 static double somers_d_t[3];
2460 /* Calculate symmetric statistics and their asymptotic standard
2461 errors. Returns 0 if none could be calculated. */
2463 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2464 double t[N_SYMMETRIC])
2466 int q = min (ns_rows, ns_cols);
2475 for (i = 0; i < N_SYMMETRIC; i++)
2476 v[i] = ase[i] = t[i] = SYSMIS;
2479 /* Phi, Cramer's V, contingency coefficient. */
2480 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2482 double Xp = 0.; /* Pearson chi-square. */
2487 for (r = 0; r < n_rows; r++)
2488 for (c = 0; c < n_cols; c++)
2490 const double expected = row_tot[r] * col_tot[c] / W;
2491 const double freq = mat[n_cols * r + c];
2492 const double residual = freq - expected;
2494 Xp += residual * residual / expected;
2498 if (cmd.a_statistics[CRS_ST_PHI])
2500 v[0] = sqrt (Xp / W);
2501 v[1] = sqrt (Xp / (W * (q - 1)));
2503 if (cmd.a_statistics[CRS_ST_CC])
2504 v[2] = sqrt (Xp / (Xp + W));
2507 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2508 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2513 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2520 for (r = 0; r < n_rows; r++)
2521 Dr -= row_tot[r] * row_tot[r];
2522 for (c = 0; c < n_cols; c++)
2523 Dc -= col_tot[c] * col_tot[c];
2529 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2530 for (c = 0; c < n_cols; c++)
2534 for (r = 0; r < n_rows; r++)
2535 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2545 for (i = 0; i < n_rows; i++)
2549 for (j = 1; j < n_cols; j++)
2550 Cij += col_tot[j] - cum[j + i * n_cols];
2553 for (j = 1; j < n_cols; j++)
2554 Dij += cum[j + (i - 1) * n_cols];
2558 double fij = mat[j + i * n_cols];
2564 assert (j < n_cols);
2566 Cij -= col_tot[j] - cum[j + i * n_cols];
2567 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2571 Cij += cum[j - 1 + (i - 1) * n_cols];
2572 Dij -= cum[j + (i - 1) * n_cols];
2578 if (cmd.a_statistics[CRS_ST_BTAU])
2579 v[3] = (P - Q) / sqrt (Dr * Dc);
2580 if (cmd.a_statistics[CRS_ST_CTAU])
2581 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2582 if (cmd.a_statistics[CRS_ST_GAMMA])
2583 v[5] = (P - Q) / (P + Q);
2585 /* ASE for tau-b, tau-c, gamma. Calculations could be
2586 eliminated here, at expense of memory. */
2591 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2592 for (i = 0; i < n_rows; i++)
2596 for (j = 1; j < n_cols; j++)
2597 Cij += col_tot[j] - cum[j + i * n_cols];
2600 for (j = 1; j < n_cols; j++)
2601 Dij += cum[j + (i - 1) * n_cols];
2605 double fij = mat[j + i * n_cols];
2607 if (cmd.a_statistics[CRS_ST_BTAU])
2609 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2610 + v[3] * (row_tot[i] * Dc
2611 + col_tot[j] * Dr));
2612 btau_cum += fij * temp * temp;
2616 const double temp = Cij - Dij;
2617 ctau_cum += fij * temp * temp;
2620 if (cmd.a_statistics[CRS_ST_GAMMA])
2622 const double temp = Q * Cij - P * Dij;
2623 gamma_cum += fij * temp * temp;
2626 if (cmd.a_statistics[CRS_ST_D])
2628 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2629 - (P - Q) * (W - row_tot[i]));
2630 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2631 - (Q - P) * (W - col_tot[j]));
2636 assert (j < n_cols);
2638 Cij -= col_tot[j] - cum[j + i * n_cols];
2639 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2643 Cij += cum[j - 1 + (i - 1) * n_cols];
2644 Dij -= cum[j + (i - 1) * n_cols];
2650 btau_var = ((btau_cum
2651 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2653 if (cmd.a_statistics[CRS_ST_BTAU])
2655 ase[3] = sqrt (btau_var);
2656 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2659 if (cmd.a_statistics[CRS_ST_CTAU])
2661 ase[4] = ((2 * q / ((q - 1) * W * W))
2662 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2663 t[4] = v[4] / ase[4];
2665 if (cmd.a_statistics[CRS_ST_GAMMA])
2667 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2668 t[5] = v[5] / (2. / (P + Q)
2669 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2671 if (cmd.a_statistics[CRS_ST_D])
2673 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2674 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2675 somers_d_t[0] = (somers_d_v[0]
2677 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2678 somers_d_v[1] = (P - Q) / Dc;
2679 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2680 somers_d_t[1] = (somers_d_v[1]
2682 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2683 somers_d_v[2] = (P - Q) / Dr;
2684 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2685 somers_d_t[2] = (somers_d_v[2]
2687 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2693 /* Spearman correlation, Pearson's r. */
2694 if (cmd.a_statistics[CRS_ST_CORR])
2696 double *R = local_alloc (sizeof *R * n_rows);
2697 double *C = local_alloc (sizeof *C * n_cols);
2700 double y, t, c = 0., s = 0.;
2705 R[i] = s + (row_tot[i] + 1.) / 2.;
2712 assert (i < n_rows);
2717 double y, t, c = 0., s = 0.;
2722 C[j] = s + (col_tot[j] + 1.) / 2;
2729 assert (j < n_cols);
2733 calc_r (R, C, &v[6], &t[6], &ase[6]);
2739 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2743 /* Cohen's kappa. */
2744 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2746 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2749 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2750 i < ns_rows; i++, j++)
2754 while (col_tot[j] == 0.)
2757 prod = row_tot[i] * col_tot[j];
2758 sum = row_tot[i] + col_tot[j];
2760 sum_fii += mat[j + i * n_cols];
2762 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2763 sum_riciri_ci += prod * sum;
2765 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2766 for (j = 0; j < ns_cols; j++)
2768 double sum = row_tot[i] + col_tot[j];
2769 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2772 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2774 ase[8] = sqrt ((W * W * sum_rici
2775 + sum_rici * sum_rici
2776 - W * sum_riciri_ci)
2777 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2779 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2780 / pow2 (W * W - sum_rici))
2781 + ((2. * (W - sum_fii)
2782 * (2. * sum_fii * sum_rici
2783 - W * sum_fiiri_ci))
2784 / cube (W * W - sum_rici))
2785 + (pow2 (W - sum_fii)
2786 * (W * sum_fijri_ci2 - 4.
2787 * sum_rici * sum_rici)
2788 / pow4 (W * W - sum_rici))));
2790 t[8] = v[8] / ase[8];
2797 /* Calculate risk estimate. */
2799 calc_risk (double *value, double *upper, double *lower, union value *c)
2801 double f11, f12, f21, f22;
2807 for (i = 0; i < 3; i++)
2808 value[i] = upper[i] = lower[i] = SYSMIS;
2811 if (ns_rows != 2 || ns_cols != 2)
2818 for (i = j = 0; i < n_cols; i++)
2819 if (col_tot[i] != 0.)
2828 f11 = mat[nz_cols[0]];
2829 f12 = mat[nz_cols[1]];
2830 f21 = mat[nz_cols[0] + n_cols];
2831 f22 = mat[nz_cols[1] + n_cols];
2833 c[0] = cols[nz_cols[0]];
2834 c[1] = cols[nz_cols[1]];
2837 value[0] = (f11 * f22) / (f12 * f21);
2838 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2839 lower[0] = value[0] * exp (-1.960 * v);
2840 upper[0] = value[0] * exp (1.960 * v);
2842 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2843 v = sqrt ((f12 / (f11 * (f11 + f12)))
2844 + (f22 / (f21 * (f21 + f22))));
2845 lower[1] = value[1] * exp (-1.960 * v);
2846 upper[1] = value[1] * exp (1.960 * v);
2848 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2849 v = sqrt ((f11 / (f12 * (f11 + f12)))
2850 + (f21 / (f22 * (f21 + f22))));
2851 lower[2] = value[2] * exp (-1.960 * v);
2852 upper[2] = value[2] * exp (1.960 * v);
2857 /* Calculate directional measures. */
2859 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2860 double t[N_DIRECTIONAL])
2865 for (i = 0; i < N_DIRECTIONAL; i++)
2866 v[i] = ase[i] = t[i] = SYSMIS;
2870 if (cmd.a_statistics[CRS_ST_LAMBDA])
2872 double *fim = xnmalloc (n_rows, sizeof *fim);
2873 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2874 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2875 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2876 double sum_fim, sum_fmj;
2878 int rm_index, cm_index;
2881 /* Find maximum for each row and their sum. */
2882 for (sum_fim = 0., i = 0; i < n_rows; i++)
2884 double max = mat[i * n_cols];
2887 for (j = 1; j < n_cols; j++)
2888 if (mat[j + i * n_cols] > max)
2890 max = mat[j + i * n_cols];
2894 sum_fim += fim[i] = max;
2895 fim_index[i] = index;
2898 /* Find maximum for each column. */
2899 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2901 double max = mat[j];
2904 for (i = 1; i < n_rows; i++)
2905 if (mat[j + i * n_cols] > max)
2907 max = mat[j + i * n_cols];
2911 sum_fmj += fmj[j] = max;
2912 fmj_index[j] = index;
2915 /* Find maximum row total. */
2918 for (i = 1; i < n_rows; i++)
2919 if (row_tot[i] > rm)
2925 /* Find maximum column total. */
2928 for (j = 1; j < n_cols; j++)
2929 if (col_tot[j] > cm)
2935 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2936 v[1] = (sum_fmj - rm) / (W - rm);
2937 v[2] = (sum_fim - cm) / (W - cm);
2939 /* ASE1 for Y given X. */
2943 for (accum = 0., i = 0; i < n_rows; i++)
2944 for (j = 0; j < n_cols; j++)
2946 const int deltaj = j == cm_index;
2947 accum += (mat[j + i * n_cols]
2948 * pow2 ((j == fim_index[i])
2953 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2956 /* ASE0 for Y given X. */
2960 for (accum = 0., i = 0; i < n_rows; i++)
2961 if (cm_index != fim_index[i])
2962 accum += (mat[i * n_cols + fim_index[i]]
2963 + mat[i * n_cols + cm_index]);
2964 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2967 /* ASE1 for X given Y. */
2971 for (accum = 0., i = 0; i < n_rows; i++)
2972 for (j = 0; j < n_cols; j++)
2974 const int deltaj = i == rm_index;
2975 accum += (mat[j + i * n_cols]
2976 * pow2 ((i == fmj_index[j])
2981 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2984 /* ASE0 for X given Y. */
2988 for (accum = 0., j = 0; j < n_cols; j++)
2989 if (rm_index != fmj_index[j])
2990 accum += (mat[j + n_cols * fmj_index[j]]
2991 + mat[j + n_cols * rm_index]);
2992 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2995 /* Symmetric ASE0 and ASE1. */
3000 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3001 for (j = 0; j < n_cols; j++)
3003 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3004 int temp1 = (i == rm_index) + (j == cm_index);
3005 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3006 accum1 += (mat[j + i * n_cols]
3007 * pow2 (temp0 + (v[0] - 1.) * temp1));
3009 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3010 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3011 / (2. * W - rm - cm));
3020 double sum_fij2_ri, sum_fij2_ci;
3021 double sum_ri2, sum_cj2;
3023 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3024 for (j = 0; j < n_cols; j++)
3026 double temp = pow2 (mat[j + i * n_cols]);
3027 sum_fij2_ri += temp / row_tot[i];
3028 sum_fij2_ci += temp / col_tot[j];
3031 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3032 sum_ri2 += row_tot[i] * row_tot[i];
3034 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3035 sum_cj2 += col_tot[j] * col_tot[j];
3037 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3038 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3042 if (cmd.a_statistics[CRS_ST_UC])
3044 double UX, UY, UXY, P;
3045 double ase1_yx, ase1_xy, ase1_sym;
3048 for (UX = 0., i = 0; i < n_rows; i++)
3049 if (row_tot[i] > 0.)
3050 UX -= row_tot[i] / W * log (row_tot[i] / W);
3052 for (UY = 0., j = 0; j < n_cols; j++)
3053 if (col_tot[j] > 0.)
3054 UY -= col_tot[j] / W * log (col_tot[j] / W);
3056 for (UXY = P = 0., i = 0; i < n_rows; i++)
3057 for (j = 0; j < n_cols; j++)
3059 double entry = mat[j + i * n_cols];
3064 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3065 UXY -= entry / W * log (entry / W);
3068 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3069 for (j = 0; j < n_cols; j++)
3071 double entry = mat[j + i * n_cols];
3076 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3077 + (UX - UXY) * log (col_tot[j] / W));
3078 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3079 + (UY - UXY) * log (row_tot[i] / W));
3080 ase1_sym += entry * pow2 ((UXY
3081 * log (row_tot[i] * col_tot[j] / (W * W)))
3082 - (UX + UY) * log (entry / W));
3085 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3086 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3087 t[5] = v[5] / ((2. / (W * (UX + UY)))
3088 * sqrt (P - pow2 (UX + UY - UXY) / W));
3090 v[6] = (UX + UY - UXY) / UX;
3091 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3092 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3094 v[7] = (UX + UY - UXY) / UY;
3095 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3096 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3100 if (cmd.a_statistics[CRS_ST_D])
3105 calc_symmetric (NULL, NULL, NULL);
3106 for (i = 0; i < 3; i++)
3108 v[8 + i] = somers_d_v[i];
3109 ase[8 + i] = somers_d_ase[i];
3110 t[8 + i] = somers_d_t[i];
3115 if (cmd.a_statistics[CRS_ST_ETA])
3118 double sum_Xr, sum_X2r;
3122 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3124 sum_Xr += rows[i].f * row_tot[i];
3125 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3127 SX = sum_X2r - sum_Xr * sum_Xr / W;
3129 for (SXW = 0., j = 0; j < n_cols; j++)
3133 for (cum = 0., i = 0; i < n_rows; i++)
3135 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3136 cum += rows[i].f * mat[j + i * n_cols];
3139 SXW -= cum * cum / col_tot[j];
3141 v[11] = sqrt (1. - SXW / SX);
3145 double sum_Yc, sum_Y2c;
3149 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3151 sum_Yc += cols[i].f * col_tot[i];
3152 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3154 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3156 for (SYW = 0., i = 0; i < n_rows; i++)
3160 for (cum = 0., j = 0; j < n_cols; j++)
3162 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3163 cum += cols[j].f * mat[j + i * n_cols];
3166 SYW -= cum * cum / row_tot[i];
3168 v[12] = sqrt (1. - SYW / SY);
3175 /* A wrapper around data_out() that limits string output to short
3176 string width and null terminates the result. */
3178 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3180 struct fmt_spec fmt_subst;
3182 /* Limit to short string width. */
3183 if (formats[fp->type].cat & FCAT_STRING)
3187 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3188 if (fmt_subst.type == FMT_A)
3189 fmt_subst.w = min (8, fmt_subst.w);
3191 fmt_subst.w = min (16, fmt_subst.w);
3197 data_out (s, fp, v);
3199 /* Null terminate. */