1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
33 #include <libpspp/message.h>
37 #include <gsl/gsl_cdf.h>
38 #include <libpspp/array.h>
39 #include <libpspp/alloc.h>
40 #include <data/case.h>
41 #include <data/dictionary.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/pool.h>
44 #include <language/command.h>
45 #include <libpspp/compiler.h>
46 #include <language/lexer/lexer.h>
47 #include <libpspp/message.h>
48 #include <libpspp/magic.h>
49 #include <libpspp/misc.h>
50 #include <output/output.h>
51 #include <libpspp/str.h>
52 #include <output/table.h>
53 #include <data/value-labels.h>
54 #include <data/variable.h>
55 #include <procedure.h>
58 #define _(msgid) gettext (msgid)
59 #define N_(msgid) msgid
63 #include <libpspp/debug-print.h>
69 +missing=miss:!table/include/report;
70 +write[wr_]=none,cells,all;
71 +format=fmt:!labels/nolabels/novallabs,
74 tabl:!tables/notables,
77 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
79 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
85 /* Number of chi-square statistics. */
88 /* Number of symmetric statistics. */
91 /* Number of directional statistics. */
92 #define N_DIRECTIONAL 13
94 /* A single table entry for general mode. */
97 int table; /* Flattened table number. */
100 double freq; /* Frequency count. */
101 double *data; /* Crosstabulation table for integer mode. */
104 union value values[1]; /* Values. */
107 /* A crosstabulation. */
110 int nvar; /* Number of variables. */
111 double missing; /* Missing cases count. */
112 int ofs; /* Integer mode: Offset into sorted_tab[]. */
113 struct variable *vars[2]; /* At least two variables; sorted by
114 larger indices first. */
117 /* Integer mode variable info. */
120 int min; /* Minimum value. */
121 int max; /* Maximum value + 1. */
122 int count; /* max - min. */
125 static inline struct var_range *
126 get_var_range (struct variable *v)
129 assert (v->aux != NULL);
133 /* Indexes into crosstab.v. */
140 /* General mode crosstabulation table. */
141 static struct hsh_table *gen_tab; /* Hash table. */
142 static int n_sorted_tab; /* Number of entries in sorted_tab. */
143 static struct table_entry **sorted_tab; /* Sorted table. */
145 /* Variables specifies on VARIABLES. */
146 static struct variable **variables;
147 static size_t variables_cnt;
150 static struct crosstab **xtab;
153 /* Integer or general mode? */
162 static int num_cells; /* Number of cells requested. */
163 static int cells[8]; /* Cells requested. */
166 static int write; /* One of WR_* that specifies the WRITE style. */
168 /* Command parsing info. */
169 static struct cmd_crosstabs cmd;
172 static struct pool *pl_tc; /* For table cells. */
173 static struct pool *pl_col; /* For column data. */
175 static int internal_cmd_crosstabs (void);
176 static void precalc (void *);
177 static bool calc_general (struct ccase *, void *);
178 static bool calc_integer (struct ccase *, void *);
179 static void postcalc (void *);
180 static void submit (struct tab_table *);
182 static void format_short (char *s, const struct fmt_spec *fp,
183 const union value *v);
185 /* Parse and execute CROSSTABS, then clean up. */
189 int result = internal_cmd_crosstabs ();
192 pool_destroy (pl_tc);
193 pool_destroy (pl_col);
198 /* Parses and executes the CROSSTABS procedure. */
200 internal_cmd_crosstabs (void)
209 pl_tc = pool_create ();
210 pl_col = pool_create ();
212 if (!parse_crosstabs (&cmd))
215 mode = variables ? INTEGER : GENERAL;
220 cmd.a_cells[CRS_CL_COUNT] = 1;
226 for (i = 0; i < CRS_CL_count; i++)
231 cmd.a_cells[CRS_CL_COUNT] = 1;
232 cmd.a_cells[CRS_CL_ROW] = 1;
233 cmd.a_cells[CRS_CL_COLUMN] = 1;
234 cmd.a_cells[CRS_CL_TOTAL] = 1;
236 if (cmd.a_cells[CRS_CL_ALL])
238 for (i = 0; i < CRS_CL_count; i++)
240 cmd.a_cells[CRS_CL_ALL] = 0;
242 cmd.a_cells[CRS_CL_NONE] = 0;
244 for (num_cells = i = 0; i < CRS_CL_count; i++)
246 cells[num_cells++] = i;
249 if (cmd.sbc_statistics)
254 for (i = 0; i < CRS_ST_count; i++)
255 if (cmd.a_statistics[i])
258 cmd.a_statistics[CRS_ST_CHISQ] = 1;
259 if (cmd.a_statistics[CRS_ST_ALL])
260 for (i = 0; i < CRS_ST_count; i++)
261 cmd.a_statistics[i] = 1;
265 if (cmd.miss == CRS_REPORT && mode == GENERAL)
267 msg (SE, _("Missing mode REPORT not allowed in general mode. "
268 "Assuming MISSING=TABLE."));
269 cmd.miss = CRS_TABLE;
273 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
274 cmd.a_write[CRS_WR_ALL] = 0;
275 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
277 msg (SE, _("Write mode ALL not allowed in general mode. "
278 "Assuming WRITE=CELLS."));
279 cmd.a_write[CRS_WR_CELLS] = 1;
282 && (cmd.a_write[CRS_WR_NONE]
283 + cmd.a_write[CRS_WR_ALL]
284 + cmd.a_write[CRS_WR_CELLS] == 0))
285 cmd.a_write[CRS_WR_CELLS] = 1;
286 if (cmd.a_write[CRS_WR_CELLS])
287 write = CRS_WR_CELLS;
288 else if (cmd.a_write[CRS_WR_ALL])
293 ok = procedure_with_splits (precalc,
294 mode == GENERAL ? calc_general : calc_integer,
297 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
300 /* Parses the TABLES subcommand. */
302 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED)
304 struct var_set *var_set;
306 struct variable ***by = NULL;
307 size_t *by_nvar = NULL;
311 /* Ensure that this is a TABLES subcommand. */
312 if (!lex_match_id ("TABLES")
313 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
318 if (variables != NULL)
319 var_set = var_set_create_from_array (variables, variables_cnt);
321 var_set = var_set_create_from_dict (default_dict);
322 assert (var_set != NULL);
326 by = xnrealloc (by, n_by + 1, sizeof *by);
327 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
328 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
329 PV_NO_DUPLICATE | PV_NO_SCRATCH))
331 if (xalloc_oversized (nx, by_nvar[n_by]))
333 msg (SE, _("Too many crosstabulation variables or dimensions."));
339 if (!lex_match (T_BY))
343 lex_error (_("expecting BY"));
352 int *by_iter = xcalloc (n_by, sizeof *by_iter);
355 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
356 for (i = 0; i < nx; i++)
360 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
367 for (i = 0; i < n_by; i++)
368 x->vars[i] = by[i][by_iter[i]];
374 for (i = n_by - 1; i >= 0; i--)
376 if (++by_iter[i] < by_nvar[i])
389 /* All return paths lead here. */
393 for (i = 0; i < n_by; i++)
399 var_set_destroy (var_set);
404 /* Parses the VARIABLES subcommand. */
406 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED)
410 msg (SE, _("VARIABLES must be specified before TABLES."));
418 size_t orig_nv = variables_cnt;
423 if (!parse_variables (default_dict, &variables, &variables_cnt,
424 (PV_APPEND | PV_NUMERIC
425 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
430 lex_error ("expecting `('");
435 if (!lex_force_int ())
437 min = lex_integer ();
442 if (!lex_force_int ())
444 max = lex_integer ();
447 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
455 lex_error ("expecting `)'");
460 for (i = orig_nv; i < variables_cnt; i++)
462 struct var_range *vr = xmalloc (sizeof *vr);
465 vr->count = max - min + 1;
466 var_attach_aux (variables[i], vr, var_dtor_free);
481 /* Data file processing. */
483 static int compare_table_entry (const void *, const void *, void *);
484 static unsigned hash_table_entry (const void *, void *);
486 /* Set up the crosstabulation tables for processing. */
488 precalc (void *aux UNUSED)
492 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
502 for (i = 0; i < nxtab; i++)
504 struct crosstab *x = xtab[i];
509 x->ofs = n_sorted_tab;
511 for (j = 2; j < x->nvar; j++)
512 count *= get_var_range (x->vars[j - 2])->count;
514 sorted_tab = xnrealloc (sorted_tab,
515 n_sorted_tab + count, sizeof *sorted_tab);
516 v = local_alloc (sizeof *v * x->nvar);
517 for (j = 2; j < x->nvar; j++)
518 v[j] = get_var_range (x->vars[j])->min;
519 for (j = 0; j < count; j++)
521 struct table_entry *te;
524 te = sorted_tab[n_sorted_tab++]
525 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
529 int row_cnt = get_var_range (x->vars[0])->count;
530 int col_cnt = get_var_range (x->vars[1])->count;
531 const int mat_size = row_cnt * col_cnt;
534 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
535 for (m = 0; m < mat_size; m++)
539 for (k = 2; k < x->nvar; k++)
540 te->values[k].f = v[k];
541 for (k = 2; k < x->nvar; k++)
543 struct var_range *vr = get_var_range (x->vars[k]);
544 if (++v[k] >= vr->max)
553 sorted_tab = xnrealloc (sorted_tab,
554 n_sorted_tab + 1, sizeof *sorted_tab);
555 sorted_tab[n_sorted_tab] = NULL;
559 /* Form crosstabulations for general mode. */
561 calc_general (struct ccase *c, void *aux UNUSED)
566 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
568 /* Flattened current table index. */
571 for (t = 0; t < nxtab; t++)
573 struct crosstab *x = xtab[t];
574 const size_t entry_size = (sizeof (struct table_entry)
575 + sizeof (union value) * (x->nvar - 1));
576 struct table_entry *te = local_alloc (entry_size);
578 /* Construct table entry for the current record and table. */
584 for (j = 0; j < x->nvar; j++)
586 const union value *v = case_data (c, x->vars[j]->fv);
587 const struct missing_values *mv = &x->vars[j]->miss;
588 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
589 || (cmd.miss == CRS_INCLUDE
590 && mv_is_value_system_missing (mv, v)))
592 x->missing += weight;
596 if (x->vars[j]->type == NUMERIC)
597 te->values[j].f = case_num (c, x->vars[j]->fv);
600 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
603 /* Necessary in order to simplify comparisons. */
604 memset (&te->values[j].s[x->vars[j]->width], 0,
605 sizeof (union value) - x->vars[j]->width);
610 /* Add record to hash table. */
612 struct table_entry **tepp
613 = (struct table_entry **) hsh_probe (gen_tab, te);
616 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
619 memcpy (tep, te, entry_size);
624 (*tepp)->u.freq += weight;
635 calc_integer (struct ccase *c, void *aux UNUSED)
640 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
642 /* Flattened current table index. */
645 for (t = 0; t < nxtab; t++)
647 struct crosstab *x = xtab[t];
652 for (i = 0; i < x->nvar; i++)
654 struct variable *const v = x->vars[i];
655 struct var_range *vr = get_var_range (v);
656 double value = case_num (c, v->fv);
658 /* Note that the first test also rules out SYSMIS. */
659 if ((value < vr->min || value >= vr->max)
660 || (cmd.miss == CRS_TABLE
661 && mv_is_num_user_missing (&v->miss, value)))
663 x->missing += weight;
669 ofs += fact * ((int) value - vr->min);
675 struct variable *row_var = x->vars[ROW_VAR];
676 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
678 struct variable *col_var = x->vars[COL_VAR];
679 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
681 const int col_dim = get_var_range (col_var)->count;
683 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
692 /* Compare the table_entry's at A and B and return a strcmp()-type
695 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
697 const struct table_entry *a = a_;
698 const struct table_entry *b = b_;
700 if (a->table > b->table)
702 else if (a->table < b->table)
706 const struct crosstab *x = xtab[a->table];
709 for (i = x->nvar - 1; i >= 0; i--)
710 if (x->vars[i]->type == NUMERIC)
712 const double diffnum = a->values[i].f - b->values[i].f;
715 else if (diffnum > 0)
720 assert (x->vars[i]->type == ALPHA);
722 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
733 /* Calculate a hash value from table_entry A. */
735 hash_table_entry (const void *a_, void *foo UNUSED)
737 const struct table_entry *a = a_;
742 for (i = 0; i < xtab[a->table]->nvar; i++)
743 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
748 /* Post-data reading calculations. */
750 static struct table_entry **find_pivot_extent (struct table_entry **,
751 int *cnt, int pivot);
752 static void enum_var_values (struct table_entry **entries, int entry_cnt,
754 union value **values, int *value_cnt);
755 static void output_pivot_table (struct table_entry **, struct table_entry **,
756 double **, double **, double **,
757 int *, int *, int *);
758 static void make_summary_table (void);
761 postcalc (void *aux UNUSED)
765 n_sorted_tab = hsh_count (gen_tab);
766 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
769 make_summary_table ();
771 /* Identify all the individual crosstabulation tables, and deal with
774 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
775 int pc = n_sorted_tab; /* Pivot count. */
777 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
778 int maxrows = 0, maxcols = 0, maxcells = 0;
782 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
786 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
787 &maxrows, &maxcols, &maxcells);
796 hsh_destroy (gen_tab);
799 static void insert_summary (struct tab_table *, int tab_index, double valid);
801 /* Output a table summarizing the cases processed. */
803 make_summary_table (void)
805 struct tab_table *summary;
807 struct table_entry **pb = sorted_tab, **pe;
808 int pc = n_sorted_tab;
811 summary = tab_create (7, 3 + nxtab, 1);
812 tab_title (summary, _("Summary."));
813 tab_headers (summary, 1, 0, 3, 0);
814 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
815 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
816 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
817 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
818 tab_hline (summary, TAL_1, 1, 6, 1);
819 tab_hline (summary, TAL_1, 1, 6, 2);
820 tab_vline (summary, TAL_1, 3, 1, 1);
821 tab_vline (summary, TAL_1, 5, 1, 1);
825 for (i = 0; i < 3; i++)
827 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
828 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
831 tab_offset (summary, 0, 3);
837 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
841 while (cur_tab < (*pb)->table)
842 insert_summary (summary, cur_tab++, 0.);
845 for (valid = 0.; pb < pe; pb++)
846 valid += (*pb)->u.freq;
849 const struct crosstab *const x = xtab[(*pb)->table];
850 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
851 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
852 const int count = n_cols * n_rows;
854 for (valid = 0.; pb < pe; pb++)
856 const double *data = (*pb)->u.data;
859 for (i = 0; i < count; i++)
863 insert_summary (summary, cur_tab++, valid);
868 while (cur_tab < nxtab)
869 insert_summary (summary, cur_tab++, 0.);
874 /* Inserts a line into T describing the crosstabulation at index
875 TAB_INDEX, which has VALID valid observations. */
877 insert_summary (struct tab_table *t, int tab_index, double valid)
879 struct crosstab *x = xtab[tab_index];
881 tab_hline (t, TAL_1, 0, 6, 0);
883 /* Crosstabulation name. */
885 char *buf = local_alloc (128 * x->nvar);
889 for (i = 0; i < x->nvar; i++)
892 cp = stpcpy (cp, " * ");
895 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
897 tab_text (t, 0, 0, TAB_LEFT, buf);
902 /* Counts and percentages. */
912 for (i = 0; i < 3; i++)
914 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
915 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
926 static struct tab_table *table; /* Crosstabulation table. */
927 static struct tab_table *chisq; /* Chi-square table. */
928 static struct tab_table *sym; /* Symmetric measures table. */
929 static struct tab_table *risk; /* Risk estimate table. */
930 static struct tab_table *direct; /* Directional measures table. */
933 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
935 /* Column values, number of columns. */
936 static union value *cols;
939 /* Row values, number of rows. */
940 static union value *rows;
943 /* Number of statistically interesting columns/rows (columns/rows with
945 static int ns_cols, ns_rows;
947 /* Crosstabulation. */
948 static struct crosstab *x;
950 /* Number of variables from the crosstabulation to consider. This is
951 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
954 /* Matrix contents. */
955 static double *mat; /* Matrix proper. */
956 static double *row_tot; /* Row totals. */
957 static double *col_tot; /* Column totals. */
958 static double W; /* Grand total. */
960 static void display_dimensions (struct tab_table *, int first_difference,
961 struct table_entry *);
962 static void display_crosstabulation (void);
963 static void display_chisq (void);
964 static void display_symmetric (void);
965 static void display_risk (void);
966 static void display_directional (void);
967 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
968 static void table_value_missing (struct tab_table *table, int c, int r,
969 unsigned char opt, const union value *v,
970 const struct variable *var);
971 static void delete_missing (void);
973 /* Output pivot table beginning at PB and continuing until PE,
974 exclusive. For efficiency, *MATP is a pointer to a matrix that can
975 hold *MAXROWS entries. */
977 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
978 double **matp, double **row_totp, double **col_totp,
979 int *maxrows, int *maxcols, int *maxcells)
982 struct table_entry **tb = pb, **te; /* Table begin, table end. */
983 int tc = pe - pb; /* Table count. */
985 /* Table entry for header comparison. */
986 struct table_entry *cmp = NULL;
988 x = xtab[(*pb)->table];
989 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
991 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
993 /* Crosstabulation table initialization. */
996 table = tab_create (nvar + n_cols,
997 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
998 tab_headers (table, nvar - 1, 0, 2, 0);
1000 /* First header line. */
1001 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1002 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1004 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1006 /* Second header line. */
1010 for (i = 2; i < nvar; i++)
1011 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1012 TAB_RIGHT | TAT_TITLE,
1014 ? x->vars[i]->label : x->vars[i]->name));
1015 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1016 x->vars[ROW_VAR]->name);
1017 for (i = 0; i < n_cols; i++)
1018 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1020 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1023 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1024 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1028 char *title = local_alloc (x->nvar * 64 + 128);
1032 if (cmd.pivot == CRS_PIVOT)
1033 for (i = 0; i < nvar; i++)
1036 cp = stpcpy (cp, " by ");
1037 cp = stpcpy (cp, x->vars[i]->name);
1041 cp = spprintf (cp, "%s by %s for",
1042 x->vars[0]->name, x->vars[1]->name);
1043 for (i = 2; i < nvar; i++)
1045 char buf[64], *bufp;
1050 cp = stpcpy (cp, x->vars[i]->name);
1052 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1053 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1055 cp = stpcpy (cp, bufp);
1059 cp = stpcpy (cp, " [");
1060 for (i = 0; i < num_cells; i++)
1068 static const struct tuple cell_names[] =
1070 {CRS_CL_COUNT, N_("count")},
1071 {CRS_CL_ROW, N_("row %")},
1072 {CRS_CL_COLUMN, N_("column %")},
1073 {CRS_CL_TOTAL, N_("total %")},
1074 {CRS_CL_EXPECTED, N_("expected")},
1075 {CRS_CL_RESIDUAL, N_("residual")},
1076 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1077 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1081 const struct tuple *t;
1083 for (t = cell_names; t->value != cells[i]; t++)
1084 assert (t->value != -1);
1086 cp = stpcpy (cp, ", ");
1087 cp = stpcpy (cp, gettext (t->name));
1091 tab_title (table, "%s", title);
1095 tab_offset (table, 0, 2);
1100 /* Chi-square table initialization. */
1101 if (cmd.a_statistics[CRS_ST_CHISQ])
1103 chisq = tab_create (6 + (nvar - 2),
1104 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1105 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1107 tab_title (chisq, _("Chi-square tests."));
1109 tab_offset (chisq, nvar - 2, 0);
1110 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1111 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1112 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1113 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1114 _("Asymp. Sig. (2-sided)"));
1115 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1116 _("Exact. Sig. (2-sided)"));
1117 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1118 _("Exact. Sig. (1-sided)"));
1120 tab_offset (chisq, 0, 1);
1125 /* Symmetric measures. */
1126 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1127 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1128 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1129 || cmd.a_statistics[CRS_ST_KAPPA])
1131 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1132 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1133 tab_title (sym, _("Symmetric measures."));
1135 tab_offset (sym, nvar - 2, 0);
1136 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1137 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1138 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1139 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1140 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1141 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1142 tab_offset (sym, 0, 1);
1147 /* Risk estimate. */
1148 if (cmd.a_statistics[CRS_ST_RISK])
1150 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1151 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1152 tab_title (risk, _("Risk estimate."));
1154 tab_offset (risk, nvar - 2, 0);
1155 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1156 _("95%% Confidence Interval"));
1157 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1158 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1159 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1160 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1161 tab_hline (risk, TAL_1, 2, 3, 1);
1162 tab_vline (risk, TAL_1, 2, 0, 1);
1163 tab_offset (risk, 0, 2);
1168 /* Directional measures. */
1169 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1170 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1172 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1173 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1174 tab_title (direct, _("Directional measures."));
1176 tab_offset (direct, nvar - 2, 0);
1177 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1178 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1179 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1180 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1181 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1182 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1183 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1184 tab_offset (direct, 0, 1);
1191 /* Find pivot subtable if applicable. */
1192 te = find_pivot_extent (tb, &tc, 0);
1196 /* Find all the row variable values. */
1197 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1199 /* Allocate memory space for the column and row totals. */
1200 if (n_rows > *maxrows)
1202 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1203 row_tot = *row_totp;
1206 if (n_cols > *maxcols)
1208 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1209 col_tot = *col_totp;
1213 /* Allocate table space for the matrix. */
1214 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1215 tab_realloc (table, -1,
1216 max (tab_nr (table) + (n_rows + 1) * num_cells,
1217 tab_nr (table) * (pe - pb) / (te - tb)));
1219 if (mode == GENERAL)
1221 /* Allocate memory space for the matrix. */
1222 if (n_cols * n_rows > *maxcells)
1224 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1225 *maxcells = n_cols * n_rows;
1230 /* Build the matrix and calculate column totals. */
1232 union value *cur_col = cols;
1233 union value *cur_row = rows;
1235 double *cp = col_tot;
1236 struct table_entry **p;
1239 for (p = &tb[0]; p < te; p++)
1241 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1245 for (; cur_row < &rows[n_rows]; cur_row++)
1251 mp = &mat[cur_col - cols];
1254 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1261 *cp += *mp = (*p)->u.freq;
1266 /* Zero out the rest of the matrix. */
1267 for (; cur_row < &rows[n_rows]; cur_row++)
1273 if (cur_col < &cols[n_cols])
1275 const int rem_cols = n_cols - (cur_col - cols);
1278 for (c = 0; c < rem_cols; c++)
1280 mp = &mat[cur_col - cols];
1281 for (r = 0; r < n_rows; r++)
1283 for (c = 0; c < rem_cols; c++)
1285 mp += n_cols - rem_cols;
1293 double *tp = col_tot;
1295 assert (mode == INTEGER);
1296 mat = (*tb)->u.data;
1299 /* Calculate column totals. */
1300 for (c = 0; c < n_cols; c++)
1303 double *cp = &mat[c];
1305 for (r = 0; r < n_rows; r++)
1306 cum += cp[r * n_cols];
1314 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1315 ns_cols += *cp != 0.;
1318 /* Calculate row totals. */
1321 double *rp = row_tot;
1324 for (ns_rows = 0, r = n_rows; r--; )
1327 for (c = n_cols; c--; )
1335 /* Calculate grand total. */
1341 if (n_rows < n_cols)
1342 tp = row_tot, n = n_rows;
1344 tp = col_tot, n = n_cols;
1350 /* Find the first variable that differs from the last subtable,
1351 then display the values of the dimensioning variables for
1352 each table that needs it. */
1354 int first_difference = nvar - 1;
1357 for (; ; first_difference--)
1359 assert (first_difference >= 2);
1360 if (memcmp (&cmp->values[first_difference],
1361 &(*tb)->values[first_difference],
1362 sizeof *cmp->values))
1368 display_dimensions (table, first_difference, *tb);
1370 display_dimensions (chisq, first_difference, *tb);
1372 display_dimensions (sym, first_difference, *tb);
1374 display_dimensions (risk, first_difference, *tb);
1376 display_dimensions (direct, first_difference, *tb);
1380 display_crosstabulation ();
1381 if (cmd.miss == CRS_REPORT)
1386 display_symmetric ();
1390 display_directional ();
1401 tab_resize (chisq, 4 + (nvar - 2), -1);
1412 /* Delete missing rows and columns for statistical analysis when
1415 delete_missing (void)
1420 for (r = 0; r < n_rows; r++)
1421 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1425 for (c = 0; c < n_cols; c++)
1426 mat[c + r * n_cols] = 0.;
1434 for (c = 0; c < n_cols; c++)
1435 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1439 for (r = 0; r < n_rows; r++)
1440 mat[c + r * n_cols] = 0.;
1446 /* Prepare table T for submission, and submit it. */
1448 submit (struct tab_table *t)
1455 tab_resize (t, -1, 0);
1456 if (tab_nr (t) == tab_t (t))
1461 tab_offset (t, 0, 0);
1463 for (i = 2; i < nvar; i++)
1464 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1465 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1466 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1467 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1469 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1471 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1472 tab_dim (t, crosstabs_dim);
1476 /* Sets the widths of all the columns and heights of all the rows in
1477 table T for driver D. */
1479 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1483 /* Width of a numerical column. */
1484 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1485 if (cmd.miss == CRS_REPORT)
1486 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1488 /* Set width for header columns. */
1494 w = d->width - c * (t->nc - t->l);
1495 for (i = 0; i <= t->nc; i++)
1499 if (w < d->prop_em_width * 8)
1500 w = d->prop_em_width * 8;
1502 if (w > d->prop_em_width * 15)
1503 w = d->prop_em_width * 15;
1505 for (i = 0; i < t->l; i++)
1509 for (i = t->l; i < t->nc; i++)
1512 for (i = 0; i < t->nr; i++)
1513 t->h[i] = tab_natural_height (t, d, i);
1516 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1517 int *cnt, int pivot);
1518 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1519 int *cnt, int pivot);
1521 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1523 static struct table_entry **
1524 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1526 return (mode == GENERAL
1527 ? find_pivot_extent_general (tp, cnt, pivot)
1528 : find_pivot_extent_integer (tp, cnt, pivot));
1531 /* Find the extent of a region in TP that contains one table. If
1532 PIVOT != 0 that means a set of table entries with identical table
1533 number; otherwise they also have to have the same values for every
1534 dimension after the row and column dimensions. The table that is
1535 searched starts at TP and has length CNT. Returns the first entry
1536 after the last one in the table; sets *CNT to the number of
1537 remaining values. If there are no entries in TP at all, returns
1538 NULL. A yucky interface, admittedly, but it works. */
1539 static struct table_entry **
1540 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1542 struct table_entry *fp = *tp;
1547 x = xtab[(*tp)->table];
1555 if ((*tp)->table != fp->table)
1560 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1567 /* Integer mode correspondent to find_pivot_extent_general(). This
1568 could be optimized somewhat, but I just don't give a crap about
1569 CROSSTABS performance in integer mode, which is just a
1570 CROSSTABS wart as far as I'm concerned.
1572 That said, feel free to send optimization patches to me. */
1573 static struct table_entry **
1574 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1576 struct table_entry *fp = *tp;
1581 x = xtab[(*tp)->table];
1589 if ((*tp)->table != fp->table)
1594 if (memcmp (&(*tp)->values[2], &fp->values[2],
1595 sizeof (union value) * (x->nvar - 2)))
1602 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1603 result. WIDTH_ points to an int which is either 0 for a
1604 numeric value or a string width for a string value. */
1606 compare_value (const void *a_, const void *b_, void *width_)
1608 const union value *a = a_;
1609 const union value *b = b_;
1610 const int *pwidth = width_;
1611 const int width = *pwidth;
1614 return (a->f < b->f) ? -1 : (a->f > b->f);
1616 return strncmp (a->s, b->s, width);
1619 /* Given an array of ENTRY_CNT table_entry structures starting at
1620 ENTRIES, creates a sorted list of the values that the variable
1621 with index VAR_IDX takes on. The values are returned as a
1622 malloc()'darray stored in *VALUES, with the number of values
1623 stored in *VALUE_CNT.
1626 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1627 union value **values, int *value_cnt)
1629 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1631 if (mode == GENERAL)
1633 int width = v->width;
1636 *values = xnmalloc (entry_cnt, sizeof **values);
1637 for (i = 0; i < entry_cnt; i++)
1638 (*values)[i] = entries[i]->values[var_idx];
1639 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1640 compare_value, &width);
1644 struct var_range *vr = get_var_range (v);
1647 assert (mode == INTEGER);
1648 *values = xnmalloc (vr->count, sizeof **values);
1649 for (i = 0; i < vr->count; i++)
1650 (*values)[i].f = i + vr->min;
1651 *value_cnt = vr->count;
1655 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1656 from V, displayed with print format spec from variable VAR. When
1657 in REPORT missing-value mode, missing values have an M appended. */
1659 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1660 const union value *v, const struct variable *var)
1662 struct fixed_string s;
1664 const char *label = val_labs_find (var->val_labs, *v);
1667 tab_text (table, c, r, TAB_LEFT, label);
1671 s.string = tab_alloc (table, var->print.w);
1672 format_short (s.string, &var->print, v);
1673 s.length = strlen (s.string);
1674 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1675 s.string[s.length++] = 'M';
1676 while (s.length && *s.string == ' ')
1681 tab_raw (table, c, r, opt, &s);
1684 /* Draws a line across TABLE at the current row to indicate the most
1685 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1686 that changed, and puts the values that changed into the table. TB
1687 and X must be the corresponding table_entry and crosstab,
1690 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1692 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1694 for (; first_difference >= 2; first_difference--)
1695 table_value_missing (table, nvar - first_difference - 1, 0,
1696 TAB_RIGHT, &tb->values[first_difference],
1697 x->vars[first_difference]);
1700 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1701 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1702 additionally suffixed with a letter `M'. */
1704 format_cell_entry (struct tab_table *table, int c, int r, double value,
1705 char suffix, int mark_missing)
1707 const struct fmt_spec f = {FMT_F, 10, 1};
1709 struct fixed_string s;
1712 s.string = tab_alloc (table, 16);
1714 data_out (s.string, &f, &v);
1715 while (*s.string == ' ')
1721 s.string[s.length++] = suffix;
1723 s.string[s.length++] = 'M';
1725 tab_raw (table, c, r, TAB_RIGHT, &s);
1728 /* Displays the crosstabulation table. */
1730 display_crosstabulation (void)
1735 for (r = 0; r < n_rows; r++)
1736 table_value_missing (table, nvar - 2, r * num_cells,
1737 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1739 tab_text (table, nvar - 2, n_rows * num_cells,
1740 TAB_LEFT, _("Total"));
1742 /* Put in the actual cells. */
1747 tab_offset (table, nvar - 1, -1);
1748 for (r = 0; r < n_rows; r++)
1751 tab_hline (table, TAL_1, -1, n_cols, 0);
1752 for (c = 0; c < n_cols; c++)
1754 int mark_missing = 0;
1755 double expected_value = row_tot[r] * col_tot[c] / W;
1756 if (cmd.miss == CRS_REPORT
1757 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1758 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1761 for (i = 0; i < num_cells; i++)
1772 v = *mp / row_tot[r] * 100.;
1776 v = *mp / col_tot[c] * 100.;
1783 case CRS_CL_EXPECTED:
1786 case CRS_CL_RESIDUAL:
1787 v = *mp - expected_value;
1789 case CRS_CL_SRESIDUAL:
1790 v = (*mp - expected_value) / sqrt (expected_value);
1792 case CRS_CL_ASRESIDUAL:
1793 v = ((*mp - expected_value)
1794 / sqrt (expected_value
1795 * (1. - row_tot[r] / W)
1796 * (1. - col_tot[c] / W)));
1803 format_cell_entry (table, c, i, v, suffix, mark_missing);
1809 tab_offset (table, -1, tab_row (table) + num_cells);
1817 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1818 for (r = 0; r < n_rows; r++)
1821 int mark_missing = 0;
1823 if (cmd.miss == CRS_REPORT
1824 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1827 for (i = 0; i < num_cells; i++)
1841 v = row_tot[r] / W * 100.;
1845 v = row_tot[r] / W * 100.;
1848 case CRS_CL_EXPECTED:
1849 case CRS_CL_RESIDUAL:
1850 case CRS_CL_SRESIDUAL:
1851 case CRS_CL_ASRESIDUAL:
1859 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1860 tab_next_row (table);
1865 /* Column totals, grand total. */
1871 tab_hline (table, TAL_1, -1, n_cols, 0);
1872 for (c = 0; c <= n_cols; c++)
1874 double ct = c < n_cols ? col_tot[c] : W;
1875 int mark_missing = 0;
1879 if (cmd.miss == CRS_REPORT && c < n_cols
1880 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1883 for (i = 0; i < num_cells; i++)
1905 case CRS_CL_EXPECTED:
1906 case CRS_CL_RESIDUAL:
1907 case CRS_CL_SRESIDUAL:
1908 case CRS_CL_ASRESIDUAL:
1915 format_cell_entry (table, c, i, v, suffix, mark_missing);
1920 tab_offset (table, -1, tab_row (table) + last_row);
1923 tab_offset (table, 0, -1);
1926 static void calc_r (double *X, double *Y, double *, double *, double *);
1927 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1929 /* Display chi-square statistics. */
1931 display_chisq (void)
1933 static const char *chisq_stats[N_CHISQ] =
1935 N_("Pearson Chi-Square"),
1936 N_("Likelihood Ratio"),
1937 N_("Fisher's Exact Test"),
1938 N_("Continuity Correction"),
1939 N_("Linear-by-Linear Association"),
1941 double chisq_v[N_CHISQ];
1942 double fisher1, fisher2;
1948 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1950 tab_offset (chisq, nvar - 2, -1);
1952 for (i = 0; i < N_CHISQ; i++)
1954 if ((i != 2 && chisq_v[i] == SYSMIS)
1955 || (i == 2 && fisher1 == SYSMIS))
1959 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1962 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1963 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1964 tab_float (chisq, 3, 0, TAB_RIGHT,
1965 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1970 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1971 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1973 tab_next_row (chisq);
1976 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1977 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1978 tab_next_row (chisq);
1980 tab_offset (chisq, 0, -1);
1983 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1984 double[N_SYMMETRIC]);
1986 /* Display symmetric measures. */
1988 display_symmetric (void)
1990 static const char *categories[] =
1992 N_("Nominal by Nominal"),
1993 N_("Ordinal by Ordinal"),
1994 N_("Interval by Interval"),
1995 N_("Measure of Agreement"),
1998 static const char *stats[N_SYMMETRIC] =
2002 N_("Contingency Coefficient"),
2003 N_("Kendall's tau-b"),
2004 N_("Kendall's tau-c"),
2006 N_("Spearman Correlation"),
2011 static const int stats_categories[N_SYMMETRIC] =
2013 0, 0, 0, 1, 1, 1, 1, 2, 3,
2017 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2020 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2023 tab_offset (sym, nvar - 2, -1);
2025 for (i = 0; i < N_SYMMETRIC; i++)
2027 if (sym_v[i] == SYSMIS)
2030 if (stats_categories[i] != last_cat)
2032 last_cat = stats_categories[i];
2033 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2036 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2037 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2038 if (sym_ase[i] != SYSMIS)
2039 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2040 if (sym_t[i] != SYSMIS)
2041 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2042 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2046 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2047 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2050 tab_offset (sym, 0, -1);
2053 static int calc_risk (double[], double[], double[], union value *);
2055 /* Display risk estimate. */
2060 double risk_v[3], lower[3], upper[3];
2064 if (!calc_risk (risk_v, upper, lower, c))
2067 tab_offset (risk, nvar - 2, -1);
2069 for (i = 0; i < 3; i++)
2071 if (risk_v[i] == SYSMIS)
2077 if (x->vars[COL_VAR]->type == NUMERIC)
2078 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2079 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2081 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2082 x->vars[COL_VAR]->name,
2083 x->vars[COL_VAR]->width, c[0].s,
2084 x->vars[COL_VAR]->width, c[1].s);
2088 if (x->vars[ROW_VAR]->type == NUMERIC)
2089 sprintf (buf, _("For cohort %s = %g"),
2090 x->vars[ROW_VAR]->name, rows[i - 1].f);
2092 sprintf (buf, _("For cohort %s = %.*s"),
2093 x->vars[ROW_VAR]->name,
2094 x->vars[ROW_VAR]->width, rows[i - 1].s);
2098 tab_text (risk, 0, 0, TAB_LEFT, buf);
2099 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2100 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2101 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2102 tab_next_row (risk);
2105 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2106 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2107 tab_next_row (risk);
2109 tab_offset (risk, 0, -1);
2112 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2113 double[N_DIRECTIONAL]);
2115 /* Display directional measures. */
2117 display_directional (void)
2119 static const char *categories[] =
2121 N_("Nominal by Nominal"),
2122 N_("Ordinal by Ordinal"),
2123 N_("Nominal by Interval"),
2126 static const char *stats[] =
2129 N_("Goodman and Kruskal tau"),
2130 N_("Uncertainty Coefficient"),
2135 static const char *types[] =
2142 static const int stats_categories[N_DIRECTIONAL] =
2144 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2147 static const int stats_stats[N_DIRECTIONAL] =
2149 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2152 static const int stats_types[N_DIRECTIONAL] =
2154 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2157 static const int *stats_lookup[] =
2164 static const char **stats_names[] =
2176 double direct_v[N_DIRECTIONAL];
2177 double direct_ase[N_DIRECTIONAL];
2178 double direct_t[N_DIRECTIONAL];
2182 if (!calc_directional (direct_v, direct_ase, direct_t))
2185 tab_offset (direct, nvar - 2, -1);
2187 for (i = 0; i < N_DIRECTIONAL; i++)
2189 if (direct_v[i] == SYSMIS)
2195 for (j = 0; j < 3; j++)
2196 if (last[j] != stats_lookup[j][i])
2199 tab_hline (direct, TAL_1, j, 6, 0);
2204 int k = last[j] = stats_lookup[j][i];
2209 string = x->vars[0]->name;
2211 string = x->vars[1]->name;
2213 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2214 gettext (stats_names[j][k]), string);
2219 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2220 if (direct_ase[i] != SYSMIS)
2221 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2222 if (direct_t[i] != SYSMIS)
2223 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2224 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2225 tab_next_row (direct);
2228 tab_offset (direct, 0, -1);
2231 /* Statistical calculations. */
2233 /* Returns the value of the gamma (factorial) function for an integer
2236 gamma_int (double x)
2241 for (i = 2; i < x; i++)
2246 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2248 static inline double
2249 Pr (int a, int b, int c, int d)
2251 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2252 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2253 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2254 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2255 / gamma_int (a + b + c + d + 1.));
2258 /* Swap the contents of A and B. */
2260 swap (int *a, int *b)
2267 /* Calculate significance for Fisher's exact test as specified in
2268 _SPSS Statistical Algorithms_, Appendix 5. */
2270 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2274 if (min (c, d) < min (a, b))
2275 swap (&a, &c), swap (&b, &d);
2276 if (min (b, d) < min (a, c))
2277 swap (&a, &b), swap (&c, &d);
2281 swap (&a, &b), swap (&c, &d);
2283 swap (&a, &c), swap (&b, &d);
2287 for (x = 0; x <= a; x++)
2288 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2290 *fisher2 = *fisher1;
2291 for (x = 1; x <= b; x++)
2292 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2295 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2296 columns with values COLS and N_ROWS rows with values ROWS. Values
2297 in the matrix sum to W. */
2299 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2300 double *fisher1, double *fisher2)
2304 chisq[0] = chisq[1] = 0.;
2305 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2306 *fisher1 = *fisher2 = SYSMIS;
2308 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2310 if (ns_rows <= 1 || ns_cols <= 1)
2312 chisq[0] = chisq[1] = SYSMIS;
2316 for (r = 0; r < n_rows; r++)
2317 for (c = 0; c < n_cols; c++)
2319 const double expected = row_tot[r] * col_tot[c] / W;
2320 const double freq = mat[n_cols * r + c];
2321 const double residual = freq - expected;
2323 chisq[0] += residual * residual / expected;
2325 chisq[1] += freq * log (expected / freq);
2336 /* Calculate Yates and Fisher exact test. */
2337 if (ns_cols == 2 && ns_rows == 2)
2339 double f11, f12, f21, f22;
2345 for (i = j = 0; i < n_cols; i++)
2346 if (col_tot[i] != 0.)
2355 f11 = mat[nz_cols[0]];
2356 f12 = mat[nz_cols[1]];
2357 f21 = mat[nz_cols[0] + n_cols];
2358 f22 = mat[nz_cols[1] + n_cols];
2363 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2366 chisq[3] = (W * x * x
2367 / (f11 + f12) / (f21 + f22)
2368 / (f11 + f21) / (f12 + f22));
2376 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2377 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2380 /* Calculate Mantel-Haenszel. */
2381 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2383 double r, ase_0, ase_1;
2384 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2386 chisq[4] = (W - 1.) * r * r;
2391 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2392 ASE_1, and ase_0 into ASE_0. The row and column values must be
2393 passed in X and Y. */
2395 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2397 double SX, SY, S, T;
2399 double sum_XYf, sum_X2Y2f;
2400 double sum_Xr, sum_X2r;
2401 double sum_Yc, sum_Y2c;
2404 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2405 for (j = 0; j < n_cols; j++)
2407 double fij = mat[j + i * n_cols];
2408 double product = X[i] * Y[j];
2409 double temp = fij * product;
2411 sum_X2Y2f += temp * product;
2414 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2416 sum_Xr += X[i] * row_tot[i];
2417 sum_X2r += X[i] * X[i] * row_tot[i];
2421 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2423 sum_Yc += Y[i] * col_tot[i];
2424 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2428 S = sum_XYf - sum_Xr * sum_Yc / W;
2429 SX = sum_X2r - sum_Xr * sum_Xr / W;
2430 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2433 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2438 for (s = c = 0., i = 0; i < n_rows; i++)
2439 for (j = 0; j < n_cols; j++)
2441 double Xresid, Yresid;
2444 Xresid = X[i] - Xbar;
2445 Yresid = Y[j] - Ybar;
2446 temp = (T * Xresid * Yresid
2448 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2449 y = mat[j + i * n_cols] * temp * temp - c;
2454 *ase_1 = sqrt (s) / (T * T);
2458 static double somers_d_v[3];
2459 static double somers_d_ase[3];
2460 static double somers_d_t[3];
2462 /* Calculate symmetric statistics and their asymptotic standard
2463 errors. Returns 0 if none could be calculated. */
2465 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2466 double t[N_SYMMETRIC])
2468 int q = min (ns_rows, ns_cols);
2477 for (i = 0; i < N_SYMMETRIC; i++)
2478 v[i] = ase[i] = t[i] = SYSMIS;
2481 /* Phi, Cramer's V, contingency coefficient. */
2482 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2484 double Xp = 0.; /* Pearson chi-square. */
2489 for (r = 0; r < n_rows; r++)
2490 for (c = 0; c < n_cols; c++)
2492 const double expected = row_tot[r] * col_tot[c] / W;
2493 const double freq = mat[n_cols * r + c];
2494 const double residual = freq - expected;
2496 Xp += residual * residual / expected;
2500 if (cmd.a_statistics[CRS_ST_PHI])
2502 v[0] = sqrt (Xp / W);
2503 v[1] = sqrt (Xp / (W * (q - 1)));
2505 if (cmd.a_statistics[CRS_ST_CC])
2506 v[2] = sqrt (Xp / (Xp + W));
2509 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2510 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2515 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2522 for (r = 0; r < n_rows; r++)
2523 Dr -= row_tot[r] * row_tot[r];
2524 for (c = 0; c < n_cols; c++)
2525 Dc -= col_tot[c] * col_tot[c];
2531 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2532 for (c = 0; c < n_cols; c++)
2536 for (r = 0; r < n_rows; r++)
2537 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2547 for (i = 0; i < n_rows; i++)
2551 for (j = 1; j < n_cols; j++)
2552 Cij += col_tot[j] - cum[j + i * n_cols];
2555 for (j = 1; j < n_cols; j++)
2556 Dij += cum[j + (i - 1) * n_cols];
2560 double fij = mat[j + i * n_cols];
2566 assert (j < n_cols);
2568 Cij -= col_tot[j] - cum[j + i * n_cols];
2569 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2573 Cij += cum[j - 1 + (i - 1) * n_cols];
2574 Dij -= cum[j + (i - 1) * n_cols];
2580 if (cmd.a_statistics[CRS_ST_BTAU])
2581 v[3] = (P - Q) / sqrt (Dr * Dc);
2582 if (cmd.a_statistics[CRS_ST_CTAU])
2583 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2584 if (cmd.a_statistics[CRS_ST_GAMMA])
2585 v[5] = (P - Q) / (P + Q);
2587 /* ASE for tau-b, tau-c, gamma. Calculations could be
2588 eliminated here, at expense of memory. */
2593 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2594 for (i = 0; i < n_rows; i++)
2598 for (j = 1; j < n_cols; j++)
2599 Cij += col_tot[j] - cum[j + i * n_cols];
2602 for (j = 1; j < n_cols; j++)
2603 Dij += cum[j + (i - 1) * n_cols];
2607 double fij = mat[j + i * n_cols];
2609 if (cmd.a_statistics[CRS_ST_BTAU])
2611 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2612 + v[3] * (row_tot[i] * Dc
2613 + col_tot[j] * Dr));
2614 btau_cum += fij * temp * temp;
2618 const double temp = Cij - Dij;
2619 ctau_cum += fij * temp * temp;
2622 if (cmd.a_statistics[CRS_ST_GAMMA])
2624 const double temp = Q * Cij - P * Dij;
2625 gamma_cum += fij * temp * temp;
2628 if (cmd.a_statistics[CRS_ST_D])
2630 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2631 - (P - Q) * (W - row_tot[i]));
2632 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2633 - (Q - P) * (W - col_tot[j]));
2638 assert (j < n_cols);
2640 Cij -= col_tot[j] - cum[j + i * n_cols];
2641 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2645 Cij += cum[j - 1 + (i - 1) * n_cols];
2646 Dij -= cum[j + (i - 1) * n_cols];
2652 btau_var = ((btau_cum
2653 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2655 if (cmd.a_statistics[CRS_ST_BTAU])
2657 ase[3] = sqrt (btau_var);
2658 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2661 if (cmd.a_statistics[CRS_ST_CTAU])
2663 ase[4] = ((2 * q / ((q - 1) * W * W))
2664 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2665 t[4] = v[4] / ase[4];
2667 if (cmd.a_statistics[CRS_ST_GAMMA])
2669 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2670 t[5] = v[5] / (2. / (P + Q)
2671 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2673 if (cmd.a_statistics[CRS_ST_D])
2675 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2676 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2677 somers_d_t[0] = (somers_d_v[0]
2679 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2680 somers_d_v[1] = (P - Q) / Dc;
2681 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2682 somers_d_t[1] = (somers_d_v[1]
2684 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2685 somers_d_v[2] = (P - Q) / Dr;
2686 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2687 somers_d_t[2] = (somers_d_v[2]
2689 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2695 /* Spearman correlation, Pearson's r. */
2696 if (cmd.a_statistics[CRS_ST_CORR])
2698 double *R = local_alloc (sizeof *R * n_rows);
2699 double *C = local_alloc (sizeof *C * n_cols);
2702 double y, t, c = 0., s = 0.;
2707 R[i] = s + (row_tot[i] + 1.) / 2.;
2714 assert (i < n_rows);
2719 double y, t, c = 0., s = 0.;
2724 C[j] = s + (col_tot[j] + 1.) / 2;
2731 assert (j < n_cols);
2735 calc_r (R, C, &v[6], &t[6], &ase[6]);
2741 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2745 /* Cohen's kappa. */
2746 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2748 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2751 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2752 i < ns_rows; i++, j++)
2756 while (col_tot[j] == 0.)
2759 prod = row_tot[i] * col_tot[j];
2760 sum = row_tot[i] + col_tot[j];
2762 sum_fii += mat[j + i * n_cols];
2764 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2765 sum_riciri_ci += prod * sum;
2767 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2768 for (j = 0; j < ns_cols; j++)
2770 double sum = row_tot[i] + col_tot[j];
2771 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2774 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2776 ase[8] = sqrt ((W * W * sum_rici
2777 + sum_rici * sum_rici
2778 - W * sum_riciri_ci)
2779 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2781 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2782 / pow2 (W * W - sum_rici))
2783 + ((2. * (W - sum_fii)
2784 * (2. * sum_fii * sum_rici
2785 - W * sum_fiiri_ci))
2786 / cube (W * W - sum_rici))
2787 + (pow2 (W - sum_fii)
2788 * (W * sum_fijri_ci2 - 4.
2789 * sum_rici * sum_rici)
2790 / pow4 (W * W - sum_rici))));
2792 t[8] = v[8] / ase[8];
2799 /* Calculate risk estimate. */
2801 calc_risk (double *value, double *upper, double *lower, union value *c)
2803 double f11, f12, f21, f22;
2809 for (i = 0; i < 3; i++)
2810 value[i] = upper[i] = lower[i] = SYSMIS;
2813 if (ns_rows != 2 || ns_cols != 2)
2820 for (i = j = 0; i < n_cols; i++)
2821 if (col_tot[i] != 0.)
2830 f11 = mat[nz_cols[0]];
2831 f12 = mat[nz_cols[1]];
2832 f21 = mat[nz_cols[0] + n_cols];
2833 f22 = mat[nz_cols[1] + n_cols];
2835 c[0] = cols[nz_cols[0]];
2836 c[1] = cols[nz_cols[1]];
2839 value[0] = (f11 * f22) / (f12 * f21);
2840 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2841 lower[0] = value[0] * exp (-1.960 * v);
2842 upper[0] = value[0] * exp (1.960 * v);
2844 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2845 v = sqrt ((f12 / (f11 * (f11 + f12)))
2846 + (f22 / (f21 * (f21 + f22))));
2847 lower[1] = value[1] * exp (-1.960 * v);
2848 upper[1] = value[1] * exp (1.960 * v);
2850 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2851 v = sqrt ((f11 / (f12 * (f11 + f12)))
2852 + (f21 / (f22 * (f21 + f22))));
2853 lower[2] = value[2] * exp (-1.960 * v);
2854 upper[2] = value[2] * exp (1.960 * v);
2859 /* Calculate directional measures. */
2861 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2862 double t[N_DIRECTIONAL])
2867 for (i = 0; i < N_DIRECTIONAL; i++)
2868 v[i] = ase[i] = t[i] = SYSMIS;
2872 if (cmd.a_statistics[CRS_ST_LAMBDA])
2874 double *fim = xnmalloc (n_rows, sizeof *fim);
2875 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2876 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2877 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2878 double sum_fim, sum_fmj;
2880 int rm_index, cm_index;
2883 /* Find maximum for each row and their sum. */
2884 for (sum_fim = 0., i = 0; i < n_rows; i++)
2886 double max = mat[i * n_cols];
2889 for (j = 1; j < n_cols; j++)
2890 if (mat[j + i * n_cols] > max)
2892 max = mat[j + i * n_cols];
2896 sum_fim += fim[i] = max;
2897 fim_index[i] = index;
2900 /* Find maximum for each column. */
2901 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2903 double max = mat[j];
2906 for (i = 1; i < n_rows; i++)
2907 if (mat[j + i * n_cols] > max)
2909 max = mat[j + i * n_cols];
2913 sum_fmj += fmj[j] = max;
2914 fmj_index[j] = index;
2917 /* Find maximum row total. */
2920 for (i = 1; i < n_rows; i++)
2921 if (row_tot[i] > rm)
2927 /* Find maximum column total. */
2930 for (j = 1; j < n_cols; j++)
2931 if (col_tot[j] > cm)
2937 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2938 v[1] = (sum_fmj - rm) / (W - rm);
2939 v[2] = (sum_fim - cm) / (W - cm);
2941 /* ASE1 for Y given X. */
2945 for (accum = 0., i = 0; i < n_rows; i++)
2946 for (j = 0; j < n_cols; j++)
2948 const int deltaj = j == cm_index;
2949 accum += (mat[j + i * n_cols]
2950 * pow2 ((j == fim_index[i])
2955 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2958 /* ASE0 for Y given X. */
2962 for (accum = 0., i = 0; i < n_rows; i++)
2963 if (cm_index != fim_index[i])
2964 accum += (mat[i * n_cols + fim_index[i]]
2965 + mat[i * n_cols + cm_index]);
2966 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2969 /* ASE1 for X given Y. */
2973 for (accum = 0., i = 0; i < n_rows; i++)
2974 for (j = 0; j < n_cols; j++)
2976 const int deltaj = i == rm_index;
2977 accum += (mat[j + i * n_cols]
2978 * pow2 ((i == fmj_index[j])
2983 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2986 /* ASE0 for X given Y. */
2990 for (accum = 0., j = 0; j < n_cols; j++)
2991 if (rm_index != fmj_index[j])
2992 accum += (mat[j + n_cols * fmj_index[j]]
2993 + mat[j + n_cols * rm_index]);
2994 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2997 /* Symmetric ASE0 and ASE1. */
3002 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3003 for (j = 0; j < n_cols; j++)
3005 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3006 int temp1 = (i == rm_index) + (j == cm_index);
3007 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3008 accum1 += (mat[j + i * n_cols]
3009 * pow2 (temp0 + (v[0] - 1.) * temp1));
3011 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3012 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3013 / (2. * W - rm - cm));
3022 double sum_fij2_ri, sum_fij2_ci;
3023 double sum_ri2, sum_cj2;
3025 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3026 for (j = 0; j < n_cols; j++)
3028 double temp = pow2 (mat[j + i * n_cols]);
3029 sum_fij2_ri += temp / row_tot[i];
3030 sum_fij2_ci += temp / col_tot[j];
3033 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3034 sum_ri2 += row_tot[i] * row_tot[i];
3036 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3037 sum_cj2 += col_tot[j] * col_tot[j];
3039 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3040 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3044 if (cmd.a_statistics[CRS_ST_UC])
3046 double UX, UY, UXY, P;
3047 double ase1_yx, ase1_xy, ase1_sym;
3050 for (UX = 0., i = 0; i < n_rows; i++)
3051 if (row_tot[i] > 0.)
3052 UX -= row_tot[i] / W * log (row_tot[i] / W);
3054 for (UY = 0., j = 0; j < n_cols; j++)
3055 if (col_tot[j] > 0.)
3056 UY -= col_tot[j] / W * log (col_tot[j] / W);
3058 for (UXY = P = 0., i = 0; i < n_rows; i++)
3059 for (j = 0; j < n_cols; j++)
3061 double entry = mat[j + i * n_cols];
3066 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3067 UXY -= entry / W * log (entry / W);
3070 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3071 for (j = 0; j < n_cols; j++)
3073 double entry = mat[j + i * n_cols];
3078 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3079 + (UX - UXY) * log (col_tot[j] / W));
3080 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3081 + (UY - UXY) * log (row_tot[i] / W));
3082 ase1_sym += entry * pow2 ((UXY
3083 * log (row_tot[i] * col_tot[j] / (W * W)))
3084 - (UX + UY) * log (entry / W));
3087 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3088 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3089 t[5] = v[5] / ((2. / (W * (UX + UY)))
3090 * sqrt (P - pow2 (UX + UY - UXY) / W));
3092 v[6] = (UX + UY - UXY) / UX;
3093 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3094 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3096 v[7] = (UX + UY - UXY) / UY;
3097 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3098 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3102 if (cmd.a_statistics[CRS_ST_D])
3107 calc_symmetric (NULL, NULL, NULL);
3108 for (i = 0; i < 3; i++)
3110 v[8 + i] = somers_d_v[i];
3111 ase[8 + i] = somers_d_ase[i];
3112 t[8 + i] = somers_d_t[i];
3117 if (cmd.a_statistics[CRS_ST_ETA])
3120 double sum_Xr, sum_X2r;
3124 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3126 sum_Xr += rows[i].f * row_tot[i];
3127 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3129 SX = sum_X2r - sum_Xr * sum_Xr / W;
3131 for (SXW = 0., j = 0; j < n_cols; j++)
3135 for (cum = 0., i = 0; i < n_rows; i++)
3137 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3138 cum += rows[i].f * mat[j + i * n_cols];
3141 SXW -= cum * cum / col_tot[j];
3143 v[11] = sqrt (1. - SXW / SX);
3147 double sum_Yc, sum_Y2c;
3151 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3153 sum_Yc += cols[i].f * col_tot[i];
3154 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3156 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3158 for (SYW = 0., i = 0; i < n_rows; i++)
3162 for (cum = 0., j = 0; j < n_cols; j++)
3164 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3165 cum += cols[j].f * mat[j + i * n_cols];
3168 SYW -= cum * cum / row_tot[i];
3170 v[12] = sqrt (1. - SYW / SY);
3177 /* A wrapper around data_out() that limits string output to short
3178 string width and null terminates the result. */
3180 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3182 struct fmt_spec fmt_subst;
3184 /* Limit to short string width. */
3185 if (formats[fp->type].cat & FCAT_STRING)
3189 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3190 if (fmt_subst.type == FMT_A)
3191 fmt_subst.w = min (8, fmt_subst.w);
3193 fmt_subst.w = min (16, fmt_subst.w);
3199 data_out (s, fp, v);
3201 /* Null terminate. */